diff --git a/docs/DXIL.rst b/docs/DXIL.rst
index 8007a1ef48..30de3d0451 100644
--- a/docs/DXIL.rst
+++ b/docs/DXIL.rst
@@ -2421,10 +2421,10 @@ ID  Name                                                  Description
 302 ReservedC9                                            reserved
 303 RawBufferVectorLoad                                   reads from a raw buffer and structured buffer
 304 RawBufferVectorStore                                  writes to a RWByteAddressBuffer or RWStructuredBuffer
-305 MatVecMul                                             Multiplies a MxK dimension matrix and a K sized input vector
-306 MatVecMulAdd                                          multiplies a MxK dimension matrix and a K sized input vector and adds an M-sized bias vector
-307 OuterProductAccumulate                                Computes the outer product between column vectors and an MxN matrix is accumulated component-wise atomically (with device scope) in memory
-308 VectorAccumulate                                      Accumulates the components of a vector component-wise atomically (with device scope) to the corresponding elements of an array in memory
+305 ReservedD0                                            reserved
+306 ReservedD1                                            reserved
+307 ReservedD2                                            reserved
+308 ReservedD3                                            reserved
 309 VectorReduceAnd                                       Bitwise AND reduction of the vector returning a scalar
 310 VectorReduceOr                                        Bitwise OR reduction of the vector returning a scalar
 311 FDot                                                  computes the n-dimensional vector dot-product
@@ -3095,9 +3095,9 @@ ID         Name                                     Description
 2147483675 LinAlgMatrixAccumulateToDescriptor       accumulates a matrix to a RWByteAddressBuffer
 2147483676 LinAlgMatrixAccumulateToMemory           accumulates a matrix to groupshared memory
 2147483677 LinAlgMatrixOuterProduct                 Outer products an M sized vector and a N sized vector producing an MxN matrix
-2147483678 ReservedD1                               reserved
-2147483679 ReservedD2                               reserved
-2147483680 ReservedD3                               reserved
+2147483678 ReservedE1                               reserved
+2147483679 ReservedE2                               reserved
+2147483680 ReservedE3                               reserved
 2147483681 DebugBreak                               triggers a breakpoint if a debugger is attached
 2147483682 IsDebuggerPresent                        returns true if a debugger is attached
 ========== ======================================== ===================================================================================================================
@@ -3120,301 +3120,291 @@ The set of validation rules that are known to hold for a DXIL program is identif
 .. <py::lines('VALRULES-RST')>hctdb_instrhelp.get_valrules_rst()</py>
 .. VALRULES-RST:BEGIN
 
-============================================================= ========================================================================================================================================================================================================================================================================================================
-Rule Code                                                     Description
-============================================================= ========================================================================================================================================================================================================================================================================================================
-BITCODE.VALID                                                 Module must be bitcode-valid
-CONTAINER.CONTENTINVALID                                      DXIL Container Content is well-formed
-CONTAINER.CONTENTMATCHES                                      DXIL Container Content must match Module
-CONTAINER.PARTINVALID                                         DXIL Container must not contain unknown parts
-CONTAINER.PARTMATCHES                                         DXIL Container Parts must match Module
-CONTAINER.PARTMISSING                                         DXIL Container requires certain parts, corresponding to module
-CONTAINER.PARTREPEATED                                        DXIL Container must have only one of each part type
-CONTAINER.ROOTSIGNATUREINCOMPATIBLE                           Root Signature in DXIL Container must be compatible with shader
-CONTAINER.UNUSEDITEMINTABLE                                   Items in Table must be used
-DECL.ALLOCATERAYQUERY2FLAGSARECONST                           constRayFlags and RayQueryFlags for AllocateRayQuery2 must be constant
-DECL.ALLOCATERAYQUERYFLAGSARECONST                            RayFlags for AllocateRayQuery must be constant
-DECL.ALLOWOPACITYMICROMAPSEXPECTEDGIVENFORCEOMM2STATE         When the ForceOMM2State ConstRayFlag is given as an argument to a RayQuery object, AllowOpacityMicromaps is expected as a RayQueryFlag argument
-DECL.ATTRSTRUCT                                               Attributes parameter must be struct type
-DECL.DXILFNEXTERN                                             External function must be a DXIL function
-DECL.DXILNSRESERVED                                           The DXIL reserved prefixes must only be used by built-in functions and types
-DECL.EXTRAARGS                                                Extra arguments not allowed for shader functions
-DECL.FNATTRIBUTE                                              Functions should only contain known function attributes
-DECL.FNFLATTENPARAM                                           Function parameters must not use struct types
-DECL.FNISCALLED                                               Functions can only be used by call instructions
-DECL.MULTIPLENODEINPUTS                                       A node shader may not have more than one input record
-DECL.NODELAUNCHINPUTTYPE                                      Invalid input record type for node launch type
-DECL.NOTUSEDEXTERNAL                                          External declaration should not be used
-DECL.PARAMSTRUCT                                              Callable function parameter must be struct type
-DECL.PAYLOADSTRUCT                                            Payload parameter must be struct type
-DECL.RAYQUERYINFNSIG                                          Rayquery objects not allowed in function signatures
-DECL.RESOURCEINFNSIG                                          Resources not allowed in function signatures
-DECL.SHADERMISSINGARG                                         payload/params/attributes parameter is required for certain shader types
-DECL.SHADERRETURNVOID                                         Shader functions must return void
-DECL.USEDEXTERNALFUNCTION                                     External function must be used
-DECL.USEDINTERNAL                                             Internal declaration must be used
-FLOW.DEADLOOP                                                 Loop must have break.
-FLOW.FUNCTIONCALL                                             Function with parameter is not permitted
-FLOW.NORECURSION                                              Recursion is not permitted.
-FLOW.REDUCIBLE                                                Execution flow must be reducible.
-INSTR.ALLOWED                                                 Instructions must be of an allowed type.
-INSTR.ATOMICCONST                                             Constant destination to atomic.
-INSTR.ATOMICINTRINNONUAV                                      Non-UAV destination to atomic intrinsic.
-INSTR.ATOMICOPNONGROUPSHAREDORRECORD                          Non-groupshared or node record destination to atomic operation.
-INSTR.ATTRIBUTEATVERTEXNOINTERPOLATION                        Attribute %0 must have nointerpolation mode in order to use GetAttributeAtVertex function.
-INSTR.BARRIERFLAGINVALID                                      Invalid %0 flags on DXIL operation '%1'
-INSTR.BARRIERMODEFORNONCS                                     sync in a non-Compute/Amplification/Mesh/Node Shader must only sync UAV (sync_uglobal).
-INSTR.BARRIERMODENOMEMORY                                     sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory).  Only _t (thread group sync) is optional.
-INSTR.BARRIERMODEUSELESSUGROUP                                sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal.
-INSTR.BARRIERNONCONSTANTFLAGARGUMENT                          Memory type, access, or sync flag is not constant
-INSTR.BARRIERREQUIRESNODE                                     sync in a non-Node Shader must not sync node record memory.
-INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER                      BufferUpdateCounter valid only when HasCounter is true.
-INSTR.BUFFERUPDATECOUNTERONUAV                                BufferUpdateCounter valid only on UAV.
-INSTR.CALLOLOAD                                               Call to DXIL intrinsic must match overload signature
-INSTR.CANNOTPULLPOSITION                                      pull-model evaluation of position disallowed
-INSTR.CBUFFERCLASSFORCBUFFERHANDLE                            Expect Cbuffer for CBufferLoad handle.
-INSTR.CBUFFEROUTOFBOUND                                       Cbuffer access out of bound.
-INSTR.CHECKACCESSFULLYMAPPED                                  CheckAccessFullyMapped should only be used on resource status.
-INSTR.CONSTALIGNFORRAWBUF                                     Raw Buffer alignment value must be a constant.
-INSTR.COORDINATECOUNTFORRAWTYPEDBUF                           raw/typed buffer offset must be undef.
-INSTR.COORDINATECOUNTFORSTRUCTBUF                             structured buffer requires defined index and offset coordinates.
-INSTR.CREATEHANDLEIMMRANGEID                                  Local resource must map to global resource.
-INSTR.DXILSTRUCTUSER                                          Dxil struct types should only be used by ExtractValue.
-INSTR.DXILSTRUCTUSEROUTOFBOUND                                Index out of bound when extract value from dxil struct types.
-INSTR.EVALINTERPOLATIONMODE                                   Interpolation mode on %0 used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample.
-INSTR.EXPDXILOPCODEREQUIRESEXPSM                              Use of experimental DXILOpCode requires an experimental shader model.
-INSTR.EXTRACTVALUE                                            ExtractValue should only be used on dxil struct types and cmpxchg.
-INSTR.FAILTORESLOVETGSMPOINTER                                TGSM pointers must originate from an unambiguous TGSM global variable.
-INSTR.HANDLENOTFROMCREATEHANDLE                               Resource handle should be returned by createHandle.
-INSTR.ILLEGALDXILOPCODE                                       DXILOpCode must be valid or a supported experimental opcode.
-INSTR.ILLEGALDXILOPFUNCTION                                   '%0' is not a DXILOpFunction for DXILOpcode '%1'.
-INSTR.IMMBIASFORSAMPLEB                                       bias amount for sample_b must be in the range [%0,%1], but %2 was specified as an immediate.
-INSTR.INBOUNDSACCESS                                          Access to out-of-bounds memory is disallowed.
-INSTR.LINALGINTERPRETATIONPARAMARECONST                       In Linalg operations, Interpretation value is a constant.
-INSTR.LINALGINVALIDMATRIXLAYOUTVALUEFORMATVECOPS              Matrix Layout for Linalg Mul/MulAdd operation must be valid.
-INSTR.LINALGINVALIDMATRIXLAYOUTVALUEFOROUTERPRODUCTACCUMULATE Matrix Layout for Linalg OuterProductAccumulate operation must be valid.
-INSTR.LINALGINVALIDMEMORYINTERPVALUE                          In Memory Interpretation value must be valid.
-INSTR.LINALGINVALIDREGISTERINTERPVALUE                        From Register Interpretation value must be valid.
-INSTR.LINALGMATRIXLAYOUTNOTTRANSPOSABLE                       Row Major and Column Major matrix layouts are not transposable.
-INSTR.LINALGMATRIXSHAPEPARAMSARECONST                         Matrix Layout, Dimensions and isTranspose are constants
-INSTR.LINALGMATRIXSTRIDEZEROFOROPTIMALLAYOUTS                 For optimal layouts, matrix stride must be zero.
-INSTR.LINALGNOTANUNSIGNEDTYPE                                 Unsigned flag set for a float signed type
-INSTR.MATVECOPISUNSIGNEDFLAGSARECONST                         In Linalg Mul/MulAdd functions, IsUnsigned flag is a constant.
-INSTR.MAYREORDERTHREADUNDEFCOHERENCEHINTPARAM                 Use of undef coherence hint or num coherence hint bits in MaybeReorderThread.
-INSTR.MINPRECISIONNOTPRECISE                                  Instructions marked precise may not refer to minprecision values.
-INSTR.MINPRECISONBITCAST                                      Bitcast on minprecision types is not allowed.
-INSTR.MIPLEVELFORGETDIMENSION                                 Use mip level on buffer when GetDimensions.
-INSTR.MIPONUAVLOAD                                            uav load doesn't support mipLevel/sampleIndex.
-INSTR.MISSINGSETMESHOUTPUTCOUNTS                              Missing SetMeshOutputCounts call.
-INSTR.MULTIPLEGETMESHPAYLOAD                                  GetMeshPayload cannot be called multiple times.
-INSTR.MULTIPLESETMESHOUTPUTCOUNTS                             SetMeshOutputCounts cannot be called multiple times.
-INSTR.NODERECORDHANDLEUSEAFTERCOMPLETE                        Invalid use of completed record handle.
-INSTR.NOGENERICPTRADDRSPACECAST                               Address space cast between pointer types must have one part to be generic address space.
-INSTR.NOIDIVBYZERO                                            No signed integer division by zero.
-INSTR.NOINDEFINITEACOS                                        No indefinite arccosine.
-INSTR.NOINDEFINITEASIN                                        No indefinite arcsine.
-INSTR.NOINDEFINITEDSXY                                        No indefinite derivative calculation.
-INSTR.NOINDEFINITELOG                                         No indefinite logarithm.
-INSTR.NONDOMINATINGDISPATCHMESH                               Non-Dominating DispatchMesh call.
-INSTR.NONDOMINATINGSETMESHOUTPUTCOUNTS                        Non-Dominating SetMeshOutputCounts call.
-INSTR.NOREADINGUNINITIALIZED                                  Instructions should not read uninitialized value.
-INSTR.NOTONCEDISPATCHMESH                                     DispatchMesh must be called exactly once in an Amplification shader.
-INSTR.NOUDIVBYZERO                                            No unsigned integer division by zero.
-INSTR.OFFSETONUAVLOAD                                         uav load doesn't support offset.
-INSTR.OLOAD                                                   DXIL intrinsic overload must be valid.
-INSTR.ONLYONEALLOCCONSUME                                     RWStructuredBuffers may increment or decrement their counters, but not both.
-INSTR.OPCODERESERVED                                          Instructions must not reference reserved opcodes.
-INSTR.OPCONST                                                 DXIL intrinsic requires an immediate constant operand
-INSTR.OPCONSTRANGE                                            Constant values must be in-range for operation.
-INSTR.OPERANDRANGE                                            DXIL intrinsic operand must be within defined range
-INSTR.PARAMMULTIPLE                                           Parameter must be a valid multiple
-INSTR.PTRBITCAST                                              Pointer type bitcast must have the same size.
-INSTR.REORDERCOHERENTREQUIRESSM69                             reordercoherent requires SM 6.9 or later.
-INSTR.RESOURCECLASSFORLOAD                                    load can only run on UAV/SRV resource.
-INSTR.RESOURCECLASSFORSAMPLERGATHER                           sample, lod and gather should be on srv resource.
-INSTR.RESOURCECLASSFORUAVSTORE                                store should be on uav resource.
-INSTR.RESOURCECOORDINATEMISS                                  coord uninitialized.
-INSTR.RESOURCECOORDINATETOOMANY                               out of bound coord must be undef.
-INSTR.RESOURCEKINDFORBUFFERLOADSTORE                          buffer load/store only works on Raw/Typed/StructuredBuffer.
-INSTR.RESOURCEKINDFORCALCLOD                                  lod requires resource declared as texture1D/2D/3D/Cube/CubeArray/1DArray/2DArray.
-INSTR.RESOURCEKINDFORGATHER                                   gather requires resource declared as texture/2D/Cube/2DArray/CubeArray.
-INSTR.RESOURCEKINDFORGETDIM                                   Invalid resource kind on GetDimensions.
-INSTR.RESOURCEKINDFORSAMPLE                                   sample/_l/_d requires resource declared as texture1D/2D/3D/Cube/1DArray/2DArray/CubeArray.
-INSTR.RESOURCEKINDFORSAMPLEC                                  samplec requires resource declared as texture1D/2D/Cube/1DArray/2DArray/CubeArray.
-INSTR.RESOURCEKINDFORTEXTURELOAD                              texture load only works on Texture1D/1DArray/2D/2DArray/3D/MS2D/MS2DArray.
-INSTR.RESOURCEKINDFORTEXTURESTORE                             texture store only works on Texture1D/1DArray/2D/2DArray/3D.
-INSTR.RESOURCEKINDFORTRACERAY                                 TraceRay should only use RTAccelerationStructure.
-INSTR.RESOURCEMAPTOSINGLEENTRY                                Fail to map resource to resource table.
-INSTR.RESOURCEOFFSETMISS                                      offset uninitialized.
-INSTR.RESOURCEOFFSETTOOMANY                                   out of bound offset must be undef.
-INSTR.RESOURCEUSER                                            Resource should only be used by Load/GEP/Call.
-INSTR.SAMPLECOMPTYPE                                          sample_* instructions require resource to be declared to return UNORM, SNORM or FLOAT.
-INSTR.SAMPLEINDEXFORLOAD2DMS                                  load on Texture2DMS/2DMSArray require sampleIndex.
-INSTR.SAMPLERMODEFORLOD                                       lod instruction requires sampler declared in default mode.
-INSTR.SAMPLERMODEFORSAMPLE                                    sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode.
-INSTR.SAMPLERMODEFORSAMPLEC                                   sample_c_*/gather_c instructions require sampler declared in comparison mode.
-INSTR.SIGNATUREOPERATIONNOTINENTRY                            Dxil operation for input output signature must be in entryPoints.
-INSTR.STATUS                                                  Resource status should only be used by CheckAccessFullyMapped.
-INSTR.STRUCTBITCAST                                           Bitcast on struct types is not allowed.
-INSTR.SVCONFLICTINGLAUNCHMODE                                 Input system values are compatible with node shader launch mode.
-INSTR.TEXTUREOFFSET                                           offset texture instructions must take offset which can resolve to integer literal in the range -8 to 7.
-INSTR.TGSMRACECOND                                            Race condition writing to shared memory detected, consider making this write conditional.
-INSTR.UNDEFHITOBJECT                                          HitObject is undef.
-INSTR.UNDEFINEDVALUEFORUAVSTORE                               Assignment of undefined values to UAV.
-INSTR.UNDEFRESULTFORGETDIMENSION                              GetDimensions used undef dimension %0 on %1.
-INSTR.WRITEMASKFORTYPEDUAVSTORE                               store on typed uav must write to all four components of the UAV.
-INSTR.WRITEMASKGAPFORUAV                                      UAV write mask must be contiguous, starting at x: .x, .xy, .xyz, or .xyzw.
-INSTR.WRITEMASKMATCHVALUEFORUAVSTORE                          uav store write mask must match store value mask, write mask is %0 and store value mask is %1.
-META.BARYCENTRICSFLOAT3                                       only 'float3' type is allowed for SV_Barycentrics.
-META.BARYCENTRICSINTERPOLATION                                SV_Barycentrics cannot be used with 'nointerpolation' type.
-META.BARYCENTRICSTWOPERSPECTIVES                              There can only be up to two input attributes of SV_Barycentrics with different perspective interpolation mode.
-META.BRANCHFLATTEN                                            Can't use branch and flatten attributes together.
-META.CLIPCULLMAXCOMPONENTS                                    Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components
-META.CLIPCULLMAXROWS                                          Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows.
-META.COHERENCENOTONAPPENDCONSUME                              globally/reorder coherent incompatible with append/consume/counter buffers
-META.COMPUTEWITHNODE                                          Compute entry must not have node metadata
-META.CONTROLFLOWHINTNOTONCONTROLFLOW                          Control flow hint only works on control flow inst.
-META.DENSERESIDS                                              Resource identifiers must be zero-based and dense.
-META.DUPLICATESYSVALUE                                        System value may only appear once in signature
-META.ENTRYFUNCTION                                            entrypoint not found.
-META.FLAGSUSAGE                                               Flags must match usage.
-META.FORCECASEONSWITCH                                        Attribute forcecase only works for switch.
-META.INTEGERINTERPMODE                                        Interpolation mode on integer must be Constant
-META.INTERPMODEINONEROW                                       Interpolation mode must be identical for all elements packed into the same row.
-META.INTERPMODEVALID                                          Interpolation mode must be valid
-META.INVALIDCONTROLFLOWHINT                                   Invalid control flow hint.
-META.KNOWN                                                    Named metadata should be known
-META.MAXTESSFACTOR                                            Hull Shader MaxTessFactor must be [%0..%1].  %2 specified.
-META.NOENTRYPROPSFORENTRY                                     Entry point %0 must have entry properties.
-META.NOSEMANTICOVERLAP                                        Semantics must not overlap
-META.REQUIRED                                                 Required metadata missing.
-META.SEMAKINDMATCHESNAME                                      Semantic name must match system value, when defined.
-META.SEMAKINDVALID                                            Semantic kind must be valid
-META.SEMANTICCOMPTYPE                                         %0 must be %1.
-META.SEMANTICINDEXMAX                                         System value semantics have a maximum valid semantic index
-META.SEMANTICLEN                                              Semantic length must be at least 1 and at most 64.
-META.SEMANTICSHOULDBEALLOCATED                                Semantic should have a valid packing location
-META.SEMANTICSHOULDNOTBEALLOCATED                             Semantic should have a packing location of -1
-META.SIGNATURECOMPTYPE                                        signature %0 specifies unrecognized or invalid component type.
-META.SIGNATUREDATAWIDTH                                       Data width must be identical for all elements packed into the same row.
-META.SIGNATUREILLEGALCOMPONENTORDER                           Component ordering for packed elements must be: arbitrary < system value < system generated value
-META.SIGNATUREINDEXCONFLICT                                   Only elements with compatible indexing rules may be packed together
-META.SIGNATUREOUTOFRANGE                                      Signature elements must fit within maximum signature size
-META.SIGNATUREOVERLAP                                         Signature elements may not overlap in packing location.
-META.STRUCTBUFALIGNMENT                                       StructuredBuffer stride not aligned
-META.STRUCTBUFALIGNMENTOUTOFBOUND                             StructuredBuffer stride out of bounds
-META.SYSTEMVALUEROWS                                          System value may only have 1 row
-META.TARGET                                                   Target triple must be 'dxil-ms-dx'
-META.TESSELLATOROUTPUTPRIMITIVE                               Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW.
-META.TESSELLATORPARTITION                                     Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even.
-META.TEXTURETYPE                                              elements of typed buffers and textures must fit in four 32-bit quantities.
-META.USED                                                     All metadata must be used by dxil.
-META.VALIDSAMPLERMODE                                         Invalid sampler mode on sampler .
-META.VALUERANGE                                               Metadata value must be within range.
-META.VERSIONSUPPORTED                                         Version in metadata must be supported.
-META.WELLFORMED                                               Metadata must be well-formed in operand count and types.
-SM.64BITRAWBUFFERLOADSTORE                                    i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3.
-SM.AMPLIFICATIONSHADERPAYLOADSIZE                             For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes.
-SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED                     For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes.
-SM.APPENDANDCONSUMEONSAMEUAV                                  BufferUpdateCounter inc and dec on a given UAV (%d) cannot both be in the same shader for shader model less than 5.1.
-SM.CBUFFERARRAYOFFSETALIGNMENT                                CBuffer array offset must be aligned to 16-bytes
-SM.CBUFFERELEMENTOVERFLOW                                     CBuffer elements must not overflow
-SM.CBUFFEROFFSETOVERLAP                                       CBuffer offsets must not overlap
-SM.CBUFFERSIZE                                                CBuffer size must not exceed 65536 bytes
-SM.CBUFFERTEMPLATETYPEMUSTBESTRUCT                            D3D12 constant/texture buffer template element can only be a struct.
-SM.COMPLETEPOSITION                                           Not all elements of SV_Position were written.
-SM.CONSTANTINTERPMODE                                         Interpolation mode must be constant for MS primitive output.
-SM.COUNTERONLYONSTRUCTBUF                                     BufferUpdateCounter valid only on structured buffers.
-SM.CSNOSIGNATURES                                             Compute shaders must not have shader signatures.
-SM.DOMAINLOCATIONIDXOOB                                       DomainLocation component index out of bounds for the domain.
-SM.DSINPUTCONTROLPOINTCOUNTRANGE                              DS input control point count must be [0..%0].  %1 specified.
-SM.DXILVERSION                                                Target shader model requires specific Dxil Version
-SM.EXPLICITTGSMSIZEONENTRY                                    Total Thread Group Shared Memory used by entry must not exceed limit specified by entry attribute.
-SM.GSINSTANCECOUNTRANGE                                       GS instance count must be [1..%0].  %1 specified.
-SM.GSOUTPUTVERTEXCOUNTRANGE                                   GS output vertex count must be [0..%0].  %1 specified.
-SM.GSTOTALOUTPUTVERTEXDATARANGE                               Declared output vertex count (%0) multiplied by the total number of declared scalar components of output data (%1) equals %2.  This value cannot be greater than %3.
-SM.GSVALIDINPUTPRIMITIVE                                      GS input primitive unrecognized.
-SM.GSVALIDOUTPUTPRIMITIVETOPOLOGY                             GS output primitive topology unrecognized.
-SM.HSINPUTCONTROLPOINTCOUNTRANGE                              HS input control point count must be [0..%0].  %1 specified.
-SM.HULLPASSTHRUCONTROLPOINTCOUNTMATCH                         For pass thru hull shader, input control point count must match output control point count
-SM.INCOMPATIBLECALLINENTRY                                    Features used in internal function calls must be compatible with entry
-SM.INCOMPATIBLEDERIVINCOMPUTESHADERMODEL                      Derivatives in compute-model shaders require shader model 6.6 and above
-SM.INCOMPATIBLEDERIVLAUNCH                                    Node shaders only support derivatives in broadcasting and coalescing launch modes
-SM.INCOMPATIBLEOPERATION                                      Operations used in entry function must be compatible with shader stage and other properties
-SM.INCOMPATIBLEREQUIRESGROUP                                  Functions requiring groupshared memory must be called from shaders with a visible group
-SM.INCOMPATIBLESHADERMODEL                                    Functions may only use features available in the current shader model
-SM.INCOMPATIBLESTAGE                                          Functions may only use features available in the entry function's stage
-SM.INCOMPATIBLETHREADGROUPDIM                                 When derivatives are used in compute-model shaders, the thread group dimensions must be compatible
-SM.INSIDETESSFACTORSIZEMATCHDOMAIN                            InsideTessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.
-SM.INVALIDRESOURCECOMPTYPE                                    Invalid resource return type.
-SM.INVALIDRESOURCEKIND                                        Invalid resources kind.
-SM.INVALIDSAMPLERFEEDBACKTYPE                                 Invalid sampler feedback type.
-SM.INVALIDTEXTUREKINDONUAV                                    TextureCube[Array] resources are not supported with UAVs.
-SM.ISOLINEOUTPUTPRIMITIVEMISMATCH                             Hull Shader declared with IsoLine Domain must specify output primitive point or line. Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain.
-SM.ISSPECIALFLOAT                                             16 bit IsSpecialFloat overloads require Shader Model 6.9 or higher.
-SM.MAXTGSMSIZEONENTRY                                         Total Thread Group Shared Memory used by entry must not exceed maximum for shader model.
-SM.MAXTHEADGROUP                                              Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1.
-SM.MESHPSIGROWCOUNT                                           For shader '%0', primitive output signatures are taking up more than %1 rows.
-SM.MESHSHADERINOUTSIZE                                        For shader '%0', payload plus output size is greater than %1.
-SM.MESHSHADERMAXPRIMITIVECOUNT                                MS max primitive output count must be [0..%0].  %1 specified.
-SM.MESHSHADERMAXVERTEXCOUNT                                   MS max vertex output count must be [0..%0].  %1 specified.
-SM.MESHSHADEROUTPUTSIZE                                       For shader '%0', vertex plus primitive output size is greater than %1.
-SM.MESHSHADERPAYLOADSIZE                                      For mesh shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes.
-SM.MESHSHADERPAYLOADSIZEDECLARED                              For mesh shader with entry '%0', payload size %1 is greater than declared size of %2 bytes.
-SM.MESHTOTALSIGROWCOUNT                                       For shader '%0', vertex and primitive output signatures are taking up more than %1 rows.
-SM.MESHVSIGROWCOUNT                                           For shader '%0', vertex output signatures are taking up more than %1 rows.
-SM.MULTISTREAMMUSTBEPOINT                                     When multiple GS output streams are used they must be pointlists
-SM.NAME                                                       Target shader model name must be known
-SM.NOINTERPMODE                                               Interpolation mode must be undefined for VS input/PS output/patch constant.
-SM.NOPSOUTPUTIDX                                              Pixel shader output registers are not indexable.
-SM.OPCODE                                                     Opcode must be defined in target shader model
-SM.OPCODEININVALIDFUNCTION                                    Invalid DXIL opcode usage like StorePatchConstant in patch constant function
-SM.OPERAND                                                    Operand must be defined in target shader model.
-SM.OUTPUTCONTROLPOINTCOUNTRANGE                               output control point count must be [%0..%1].  %2 specified.
-SM.OUTPUTCONTROLPOINTSTOTALSCALARS                            Total number of scalars across all HS output control points must not exceed .
-SM.PATCHCONSTANTONLYFORHSDS                                   patch constant signature only valid in HS and DS.
-SM.PROGRAMVERSION                                             Program Version in Dxil Container does not match Dxil Module shader model version
-SM.PSCONSISTENTINTERP                                         Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample).
-SM.PSCOVERAGEANDINNERCOVERAGE                                 InnerCoverage and Coverage are mutually exclusive.
-SM.PSMULTIPLEDEPTHSEMANTIC                                    Pixel Shader only allows one type of depth semantic to be declared.
-SM.PSOUTPUTSEMANTIC                                           Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found.
-SM.PSTARGETCOL0                                               SV_Target packed location must start at column 0.
-SM.PSTARGETINDEXMATCHESROW                                    SV_Target semantic index must match packed row location.
-SM.RAYSHADERPAYLOADSIZE                                       For shader '%0', %1 size is smaller than argument's allocation size.
-SM.RAYSHADERSIGNATURES                                        Ray tracing shader '%0' should not have any shader signatures.
-SM.RESOURCERANGEOVERLAP                                       Resource ranges must not overlap
-SM.ROVONLYINPS                                                RasterizerOrdered objects are only allowed in 5.0+ pixel shaders.
-SM.SAMPLECOUNTONLYON2DMS                                      Only Texture2DMS/2DMSArray could has sample count.
-SM.SEMANTIC                                                   Semantic must be defined in target shader model
-SM.STREAMINDEXRANGE                                           Stream index (%0) must between 0 and %1.
-SM.TESSFACTORFORDOMAIN                                        Required TessFactor for domain not found declared anywhere in Patch Constant data.
-SM.TESSFACTORSIZEMATCHDOMAIN                                  TessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.
-SM.TGSMUNSUPPORTED                                            Thread Group Shared Memory not supported %0.
-SM.THREADGROUPCHANNELRANGE                                    Declared Thread Group %0 size %1 outside valid range [%2..%3].
-SM.TRIOUTPUTPRIMITIVEMISMATCH                                 Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain.
-SM.UNDEFINEDOUTPUT                                            Not all elements of output %0 were written.
-SM.VALIDDOMAIN                                                Invalid Tessellator Domain specified. Must be isoline, tri or quad.
-SM.VIEWIDNEEDSSLOT                                            ViewID requires compatible space in pixel shader input signature
-SM.WAVESIZEALLZEROWHENUNDEFINED                               WaveSize Max and Preferred must be 0 when Min is 0
-SM.WAVESIZEEXPECTSONEPARAM                                    WaveSize tag expects exactly 1 parameter.
-SM.WAVESIZEMAXANDPREFERREDZEROWHENNORANGE                     WaveSize Max and Preferred must be 0 to encode min==max
-SM.WAVESIZEMAXGREATERTHANMIN                                  WaveSize Max must greater than Min
-SM.WAVESIZENEEDSCONSTANTOPERANDS                              WaveSize metadata operands must be constant values.
-SM.WAVESIZENEEDSSM66OR67                                      WaveSize is valid only for Shader Model 6.6 and 6.7.
-SM.WAVESIZEONCOMPUTEORNODE                                    WaveSize only allowed on compute or node shaders
-SM.WAVESIZEPREFERREDINRANGE                                   WaveSize Preferred must be within Min..Max range
-SM.WAVESIZERANGEEXPECTSTHREEPARAMS                            WaveSize Range tag expects exactly 3 parameters.
-SM.WAVESIZERANGENEEDSSM68PLUS                                 WaveSize Range is valid only for Shader Model 6.8 and higher.
-SM.WAVESIZETAGDUPLICATE                                       WaveSize or WaveSizeRange tag may only appear once per entry point.
-SM.WAVESIZEVALUE                                              WaveSize value must be a power of 2 in range [4..128]
-SM.ZEROHSINPUTCONTROLPOINTWITHINPUT                           When HS input control point count is 0, no input signature should exist.
-TYPES.DEFINED                                                 Type must be defined based on DXIL primitives
-TYPES.I8                                                      I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics.
-TYPES.INTWIDTH                                                Int type must be of valid width
-TYPES.NOMULTIDIM                                              Only one dimension allowed for array type.
-TYPES.NOPTRTOPTR                                              Pointers to pointers, or pointers in structures are not allowed.
-TYPES.NOVECTOR                                                Vector types must not be present
-============================================================= ========================================================================================================================================================================================================================================================================================================
+===================================================== ========================================================================================================================================================================================================================================================================================================
+Rule Code                                             Description
+===================================================== ========================================================================================================================================================================================================================================================================================================
+BITCODE.VALID                                         Module must be bitcode-valid
+CONTAINER.CONTENTINVALID                              DXIL Container Content is well-formed
+CONTAINER.CONTENTMATCHES                              DXIL Container Content must match Module
+CONTAINER.PARTINVALID                                 DXIL Container must not contain unknown parts
+CONTAINER.PARTMATCHES                                 DXIL Container Parts must match Module
+CONTAINER.PARTMISSING                                 DXIL Container requires certain parts, corresponding to module
+CONTAINER.PARTREPEATED                                DXIL Container must have only one of each part type
+CONTAINER.ROOTSIGNATUREINCOMPATIBLE                   Root Signature in DXIL Container must be compatible with shader
+CONTAINER.UNUSEDITEMINTABLE                           Items in Table must be used
+DECL.ALLOCATERAYQUERY2FLAGSARECONST                   constRayFlags and RayQueryFlags for AllocateRayQuery2 must be constant
+DECL.ALLOCATERAYQUERYFLAGSARECONST                    RayFlags for AllocateRayQuery must be constant
+DECL.ALLOWOPACITYMICROMAPSEXPECTEDGIVENFORCEOMM2STATE When the ForceOMM2State ConstRayFlag is given as an argument to a RayQuery object, AllowOpacityMicromaps is expected as a RayQueryFlag argument
+DECL.ATTRSTRUCT                                       Attributes parameter must be struct type
+DECL.DXILFNEXTERN                                     External function must be a DXIL function
+DECL.DXILNSRESERVED                                   The DXIL reserved prefixes must only be used by built-in functions and types
+DECL.EXTRAARGS                                        Extra arguments not allowed for shader functions
+DECL.FNATTRIBUTE                                      Functions should only contain known function attributes
+DECL.FNFLATTENPARAM                                   Function parameters must not use struct types
+DECL.FNISCALLED                                       Functions can only be used by call instructions
+DECL.MULTIPLENODEINPUTS                               A node shader may not have more than one input record
+DECL.NODELAUNCHINPUTTYPE                              Invalid input record type for node launch type
+DECL.NOTUSEDEXTERNAL                                  External declaration should not be used
+DECL.PARAMSTRUCT                                      Callable function parameter must be struct type
+DECL.PAYLOADSTRUCT                                    Payload parameter must be struct type
+DECL.RAYQUERYINFNSIG                                  Rayquery objects not allowed in function signatures
+DECL.RESOURCEINFNSIG                                  Resources not allowed in function signatures
+DECL.SHADERMISSINGARG                                 payload/params/attributes parameter is required for certain shader types
+DECL.SHADERRETURNVOID                                 Shader functions must return void
+DECL.USEDEXTERNALFUNCTION                             External function must be used
+DECL.USEDINTERNAL                                     Internal declaration must be used
+FLOW.DEADLOOP                                         Loop must have break.
+FLOW.FUNCTIONCALL                                     Function with parameter is not permitted
+FLOW.NORECURSION                                      Recursion is not permitted.
+FLOW.REDUCIBLE                                        Execution flow must be reducible.
+INSTR.ALLOWED                                         Instructions must be of an allowed type.
+INSTR.ATOMICCONST                                     Constant destination to atomic.
+INSTR.ATOMICINTRINNONUAV                              Non-UAV destination to atomic intrinsic.
+INSTR.ATOMICOPNONGROUPSHAREDORRECORD                  Non-groupshared or node record destination to atomic operation.
+INSTR.ATTRIBUTEATVERTEXNOINTERPOLATION                Attribute %0 must have nointerpolation mode in order to use GetAttributeAtVertex function.
+INSTR.BARRIERFLAGINVALID                              Invalid %0 flags on DXIL operation '%1'
+INSTR.BARRIERMODEFORNONCS                             sync in a non-Compute/Amplification/Mesh/Node Shader must only sync UAV (sync_uglobal).
+INSTR.BARRIERMODENOMEMORY                             sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory).  Only _t (thread group sync) is optional.
+INSTR.BARRIERMODEUSELESSUGROUP                        sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal.
+INSTR.BARRIERNONCONSTANTFLAGARGUMENT                  Memory type, access, or sync flag is not constant
+INSTR.BARRIERREQUIRESNODE                             sync in a non-Node Shader must not sync node record memory.
+INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER              BufferUpdateCounter valid only when HasCounter is true.
+INSTR.BUFFERUPDATECOUNTERONUAV                        BufferUpdateCounter valid only on UAV.
+INSTR.CALLOLOAD                                       Call to DXIL intrinsic must match overload signature
+INSTR.CANNOTPULLPOSITION                              pull-model evaluation of position disallowed
+INSTR.CBUFFERCLASSFORCBUFFERHANDLE                    Expect Cbuffer for CBufferLoad handle.
+INSTR.CBUFFEROUTOFBOUND                               Cbuffer access out of bound.
+INSTR.CHECKACCESSFULLYMAPPED                          CheckAccessFullyMapped should only be used on resource status.
+INSTR.CONSTALIGNFORRAWBUF                             Raw Buffer alignment value must be a constant.
+INSTR.COORDINATECOUNTFORRAWTYPEDBUF                   raw/typed buffer offset must be undef.
+INSTR.COORDINATECOUNTFORSTRUCTBUF                     structured buffer requires defined index and offset coordinates.
+INSTR.CREATEHANDLEIMMRANGEID                          Local resource must map to global resource.
+INSTR.DXILSTRUCTUSER                                  Dxil struct types should only be used by ExtractValue.
+INSTR.DXILSTRUCTUSEROUTOFBOUND                        Index out of bound when extracting a value from dxil struct types.
+INSTR.EVALINTERPOLATIONMODE                           Interpolation mode on %0 used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample.
+INSTR.EXPDXILOPCODEREQUIRESEXPSM                      Use of experimental DXILOpCode requires an experimental shader model.
+INSTR.EXTRACTVALUE                                    ExtractValue should only be used on dxil struct types and cmpxchg.
+INSTR.FAILTORESLOVETGSMPOINTER                        TGSM pointers must originate from an unambiguous TGSM global variable.
+INSTR.HANDLENOTFROMCREATEHANDLE                       Resource handle should be returned by createHandle.
+INSTR.ILLEGALDXILOPCODE                               DXILOpCode must be valid or a supported experimental opcode.
+INSTR.ILLEGALDXILOPFUNCTION                           '%0' is not a DXILOpFunction for DXILOpcode '%1'.
+INSTR.IMMBIASFORSAMPLEB                               bias amount for sample_b must be in the range [%0,%1], but %2 was specified as an immediate.
+INSTR.INBOUNDSACCESS                                  Access to out-of-bounds memory is disallowed.
+INSTR.MAYREORDERTHREADUNDEFCOHERENCEHINTPARAM         Use of undef coherence hint or num coherence hint bits in MaybeReorderThread.
+INSTR.MINPRECISIONNOTPRECISE                          Instructions marked precise may not refer to minprecision values.
+INSTR.MINPRECISONBITCAST                              Bitcast on minprecision types is not allowed.
+INSTR.MIPLEVELFORGETDIMENSION                         Use mip level on buffer when GetDimensions.
+INSTR.MIPONUAVLOAD                                    uav load doesn't support mipLevel/sampleIndex.
+INSTR.MISSINGSETMESHOUTPUTCOUNTS                      Missing SetMeshOutputCounts call.
+INSTR.MULTIPLEGETMESHPAYLOAD                          GetMeshPayload cannot be called multiple times.
+INSTR.MULTIPLESETMESHOUTPUTCOUNTS                     SetMeshOutputCounts cannot be called multiple times.
+INSTR.NODERECORDHANDLEUSEAFTERCOMPLETE                Invalid use of completed record handle.
+INSTR.NOGENERICPTRADDRSPACECAST                       Address space cast between pointer types must have one part to be generic address space.
+INSTR.NOIDIVBYZERO                                    No signed integer division by zero.
+INSTR.NOINDEFINITEACOS                                No indefinite arccosine.
+INSTR.NOINDEFINITEASIN                                No indefinite arcsine.
+INSTR.NOINDEFINITEDSXY                                No indefinite derivative calculation.
+INSTR.NOINDEFINITELOG                                 No indefinite logarithm.
+INSTR.NONDOMINATINGDISPATCHMESH                       Non-Dominating DispatchMesh call.
+INSTR.NONDOMINATINGSETMESHOUTPUTCOUNTS                Non-Dominating SetMeshOutputCounts call.
+INSTR.NOREADINGUNINITIALIZED                          Instructions should not read uninitialized value.
+INSTR.NOTONCEDISPATCHMESH                             DispatchMesh must be called exactly once in an Amplification shader.
+INSTR.NOUDIVBYZERO                                    No unsigned integer division by zero.
+INSTR.OFFSETONUAVLOAD                                 uav load doesn't support offset.
+INSTR.OLOAD                                           DXIL intrinsic overload must be valid.
+INSTR.ONLYONEALLOCCONSUME                             RWStructuredBuffers may increment or decrement their counters, but not both.
+INSTR.OPCODERESERVED                                  Instructions must not reference reserved opcodes.
+INSTR.OPCONST                                         DXIL intrinsic requires an immediate constant operand
+INSTR.OPCONSTRANGE                                    Constant values must be in-range for operation.
+INSTR.OPERANDRANGE                                    DXIL intrinsic operand must be within defined range
+INSTR.PARAMMULTIPLE                                   Parameter must be a valid multiple
+INSTR.PTRBITCAST                                      Pointer type bitcast must have the same size.
+INSTR.REORDERCOHERENTREQUIRESSM69                     reordercoherent requires SM 6.9 or later.
+INSTR.RESOURCECLASSFORLOAD                            load can only run on UAV/SRV resource.
+INSTR.RESOURCECLASSFORSAMPLERGATHER                   sample, lod and gather should be on srv resource.
+INSTR.RESOURCECLASSFORUAVSTORE                        store should be on uav resource.
+INSTR.RESOURCECOORDINATEMISS                          coord uninitialized.
+INSTR.RESOURCECOORDINATETOOMANY                       out of bound coord must be undef.
+INSTR.RESOURCEKINDFORBUFFERLOADSTORE                  buffer load/store only works on Raw/Typed/StructuredBuffer.
+INSTR.RESOURCEKINDFORCALCLOD                          lod requires resource declared as texture1D/2D/3D/Cube/CubeArray/1DArray/2DArray.
+INSTR.RESOURCEKINDFORGATHER                           gather requires resource declared as texture2D/Cube/2DArray/CubeArray.
+INSTR.RESOURCEKINDFORGETDIM                           Invalid resource kind on GetDimensions.
+INSTR.RESOURCEKINDFORSAMPLE                           sample/_l/_d requires resource declared as texture1D/2D/3D/Cube/1DArray/2DArray/CubeArray.
+INSTR.RESOURCEKINDFORSAMPLEC                          samplec requires resource declared as texture1D/2D/Cube/1DArray/2DArray/CubeArray.
+INSTR.RESOURCEKINDFORTEXTURELOAD                      texture load only works on Texture1D/1DArray/2D/2DArray/3D/MS2D/MS2DArray.
+INSTR.RESOURCEKINDFORTEXTURESTORE                     texture store only works on Texture1D/1DArray/2D/2DArray/3D.
+INSTR.RESOURCEKINDFORTRACERAY                         TraceRay should only use RTAccelerationStructure.
+INSTR.RESOURCEMAPTOSINGLEENTRY                        Fail to map resource to resource table.
+INSTR.RESOURCEOFFSETMISS                              offset uninitialized.
+INSTR.RESOURCEOFFSETTOOMANY                           out of bound offset must be undef.
+INSTR.RESOURCEUSER                                    Resource should only be used by Load/GEP/Call.
+INSTR.SAMPLECOMPTYPE                                  sample_* instructions require resource to be declared to return UNORM, SNORM or FLOAT.
+INSTR.SAMPLEINDEXFORLOAD2DMS                          load on Texture2DMS/2DMSArray requires sampleIndex.
+INSTR.SAMPLERMODEFORLOD                               lod instruction requires sampler declared in default mode.
+INSTR.SAMPLERMODEFORSAMPLE                            sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode.
+INSTR.SAMPLERMODEFORSAMPLEC                           sample_c_*/gather_c instructions require sampler declared in comparison mode.
+INSTR.SIGNATUREOPERATIONNOTINENTRY                    Dxil operation for input output signature must be in entryPoints.
+INSTR.STATUS                                          Resource status should only be used by CheckAccessFullyMapped.
+INSTR.STRUCTBITCAST                                   Bitcast on struct types is not allowed.
+INSTR.SVCONFLICTINGLAUNCHMODE                         Input system values are compatible with node shader launch mode.
+INSTR.TEXTUREOFFSET                                   offset texture instructions must take offset which can resolve to integer literal in the range -8 to 7.
+INSTR.TGSMRACECOND                                    Race condition writing to shared memory detected, consider making this write conditional.
+INSTR.UNDEFHITOBJECT                                  HitObject is undef.
+INSTR.UNDEFINEDVALUEFORUAVSTORE                       Assignment of undefined values to UAV.
+INSTR.UNDEFRESULTFORGETDIMENSION                      GetDimensions used undef dimension %0 on %1.
+INSTR.WRITEMASKFORTYPEDUAVSTORE                       store on typed uav must write to all four components of the UAV.
+INSTR.WRITEMASKGAPFORUAV                              UAV write mask must be contiguous, starting at x: .x, .xy, .xyz, or .xyzw.
+INSTR.WRITEMASKMATCHVALUEFORUAVSTORE                  uav store write mask must match store value mask, write mask is %0 and store value mask is %1.
+META.BARYCENTRICSFLOAT3                               only 'float3' type is allowed for SV_Barycentrics.
+META.BARYCENTRICSINTERPOLATION                        SV_Barycentrics cannot be used with 'nointerpolation' type.
+META.BARYCENTRICSTWOPERSPECTIVES                      There can only be up to two input attributes of SV_Barycentrics with different perspective interpolation mode.
+META.BRANCHFLATTEN                                    Can't use branch and flatten attributes together.
+META.CLIPCULLMAXCOMPONENTS                            Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components
+META.CLIPCULLMAXROWS                                  Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows.
+META.COHERENCENOTONAPPENDCONSUME                      globally/reorder coherent incompatible with append/consume/counter buffers
+META.COMPUTEWITHNODE                                  Compute entry must not have node metadata
+META.CONTROLFLOWHINTNOTONCONTROLFLOW                  Control flow hint only works on control flow inst.
+META.DENSERESIDS                                      Resource identifiers must be zero-based and dense.
+META.DUPLICATESYSVALUE                                System value may only appear once in signature
+META.ENTRYFUNCTION                                    entrypoint not found.
+META.FLAGSUSAGE                                       Flags must match usage.
+META.FORCECASEONSWITCH                                Attribute forcecase only works for switch.
+META.INTEGERINTERPMODE                                Interpolation mode on integer must be Constant
+META.INTERPMODEINONEROW                               Interpolation mode must be identical for all elements packed into the same row.
+META.INTERPMODEVALID                                  Interpolation mode must be valid
+META.INVALIDCONTROLFLOWHINT                           Invalid control flow hint.
+META.KNOWN                                            Named metadata should be known
+META.MAXTESSFACTOR                                    Hull Shader MaxTessFactor must be [%0..%1].  %2 specified.
+META.NOENTRYPROPSFORENTRY                             Entry point %0 must have entry properties.
+META.NOSEMANTICOVERLAP                                Semantics must not overlap
+META.REQUIRED                                         Required metadata missing.
+META.SEMAKINDMATCHESNAME                              Semantic name must match system value, when defined.
+META.SEMAKINDVALID                                    Semantic kind must be valid
+META.SEMANTICCOMPTYPE                                 %0 must be %1.
+META.SEMANTICINDEXMAX                                 System value semantics have a maximum valid semantic index
+META.SEMANTICLEN                                      Semantic length must be at least 1 and at most 64.
+META.SEMANTICSHOULDBEALLOCATED                        Semantic should have a valid packing location
+META.SEMANTICSHOULDNOTBEALLOCATED                     Semantic should have a packing location of -1
+META.SIGNATURECOMPTYPE                                signature %0 specifies unrecognized or invalid component type.
+META.SIGNATUREDATAWIDTH                               Data width must be identical for all elements packed into the same row.
+META.SIGNATUREILLEGALCOMPONENTORDER                   Component ordering for packed elements must be: arbitrary < system value < system generated value
+META.SIGNATUREINDEXCONFLICT                           Only elements with compatible indexing rules may be packed together
+META.SIGNATUREOUTOFRANGE                              Signature elements must fit within maximum signature size
+META.SIGNATUREOVERLAP                                 Signature elements may not overlap in packing location.
+META.STRUCTBUFALIGNMENT                               StructuredBuffer stride not aligned
+META.STRUCTBUFALIGNMENTOUTOFBOUND                     StructuredBuffer stride out of bounds
+META.SYSTEMVALUEROWS                                  System value may only have 1 row
+META.TARGET                                           Target triple must be 'dxil-ms-dx'
+META.TESSELLATOROUTPUTPRIMITIVE                       Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW.
+META.TESSELLATORPARTITION                             Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even.
+META.TEXTURETYPE                                      elements of typed buffers and textures must fit in four 32-bit quantities.
+META.USED                                             All metadata must be used by dxil.
+META.VALIDSAMPLERMODE                                 Invalid sampler mode on sampler.
+META.VALUERANGE                                       Metadata value must be within range.
+META.VERSIONSUPPORTED                                 Version in metadata must be supported.
+META.WELLFORMED                                       Metadata must be well-formed in operand count and types.
+SM.64BITRAWBUFFERLOADSTORE                            i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3.
+SM.AMPLIFICATIONSHADERPAYLOADSIZE                     For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes.
+SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED             For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes.
+SM.APPENDANDCONSUMEONSAMEUAV                          BufferUpdateCounter inc and dec on a given UAV (%d) cannot both be in the same shader for shader model less than 5.1.
+SM.CBUFFERARRAYOFFSETALIGNMENT                        CBuffer array offset must be aligned to 16-bytes
+SM.CBUFFERELEMENTOVERFLOW                             CBuffer elements must not overflow
+SM.CBUFFEROFFSETOVERLAP                               CBuffer offsets must not overlap
+SM.CBUFFERSIZE                                        CBuffer size must not exceed 65536 bytes
+SM.CBUFFERTEMPLATETYPEMUSTBESTRUCT                    D3D12 constant/texture buffer template element can only be a struct.
+SM.COMPLETEPOSITION                                   Not all elements of SV_Position were written.
+SM.CONSTANTINTERPMODE                                 Interpolation mode must be constant for MS primitive output.
+SM.COUNTERONLYONSTRUCTBUF                             BufferUpdateCounter valid only on structured buffers.
+SM.CSNOSIGNATURES                                     Compute shaders must not have shader signatures.
+SM.DOMAINLOCATIONIDXOOB                               DomainLocation component index out of bounds for the domain.
+SM.DSINPUTCONTROLPOINTCOUNTRANGE                      DS input control point count must be [0..%0].  %1 specified.
+SM.DXILVERSION                                        Target shader model requires specific Dxil Version
+SM.EXPLICITTGSMSIZEONENTRY                            Total Thread Group Shared Memory used by entry must not exceed limit specified by entry attribute.
+SM.GSINSTANCECOUNTRANGE                               GS instance count must be [1..%0].  %1 specified.
+SM.GSOUTPUTVERTEXCOUNTRANGE                           GS output vertex count must be [0..%0].  %1 specified.
+SM.GSTOTALOUTPUTVERTEXDATARANGE                       Declared output vertex count (%0) multiplied by the total number of declared scalar components of output data (%1) equals %2.  This value cannot be greater than %3.
+SM.GSVALIDINPUTPRIMITIVE                              GS input primitive unrecognized.
+SM.GSVALIDOUTPUTPRIMITIVETOPOLOGY                     GS output primitive topology unrecognized.
+SM.HSINPUTCONTROLPOINTCOUNTRANGE                      HS input control point count must be [0..%0].  %1 specified.
+SM.HULLPASSTHRUCONTROLPOINTCOUNTMATCH                 For pass thru hull shader, input control point count must match output control point count
+SM.INCOMPATIBLECALLINENTRY                            Features used in internal function calls must be compatible with entry
+SM.INCOMPATIBLEDERIVINCOMPUTESHADERMODEL              Derivatives in compute-model shaders require shader model 6.6 and above
+SM.INCOMPATIBLEDERIVLAUNCH                            Node shaders only support derivatives in broadcasting and coalescing launch modes
+SM.INCOMPATIBLEOPERATION                              Operations used in entry function must be compatible with shader stage and other properties
+SM.INCOMPATIBLEREQUIRESGROUP                          Functions requiring groupshared memory must be called from shaders with a visible group
+SM.INCOMPATIBLESHADERMODEL                            Functions may only use features available in the current shader model
+SM.INCOMPATIBLESTAGE                                  Functions may only use features available in the entry function's stage
+SM.INCOMPATIBLETHREADGROUPDIM                         When derivatives are used in compute-model shaders, the thread group dimensions must be compatible
+SM.INSIDETESSFACTORSIZEMATCHDOMAIN                    InsideTessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.
+SM.INVALIDRESOURCECOMPTYPE                            Invalid resource return type.
+SM.INVALIDRESOURCEKIND                                Invalid resource kind.
+SM.INVALIDSAMPLERFEEDBACKTYPE                         Invalid sampler feedback type.
+SM.INVALIDTEXTUREKINDONUAV                            TextureCube[Array] resources are not supported with UAVs.
+SM.ISOLINEOUTPUTPRIMITIVEMISMATCH                     Hull Shader declared with IsoLine Domain must specify output primitive point or line. Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain.
+SM.ISSPECIALFLOAT                                     16 bit IsSpecialFloat overloads require Shader Model 6.9 or higher.
+SM.MAXTGSMSIZEONENTRY                                 Total Thread Group Shared Memory used by entry must not exceed maximum for shader model.
+SM.MAXTHEADGROUP                                      Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1.
+SM.MESHPSIGROWCOUNT                                   For shader '%0', primitive output signatures are taking up more than %1 rows.
+SM.MESHSHADERINOUTSIZE                                For shader '%0', payload plus output size is greater than %1.
+SM.MESHSHADERMAXPRIMITIVECOUNT                        MS max primitive output count must be [0..%0].  %1 specified.
+SM.MESHSHADERMAXVERTEXCOUNT                           MS max vertex output count must be [0..%0].  %1 specified.
+SM.MESHSHADEROUTPUTSIZE                               For shader '%0', vertex plus primitive output size is greater than %1.
+SM.MESHSHADERPAYLOADSIZE                              For mesh shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes.
+SM.MESHSHADERPAYLOADSIZEDECLARED                      For mesh shader with entry '%0', payload size %1 is greater than declared size of %2 bytes.
+SM.MESHTOTALSIGROWCOUNT                               For shader '%0', vertex and primitive output signatures are taking up more than %1 rows.
+SM.MESHVSIGROWCOUNT                                   For shader '%0', vertex output signatures are taking up more than %1 rows.
+SM.MULTISTREAMMUSTBEPOINT                             When multiple GS output streams are used they must be pointlists
+SM.NAME                                               Target shader model name must be known
+SM.NOINTERPMODE                                       Interpolation mode must be undefined for VS input/PS output/patch constant.
+SM.NOPSOUTPUTIDX                                      Pixel shader output registers are not indexable.
+SM.OPCODE                                             Opcode must be defined in target shader model
+SM.OPCODEININVALIDFUNCTION                            Invalid DXIL opcode usage like StorePatchConstant in patch constant function
+SM.OPERAND                                            Operand must be defined in target shader model.
+SM.OUTPUTCONTROLPOINTCOUNTRANGE                       output control point count must be [%0..%1].  %2 specified.
+SM.OUTPUTCONTROLPOINTSTOTALSCALARS                    Total number of scalars across all HS output control points must not exceed the allowed maximum.
+SM.PATCHCONSTANTONLYFORHSDS                           patch constant signature only valid in HS and DS.
+SM.PROGRAMVERSION                                     Program Version in Dxil Container does not match Dxil Module shader model version
+SM.PSCONSISTENTINTERP                                 Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample).
+SM.PSCOVERAGEANDINNERCOVERAGE                         InnerCoverage and Coverage are mutually exclusive.
+SM.PSMULTIPLEDEPTHSEMANTIC                            Pixel Shader only allows one type of depth semantic to be declared.
+SM.PSOUTPUTSEMANTIC                                   Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found.
+SM.PSTARGETCOL0                                       SV_Target packed location must start at column 0.
+SM.PSTARGETINDEXMATCHESROW                            SV_Target semantic index must match packed row location.
+SM.RAYSHADERPAYLOADSIZE                               For shader '%0', %1 size is smaller than argument's allocation size.
+SM.RAYSHADERSIGNATURES                                Ray tracing shader '%0' should not have any shader signatures.
+SM.RESOURCERANGEOVERLAP                               Resource ranges must not overlap
+SM.ROVONLYINPS                                        RasterizerOrdered objects are only allowed in 5.0+ pixel shaders.
+SM.SAMPLECOUNTONLYON2DMS                              Only Texture2DMS/2DMSArray can have a sample count.
+SM.SEMANTIC                                           Semantic must be defined in target shader model
+SM.STREAMINDEXRANGE                                   Stream index (%0) must be between 0 and %1.
+SM.TESSFACTORFORDOMAIN                                Required TessFactor for domain not found declared anywhere in Patch Constant data.
+SM.TESSFACTORSIZEMATCHDOMAIN                          TessFactor rows, columns (%0, %1) invalid for domain %2.  Expected %3 rows and 1 column.
+SM.TGSMUNSUPPORTED                                    Thread Group Shared Memory not supported %0.
+SM.THREADGROUPCHANNELRANGE                            Declared Thread Group %0 size %1 outside valid range [%2..%3].
+SM.TRIOUTPUTPRIMITIVEMISMATCH                         Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain.
+SM.UNDEFINEDOUTPUT                                    Not all elements of output %0 were written.
+SM.VALIDDOMAIN                                        Invalid Tessellator Domain specified. Must be isoline, tri or quad.
+SM.VIEWIDNEEDSSLOT                                    ViewID requires compatible space in pixel shader input signature
+SM.WAVESIZEALLZEROWHENUNDEFINED                       WaveSize Max and Preferred must be 0 when Min is 0
+SM.WAVESIZEEXPECTSONEPARAM                            WaveSize tag expects exactly 1 parameter.
+SM.WAVESIZEMAXANDPREFERREDZEROWHENNORANGE             WaveSize Max and Preferred must be 0 to encode min==max
+SM.WAVESIZEMAXGREATERTHANMIN                          WaveSize Max must be greater than Min
+SM.WAVESIZENEEDSCONSTANTOPERANDS                      WaveSize metadata operands must be constant values.
+SM.WAVESIZENEEDSSM66OR67                              WaveSize is valid only for Shader Model 6.6 and 6.7.
+SM.WAVESIZEONCOMPUTEORNODE                            WaveSize only allowed on compute or node shaders
+SM.WAVESIZEPREFERREDINRANGE                           WaveSize Preferred must be within Min..Max range
+SM.WAVESIZERANGEEXPECTSTHREEPARAMS                    WaveSize Range tag expects exactly 3 parameters.
+SM.WAVESIZERANGENEEDSSM68PLUS                         WaveSize Range is valid only for Shader Model 6.8 and higher.
+SM.WAVESIZETAGDUPLICATE                               WaveSize or WaveSizeRange tag may only appear once per entry point.
+SM.WAVESIZEVALUE                                      WaveSize value must be a power of 2 in range [4..128]
+SM.ZEROHSINPUTCONTROLPOINTWITHINPUT                   When HS input control point count is 0, no input signature should exist.
+TYPES.DEFINED                                         Type must be defined based on DXIL primitives
+TYPES.I8                                              I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics.
+TYPES.INTWIDTH                                        Int type must be of valid width
+TYPES.NOMULTIDIM                                      Only one dimension allowed for array type.
+TYPES.NOPTRTOPTR                                      Pointers to pointers, or pointers in structures are not allowed.
+TYPES.NOVECTOR                                        Vector types must not be present
+===================================================== ========================================================================================================================================================================================================================================================================================================
 
 .. VALRULES-RST:END
 
diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md
index 4a7f8f08b6..49ec7d6d4b 100644
--- a/docs/ReleaseNotes.md
+++ b/docs/ReleaseNotes.md
@@ -24,8 +24,7 @@ line upon naming the release. Refer to previous for appropriate section names.
 
 #### Experimental Shader Model 6.10
 
-- Moved Linear Algebra (Cooperative Vector) DXIL Opcodes to experimental Shader Model 6.10
-- The Cooperative Vectors API was moved to `coopvec.h` header and under the `dx::coopvec` namespace.
+- Removed experimental Cooperative Vector; it has been replaced by LinAlg matrix.
 - Implement GetGroupWaveIndex and GetGroupWaveCount in experimental Shader Model 6.10.
   - [proposal](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0048-group-wave-index.md)
   - GetGroupWaveIndex: New intrinsic for Compute, Mesh, Amplification and Node shaders which returns the index of the wave within the thread group that the the thread is executing.
diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h
index 307a94302d..3be6489008 100644
--- a/include/dxc/DXIL/DxilConstants.h
+++ b/include/dxc/DXIL/DxilConstants.h
@@ -204,6 +204,13 @@ enum class MatrixScope : uint32_t {
   ThreadGroup = 2,
 };
 
+enum class LinalgMatrixLayout : uint32_t {
+  RowMajor = 0,
+  ColumnMajor = 1,
+  MulOptimal = 2,
+  OuterProductOptimal = 3,
+};
+
 // Must match D3D_INTERPOLATION_MODE
 enum class InterpolationMode : uint8_t {
   Undefined = 0,
@@ -524,9 +531,9 @@ static const OpCodeTableID TableID = OpCodeTableID::ExperimentalOps;
 // Enumeration for ExperimentalOps DXIL operations
 enum class OpCode : unsigned {
   //
-  ReservedD1 = 30, // reserved
-  ReservedD2 = 31, // reserved
-  ReservedD3 = 32, // reserved
+  ReservedE1 = 30, // reserved
+  ReservedE2 = 31, // reserved
+  ReservedE3 = 32, // reserved
 
   // Debugging
   DebugBreak = 33,        // triggers a breakpoint if a debugger is attached
@@ -647,6 +654,10 @@ enum class OpCode : unsigned {
   ReservedC7 = 300,  // reserved
   ReservedC8 = 301,  // reserved
   ReservedC9 = 302,  // reserved
+  ReservedD0 = 305,  // reserved
+  ReservedD1 = 306,  // reserved
+  ReservedD2 = 307,  // reserved
+  ReservedD3 = 308,  // reserved
 
   // Amplification shader instructions
   DispatchMesh = 173, // Amplification shader intrinsic DispatchMesh
@@ -876,19 +887,6 @@ enum class OpCode : unsigned {
   CreateHandleForLib =
       160, // create resource handle from resource struct for library
 
-  // Linear Algebra Operations
-  MatVecMul =
-      305, // Multiplies a MxK dimension matrix and a K sized input vector
-  MatVecMulAdd = 306, // multiplies a MxK dimension matrix and a K sized input
-                      // vector and adds an M-sized bias vector
-  OuterProductAccumulate =
-      307, // Computes the outer product between column vectors and an MxN
-           // matrix is accumulated component-wise atomically (with device
-           // scope) in memory
-  VectorAccumulate = 308, // Accumulates the components of a vector
-                          // component-wise atomically (with device scope) to
-                          // the corresponding elements of an array in memory
-
   // Mesh shader instructions
   EmitIndices = 169, // emit a primitive's vertex indices in a mesh shader
   GetMeshPayload =
@@ -1351,12 +1349,12 @@ enum class OpCode : unsigned {
       ExperimentalOps,
       LinAlgMatrixOuterProduct), // Outer products an M sized vector and a N
                                  // sized vector producing an MxN matrix
-  // ReservedD1 = 0x8000001E, 2147483678U, -2147483618
-  EXP_OPCODE(ExperimentalOps, ReservedD1), // reserved
-  // ReservedD2 = 0x8000001F, 2147483679U, -2147483617
-  EXP_OPCODE(ExperimentalOps, ReservedD2), // reserved
-  // ReservedD3 = 0x80000020, 2147483680U, -2147483616
-  EXP_OPCODE(ExperimentalOps, ReservedD3), // reserved
+  // ReservedE1 = 0x8000001E, 2147483678U, -2147483618
+  EXP_OPCODE(ExperimentalOps, ReservedE1), // reserved
+  // ReservedE2 = 0x8000001F, 2147483679U, -2147483617
+  EXP_OPCODE(ExperimentalOps, ReservedE2), // reserved
+  // ReservedE3 = 0x80000020, 2147483680U, -2147483616
+  EXP_OPCODE(ExperimentalOps, ReservedE3), // reserved
   // DebugBreak = 0x80000021, 2147483681U, -2147483615
   EXP_OPCODE(ExperimentalOps,
              DebugBreak), // triggers a breakpoint if a debugger is attached
@@ -1541,10 +1539,6 @@ enum class OpCodeClass : unsigned {
   LinAlgMatrixSetElement,
   LinAlgMatrixStoreToDescriptor,
   LinAlgMatrixStoreToMemory,
-  MatVecMul,
-  MatVecMulAdd,
-  OuterProductAccumulate,
-  VectorAccumulate,
 
   // Mesh shader instructions
   EmitIndices,
@@ -1731,7 +1725,7 @@ enum class OpCodeClass : unsigned {
   NodeOutputIsValid,
   OutputComplete,
 
-  NumOpClasses = 225, // exclusive last value of enumeration
+  NumOpClasses = 221, // exclusive last value of enumeration
 };
 // OPCODECLASS-ENUM:END
 
@@ -1911,29 +1905,6 @@ const unsigned kHitObjectTraceRay_RayDescOpIdx = 7;
 const unsigned kHitObjectTraceRay_PayloadOpIdx = 15;
 const unsigned kHitObjectTraceRay_NumOp = 16;
 
-// MatVec Ops
-const unsigned kMatVecMulInputVectorIdx = 1;
-const unsigned kMatVecMulIsInputUnsignedIdx = 2;
-const unsigned kMatVecMulInputInterpretationIdx = 3;
-const unsigned kMatVecMulMatrixBufferIdx = 4;
-const unsigned kMatVecMulMatrixOffsetIdx = 5;
-const unsigned kMatVecMulMatrixInterpretationIdx = 6;
-const unsigned kMatVecMulMatrixMIdx = 7;
-const unsigned kMatVecMulMatrixKIdx = 8;
-const unsigned kMatVecMulMatrixLayoutIdx = 9;
-const unsigned kMatVecMulMatrixTransposeIdx = 10;
-const unsigned kMatVecMulMatrixStrideIdx = 11;
-const unsigned kMatVecMulIsOutputUnsignedIdx = 12;
-
-// MatVecAdd
-const unsigned kMatVecMulAddBiasInterpretation = 14;
-const unsigned kMatVecMulAddIsOutputUnsignedIdx = 15;
-
-// Outer Product Accumulate
-const unsigned kOuterProdAccMatrixInterpretation = 5;
-const unsigned kOuterProdAccMatrixLayout = 6;
-const unsigned kOuterProdAccMatrixStride = 7;
-
 // TODO: add operand index for all the OpCodeClass.
 } // namespace OperandIndex
 
@@ -2506,13 +2477,6 @@ extern const char *kHostLayoutTypePrefix;
 
 extern const char *kWaveOpsIncludeHelperLanesString;
 
-enum class LinalgMatrixLayout : uint32_t {
-  RowMajor = 0,
-  ColumnMajor = 1,
-  MulOptimal = 2,
-  OuterProductOptimal = 3,
-};
-
 } // namespace DXIL
 
 } // namespace hlsl
diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h
index c840a2fc6a..35245339cd 100644
--- a/include/dxc/DXIL/DxilInstructions.h
+++ b/include/dxc/DXIL/DxilInstructions.h
@@ -9919,236 +9919,6 @@ struct DxilInst_RawBufferVectorStore {
   }
 };
 
-/// This instruction Multiplies a MxK dimension matrix and a K sized input
-/// vector
-struct DxilInst_MatVecMul {
-  llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_MatVecMul(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::MatVecMul);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (13 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands())
-      return false;
-    return true;
-  }
-  // Metadata
-  bool requiresUniformInputs() const { return false; }
-  // Operand indexes
-  enum OperandIdx {
-    arg_inputVector = 1,
-    arg_isInputUnsigned = 2,
-    arg_inputInterpretation = 3,
-    arg_matrixBuffer = 4,
-    arg_matrixOffset = 5,
-    arg_matrixIntepretation = 6,
-    arg_matrixM = 7,
-    arg_matrixK = 8,
-    arg_matrixLayout = 9,
-    arg_matrixTranspose = 10,
-    arg_matrixStride = 11,
-    arg_isOutputUnsigned = 12,
-  };
-  // Accessors
-  llvm::Value *get_inputVector() const { return Instr->getOperand(1); }
-  void set_inputVector(llvm::Value *val) { Instr->setOperand(1, val); }
-  llvm::Value *get_isInputUnsigned() const { return Instr->getOperand(2); }
-  void set_isInputUnsigned(llvm::Value *val) { Instr->setOperand(2, val); }
-  llvm::Value *get_inputInterpretation() const { return Instr->getOperand(3); }
-  void set_inputInterpretation(llvm::Value *val) { Instr->setOperand(3, val); }
-  llvm::Value *get_matrixBuffer() const { return Instr->getOperand(4); }
-  void set_matrixBuffer(llvm::Value *val) { Instr->setOperand(4, val); }
-  llvm::Value *get_matrixOffset() const { return Instr->getOperand(5); }
-  void set_matrixOffset(llvm::Value *val) { Instr->setOperand(5, val); }
-  llvm::Value *get_matrixIntepretation() const { return Instr->getOperand(6); }
-  void set_matrixIntepretation(llvm::Value *val) { Instr->setOperand(6, val); }
-  llvm::Value *get_matrixM() const { return Instr->getOperand(7); }
-  void set_matrixM(llvm::Value *val) { Instr->setOperand(7, val); }
-  llvm::Value *get_matrixK() const { return Instr->getOperand(8); }
-  void set_matrixK(llvm::Value *val) { Instr->setOperand(8, val); }
-  llvm::Value *get_matrixLayout() const { return Instr->getOperand(9); }
-  void set_matrixLayout(llvm::Value *val) { Instr->setOperand(9, val); }
-  llvm::Value *get_matrixTranspose() const { return Instr->getOperand(10); }
-  void set_matrixTranspose(llvm::Value *val) { Instr->setOperand(10, val); }
-  llvm::Value *get_matrixStride() const { return Instr->getOperand(11); }
-  void set_matrixStride(llvm::Value *val) { Instr->setOperand(11, val); }
-  llvm::Value *get_isOutputUnsigned() const { return Instr->getOperand(12); }
-  void set_isOutputUnsigned(llvm::Value *val) { Instr->setOperand(12, val); }
-};
-
-/// This instruction multiplies a MxK dimension matrix and a K sized input
-/// vector and adds an M-sized bias vector
-struct DxilInst_MatVecMulAdd {
-  llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_MatVecMulAdd(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr,
-                                          hlsl::OP::OpCode::MatVecMulAdd);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (16 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands())
-      return false;
-    return true;
-  }
-  // Metadata
-  bool requiresUniformInputs() const { return false; }
-  // Operand indexes
-  enum OperandIdx {
-    arg_inputVector = 1,
-    arg_isInputUnsigned = 2,
-    arg_inputInterpretation = 3,
-    arg_matrixBuffer = 4,
-    arg_matrixOffset = 5,
-    arg_matrixIntepretation = 6,
-    arg_matrixM = 7,
-    arg_matrixK = 8,
-    arg_matrixLayout = 9,
-    arg_matrixTranspose = 10,
-    arg_matrixStride = 11,
-    arg_biasBuffer = 12,
-    arg_biasOffset = 13,
-    arg_biasIntepretation = 14,
-    arg_isOutputUnsigned = 15,
-  };
-  // Accessors
-  llvm::Value *get_inputVector() const { return Instr->getOperand(1); }
-  void set_inputVector(llvm::Value *val) { Instr->setOperand(1, val); }
-  llvm::Value *get_isInputUnsigned() const { return Instr->getOperand(2); }
-  void set_isInputUnsigned(llvm::Value *val) { Instr->setOperand(2, val); }
-  llvm::Value *get_inputInterpretation() const { return Instr->getOperand(3); }
-  void set_inputInterpretation(llvm::Value *val) { Instr->setOperand(3, val); }
-  llvm::Value *get_matrixBuffer() const { return Instr->getOperand(4); }
-  void set_matrixBuffer(llvm::Value *val) { Instr->setOperand(4, val); }
-  llvm::Value *get_matrixOffset() const { return Instr->getOperand(5); }
-  void set_matrixOffset(llvm::Value *val) { Instr->setOperand(5, val); }
-  llvm::Value *get_matrixIntepretation() const { return Instr->getOperand(6); }
-  void set_matrixIntepretation(llvm::Value *val) { Instr->setOperand(6, val); }
-  llvm::Value *get_matrixM() const { return Instr->getOperand(7); }
-  void set_matrixM(llvm::Value *val) { Instr->setOperand(7, val); }
-  llvm::Value *get_matrixK() const { return Instr->getOperand(8); }
-  void set_matrixK(llvm::Value *val) { Instr->setOperand(8, val); }
-  llvm::Value *get_matrixLayout() const { return Instr->getOperand(9); }
-  void set_matrixLayout(llvm::Value *val) { Instr->setOperand(9, val); }
-  llvm::Value *get_matrixTranspose() const { return Instr->getOperand(10); }
-  void set_matrixTranspose(llvm::Value *val) { Instr->setOperand(10, val); }
-  llvm::Value *get_matrixStride() const { return Instr->getOperand(11); }
-  void set_matrixStride(llvm::Value *val) { Instr->setOperand(11, val); }
-  llvm::Value *get_biasBuffer() const { return Instr->getOperand(12); }
-  void set_biasBuffer(llvm::Value *val) { Instr->setOperand(12, val); }
-  llvm::Value *get_biasOffset() const { return Instr->getOperand(13); }
-  void set_biasOffset(llvm::Value *val) { Instr->setOperand(13, val); }
-  llvm::Value *get_biasIntepretation() const { return Instr->getOperand(14); }
-  void set_biasIntepretation(llvm::Value *val) { Instr->setOperand(14, val); }
-  llvm::Value *get_isOutputUnsigned() const { return Instr->getOperand(15); }
-  void set_isOutputUnsigned(llvm::Value *val) { Instr->setOperand(15, val); }
-};
-
-/// This instruction Computes the outer product between column vectors and an
-/// MxN matrix is accumulated component-wise atomically (with device scope) in
-/// memory
-struct DxilInst_OuterProductAccumulate {
-  llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_OuterProductAccumulate(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(
-        Instr, hlsl::OP::OpCode::OuterProductAccumulate);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (8 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands())
-      return false;
-    return true;
-  }
-  // Metadata
-  bool requiresUniformInputs() const { return false; }
-  // Operand indexes
-  enum OperandIdx {
-    arg_inputVector1 = 1,
-    arg_inputVector2 = 2,
-    arg_matrixBuffer = 3,
-    arg_matrixOffset = 4,
-    arg_matrixIntepretation = 5,
-    arg_matrixLayout = 6,
-    arg_matrixStride = 7,
-  };
-  // Accessors
-  llvm::Value *get_inputVector1() const { return Instr->getOperand(1); }
-  void set_inputVector1(llvm::Value *val) { Instr->setOperand(1, val); }
-  llvm::Value *get_inputVector2() const { return Instr->getOperand(2); }
-  void set_inputVector2(llvm::Value *val) { Instr->setOperand(2, val); }
-  llvm::Value *get_matrixBuffer() const { return Instr->getOperand(3); }
-  void set_matrixBuffer(llvm::Value *val) { Instr->setOperand(3, val); }
-  llvm::Value *get_matrixOffset() const { return Instr->getOperand(4); }
-  void set_matrixOffset(llvm::Value *val) { Instr->setOperand(4, val); }
-  llvm::Value *get_matrixIntepretation() const { return Instr->getOperand(5); }
-  void set_matrixIntepretation(llvm::Value *val) { Instr->setOperand(5, val); }
-  int32_t get_matrixIntepretation_val() const {
-    return (int32_t)(llvm::dyn_cast<llvm::ConstantInt>(Instr->getOperand(5))
-                         ->getZExtValue());
-  }
-  void set_matrixIntepretation_val(int32_t val) {
-    Instr->setOperand(5, llvm::Constant::getIntegerValue(
-                             llvm::IntegerType::get(Instr->getContext(), 32),
-                             llvm::APInt(32, (uint64_t)val)));
-  }
-  llvm::Value *get_matrixLayout() const { return Instr->getOperand(6); }
-  void set_matrixLayout(llvm::Value *val) { Instr->setOperand(6, val); }
-  int32_t get_matrixLayout_val() const {
-    return (int32_t)(llvm::dyn_cast<llvm::ConstantInt>(Instr->getOperand(6))
-                         ->getZExtValue());
-  }
-  void set_matrixLayout_val(int32_t val) {
-    Instr->setOperand(6, llvm::Constant::getIntegerValue(
-                             llvm::IntegerType::get(Instr->getContext(), 32),
-                             llvm::APInt(32, (uint64_t)val)));
-  }
-  llvm::Value *get_matrixStride() const { return Instr->getOperand(7); }
-  void set_matrixStride(llvm::Value *val) { Instr->setOperand(7, val); }
-};
-
-/// This instruction Accumulates the components of a vector component-wise
-/// atomically (with device scope) to the corresponding elements of an array in
-/// memory
-struct DxilInst_VectorAccumulate {
-  llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_VectorAccumulate(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr,
-                                          hlsl::OP::OpCode::VectorAccumulate);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (4 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands())
-      return false;
-    return true;
-  }
-  // Metadata
-  bool requiresUniformInputs() const { return false; }
-  // Operand indexes
-  enum OperandIdx {
-    arg_inputVector = 1,
-    arg_arrayBuffer = 2,
-    arg_arrayOffset = 3,
-  };
-  // Accessors
-  llvm::Value *get_inputVector() const { return Instr->getOperand(1); }
-  void set_inputVector(llvm::Value *val) { Instr->setOperand(1, val); }
-  llvm::Value *get_arrayBuffer() const { return Instr->getOperand(2); }
-  void set_arrayBuffer(llvm::Value *val) { Instr->setOperand(2, val); }
-  llvm::Value *get_arrayOffset() const { return Instr->getOperand(3); }
-  void set_arrayOffset(llvm::Value *val) { Instr->setOperand(3, val); }
-};
-
 /// This instruction Bitwise AND reduction of the vector returning a scalar
 struct DxilInst_VectorReduceAnd {
   llvm::Instruction *Instr;
diff --git a/include/dxc/HLSL/HLOperations.h b/include/dxc/HLSL/HLOperations.h
index 88b868a103..25cafb16bd 100644
--- a/include/dxc/HLSL/HLOperations.h
+++ b/include/dxc/HLSL/HLOperations.h
@@ -469,54 +469,6 @@ const unsigned kHitObjectFromRayQuery_WithAttrs_NumOp = 5;
 // HitObject::GetAttributes
 const unsigned kHitObjectGetAttributes_AttributeOpIdx = 2;
 
-// Linear Algebra Operations
-
-// MatVecMul
-const unsigned kMatVecMulOutputVectorIdx = 1;
-const unsigned kMatVecMulIsOutputUnsignedIdx = 2;
-const unsigned kMatVecMulInputVectorIdx = 3;
-const unsigned kMatVecMulIsInputUnsignedIdx = 4;
-const unsigned kMatVecMulInputInterpretationIdx = 5;
-const unsigned kMatVecMulMatrixBufferIdx = 6;
-const unsigned kMatVecMulMatrixOffsetIdx = 7;
-const unsigned kMatVecMulMatrixInterpretationIdx = 8;
-const unsigned kMatVecMulMatrixMIdx = 9;
-const unsigned kMatVecMulMatrixKIdx = 10;
-const unsigned kMatVecMulMatrixLayoutIdx = 11;
-const unsigned kMatVecMulMatrixTransposeIdx = 12;
-const unsigned kMatVecMulMatrixStrideIdx = 13;
-
-// MatVecMulAdd
-const unsigned kMatVecMulAddOutputVectorIdx = 1;
-const unsigned kMatVecMulAddIsOutputUnsignedIdx = 2;
-const unsigned kMatVecMulAddInputVectorIdx = 3;
-const unsigned kMatVecMulAddIsInputUnsignedIdx = 4;
-const unsigned kMatVecMulAddInputInterpretationIdx = 5;
-const unsigned kMatVecMulAddMatrixBufferIdx = 6;
-const unsigned kMatVecMulAddMatrixOffsetIdx = 7;
-const unsigned kMatVecMulAddMatrixInterpretationIdx = 8;
-const unsigned kMatVecMulAddMatrixMIdx = 9;
-const unsigned kMatVecMulAddMatrixKIdx = 10;
-const unsigned kMatVecMulAddMatrixLayoutIdx = 11;
-const unsigned kMatVecMulAddMatrixTransposeIdx = 12;
-const unsigned kMatVecMulAddMatrixStrideIdx = 13;
-const unsigned kMatVecMulAddBiasBufferIdx = 14;
-const unsigned kMatVecMulAddBiasOffsetIdx = 15;
-const unsigned kMatVecMulAddBiasInterpretationIdx = 16;
-
-// OuterProductAccumulate
-const unsigned kOuterProdAccInputVec1Idx = 1;
-const unsigned kOuterProdAccInputVec2Idx = 2;
-const unsigned kOuterProdAccMatrixIdx = 3;
-const unsigned kOuterProdAccMatrixOffsetIdx = 4;
-const unsigned kOuterProdAccMatrixInterpretationIdx = 5;
-const unsigned kOuterProdAccMatrixLayoutIdx = 6;
-const unsigned kOuterProdAccMatrixStrideIdx = 7;
-
-// Vector Accumulate
-const unsigned kVectorAccInputVecIdx = 1;
-const unsigned kVectorAccMatrixIdx = 2;
-const unsigned kVectorAccMatrixOffsetIdx = 3;
 } // namespace HLOperandIndex
 
 llvm::Function *GetOrCreateHLFunction(llvm::Module &M,
diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h
index 2712dc310e..15e2404640 100644
--- a/include/dxc/HlslIntrinsicOp.h
+++ b/include/dxc/HlslIntrinsicOp.h
@@ -13,10 +13,10 @@ enum class IntrinsicOp {
   IOP_Barrier = 5,
   IOP_CallShader = 6,
   IOP_CheckAccessFullyMapped = 7,
-  IOP_ClusterID = 397,
+  IOP_ClusterID = 393,
   IOP_CreateResourceFromHeap = 8,
   IOP_D3DCOLORtoUBYTE4 = 9,
-  IOP_DebugBreak = 424,
+  IOP_DebugBreak = 420,
   IOP_DeviceMemoryBarrier = 10,
   IOP_DeviceMemoryBarrierWithGroupSync = 11,
   IOP_DispatchMesh = 12,
@@ -27,8 +27,8 @@ enum class IntrinsicOp {
   IOP_EvaluateAttributeSnapped = 17,
   IOP_GeometryIndex = 18,
   IOP_GetAttributeAtVertex = 19,
-  IOP_GetGroupWaveCount = 395,
-  IOP_GetGroupWaveIndex = 396,
+  IOP_GetGroupWaveCount = 391,
+  IOP_GetGroupWaveIndex = 392,
   IOP_GetRemainingRecursionLevels = 20,
   IOP_GetRenderTargetSampleCount = 21,
   IOP_GetRenderTargetSamplePosition = 22,
@@ -79,7 +79,7 @@ enum class IntrinsicOp {
   IOP_ReportHit = 67,
   IOP_SetMeshOutputCounts = 68,
   IOP_TraceRay = 69,
-  IOP_TriangleObjectPositions = 401,
+  IOP_TriangleObjectPositions = 397,
   IOP_WaveActiveAllEqual = 70,
   IOP_WaveActiveAllTrue = 71,
   IOP_WaveActiveAnyTrue = 72,
@@ -112,29 +112,25 @@ enum class IntrinsicOp {
   IOP_WorldToObject = 99,
   IOP_WorldToObject3x4 = 100,
   IOP_WorldToObject4x3 = 101,
-  IOP___builtin_LinAlg_CopyConvertMatrix = 405,
-  IOP___builtin_LinAlg_FillMatrix = 406,
-  IOP___builtin_LinAlg_MatrixAccumulate = 415,
-  IOP___builtin_LinAlg_MatrixAccumulateToDescriptor = 419,
-  IOP___builtin_LinAlg_MatrixAccumulateToMemory = 420,
-  IOP___builtin_LinAlg_MatrixGetCoordinate = 407,
-  IOP___builtin_LinAlg_MatrixGetElement = 408,
-  IOP___builtin_LinAlg_MatrixLength = 409,
-  IOP___builtin_LinAlg_MatrixLoadFromDescriptor = 410,
-  IOP___builtin_LinAlg_MatrixLoadFromMemory = 411,
-  IOP___builtin_LinAlg_MatrixMatrixMultiply = 416,
-  IOP___builtin_LinAlg_MatrixMatrixMultiplyAccumulate = 417,
-  IOP___builtin_LinAlg_MatrixOuterProduct = 421,
-  IOP___builtin_LinAlg_MatrixQueryAccumulatorLayout = 418,
-  IOP___builtin_LinAlg_MatrixSetElement = 412,
-  IOP___builtin_LinAlg_MatrixStoreToDescriptor = 413,
-  IOP___builtin_LinAlg_MatrixStoreToMemory = 414,
-  IOP___builtin_LinAlg_MatrixVectorMultiply = 422,
-  IOP___builtin_LinAlg_MatrixVectorMultiplyAdd = 423,
-  IOP___builtin_MatVecMul = 390,
-  IOP___builtin_MatVecMulAdd = 391,
-  IOP___builtin_OuterProductAccumulate = 392,
-  IOP___builtin_VectorAccumulate = 393,
+  IOP___builtin_LinAlg_CopyConvertMatrix = 401,
+  IOP___builtin_LinAlg_FillMatrix = 402,
+  IOP___builtin_LinAlg_MatrixAccumulate = 411,
+  IOP___builtin_LinAlg_MatrixAccumulateToDescriptor = 415,
+  IOP___builtin_LinAlg_MatrixAccumulateToMemory = 416,
+  IOP___builtin_LinAlg_MatrixGetCoordinate = 403,
+  IOP___builtin_LinAlg_MatrixGetElement = 404,
+  IOP___builtin_LinAlg_MatrixLength = 405,
+  IOP___builtin_LinAlg_MatrixLoadFromDescriptor = 406,
+  IOP___builtin_LinAlg_MatrixLoadFromMemory = 407,
+  IOP___builtin_LinAlg_MatrixMatrixMultiply = 412,
+  IOP___builtin_LinAlg_MatrixMatrixMultiplyAccumulate = 413,
+  IOP___builtin_LinAlg_MatrixOuterProduct = 417,
+  IOP___builtin_LinAlg_MatrixQueryAccumulatorLayout = 414,
+  IOP___builtin_LinAlg_MatrixSetElement = 408,
+  IOP___builtin_LinAlg_MatrixStoreToDescriptor = 409,
+  IOP___builtin_LinAlg_MatrixStoreToMemory = 410,
+  IOP___builtin_LinAlg_MatrixVectorMultiply = 418,
+  IOP___builtin_LinAlg_MatrixVectorMultiplyAdd = 419,
   IOP_abort = 102,
   IOP_abs = 103,
   IOP_acos = 104,
@@ -188,7 +184,7 @@ enum class IntrinsicOp {
   IOP_isfinite = 152,
   IOP_isinf = 153,
   IOP_isnan = 154,
-  IOP_isnormal = 394,
+  IOP_isnormal = 390,
   IOP_ldexp = 155,
   IOP_length = 156,
   IOP_lerp = 157,
@@ -326,7 +322,7 @@ enum class IntrinsicOp {
   MOP_WriteSamplerFeedbackGrad = 286,
   MOP_WriteSamplerFeedbackLevel = 287,
   MOP_Abort = 288,
-  MOP_CandidateClusterID = 398,
+  MOP_CandidateClusterID = 394,
   MOP_CandidateGeometryIndex = 289,
   MOP_CandidateInstanceContributionToHitGroupIndex = 290,
   MOP_CandidateInstanceID = 291,
@@ -339,14 +335,14 @@ enum class IntrinsicOp {
   MOP_CandidateProceduralPrimitiveNonOpaque = 298,
   MOP_CandidateTriangleBarycentrics = 299,
   MOP_CandidateTriangleFrontFace = 300,
-  MOP_CandidateTriangleObjectPositions = 402,
+  MOP_CandidateTriangleObjectPositions = 398,
   MOP_CandidateTriangleRayT = 301,
   MOP_CandidateType = 302,
   MOP_CandidateWorldToObject3x4 = 303,
   MOP_CandidateWorldToObject4x3 = 304,
   MOP_CommitNonOpaqueTriangleHit = 305,
   MOP_CommitProceduralPrimitiveHit = 306,
-  MOP_CommittedClusterID = 399,
+  MOP_CommittedClusterID = 395,
   MOP_CommittedGeometryIndex = 307,
   MOP_CommittedInstanceContributionToHitGroupIndex = 308,
   MOP_CommittedInstanceID = 309,
@@ -360,7 +356,7 @@ enum class IntrinsicOp {
   MOP_CommittedStatus = 317,
   MOP_CommittedTriangleBarycentrics = 318,
   MOP_CommittedTriangleFrontFace = 319,
-  MOP_CommittedTriangleObjectPositions = 403,
+  MOP_CommittedTriangleObjectPositions = 399,
   MOP_CommittedWorldToObject3x4 = 320,
   MOP_CommittedWorldToObject4x3 = 321,
   MOP_Proceed = 322,
@@ -371,7 +367,7 @@ enum class IntrinsicOp {
   MOP_WorldRayOrigin = 327,
   MOP_DxHitObject_FromRayQuery = 363,
   MOP_DxHitObject_GetAttributes = 364,
-  MOP_DxHitObject_GetClusterID = 400,
+  MOP_DxHitObject_GetClusterID = 396,
   MOP_DxHitObject_GetGeometryIndex = 365,
   MOP_DxHitObject_GetHitKind = 366,
   MOP_DxHitObject_GetInstanceID = 367,
@@ -398,8 +394,8 @@ enum class IntrinsicOp {
   MOP_DxHitObject_MakeNop = 358,
   MOP_DxHitObject_SetShaderTableIndex = 388,
   MOP_DxHitObject_TraceRay = 389,
-  MOP_DxHitObject_TriangleObjectPositions = 404,
-  IOP_DxIsDebuggerPresent = 425,
+  MOP_DxHitObject_TriangleObjectPositions = 400,
+  IOP_DxIsDebuggerPresent = 421,
   IOP_DxMaybeReorderThread = 359,
   MOP_Count = 328,
   MOP_FinishedCrossGroupSharing = 329,
@@ -432,7 +428,7 @@ enum class IntrinsicOp {
   IOP_usign = 355,
   MOP_InterlockedUMax = 356,
   MOP_InterlockedUMin = 357,
-  Num_Intrinsics = 426,
+  Num_Intrinsics = 422,
 };
 inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) {
   switch (opcode) {
diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp
index 424a9a0fb7..f0889d0f53 100644
--- a/lib/DXIL/DxilOperations.cpp
+++ b/lib/DXIL/DxilOperations.cpp
@@ -2654,39 +2654,38 @@ static const OP::OpCodeProperty CoreOps_OpCodeProps[] = {
      {{0x4e7}},
      {{0xe7}}}, // Overloads: hfwidl<hfwidl
 
-    // Linear Algebra Operations
-    {OC::MatVecMul,
-     "MatVecMul",
-     OCC::MatVecMul,
-     "matVecMul",
-     Attribute::ReadOnly,
-     2,
-     {{0x400}, {0x400}},
-     {{0x63}, {0x63}}}, // Overloads: <hfwi,<hfwi
-    {OC::MatVecMulAdd,
-     "MatVecMulAdd",
-     OCC::MatVecMulAdd,
-     "matVecMulAdd",
-     Attribute::ReadOnly,
-     2,
-     {{0x400}, {0x400}},
-     {{0x63}, {0x63}}}, // Overloads: <hfwi,<hfwi
-    {OC::OuterProductAccumulate,
-     "OuterProductAccumulate",
-     OCC::OuterProductAccumulate,
-     "outerProductAccumulate",
+    {OC::ReservedD0,
+     "ReservedD0",
+     OCC::Reserved,
+     "reserved",
      Attribute::None,
-     2,
-     {{0x400}, {0x400}},
-     {{0x63}, {0x63}}}, // Overloads: <hfwi,<hfwi
-    {OC::VectorAccumulate,
-     "VectorAccumulate",
-     OCC::VectorAccumulate,
-     "vectorAccumulate",
+     0,
+     {},
+     {}}, // Overloads: v
+    {OC::ReservedD1,
+     "ReservedD1",
+     OCC::Reserved,
+     "reserved",
      Attribute::None,
-     1,
-     {{0x400}},
-     {{0x63}}}, // Overloads: <hfwi
+     0,
+     {},
+     {}}, // Overloads: v
+    {OC::ReservedD2,
+     "ReservedD2",
+     OCC::Reserved,
+     "reserved",
+     Attribute::None,
+     0,
+     {},
+     {}}, // Overloads: v
+    {OC::ReservedD3,
+     "ReservedD3",
+     OCC::Reserved,
+     "reserved",
+     Attribute::None,
+     0,
+     {},
+     {}}, // Overloads: v
 
     // Vector reduce to scalar
     {OC::VectorReduceAnd,
@@ -2978,24 +2977,24 @@ static const OP::OpCodeProperty ExperimentalOps_OpCodeProps[] = {
      {{0x200}, {0x400}, {0x400}},
      {{0x0}, {0x63}, {0x63}}}, // Overloads: o,<hfwi,<hfwi
 
-    {OC::ReservedD1,
-     "ReservedD1",
+    {OC::ReservedE1,
+     "ReservedE1",
      OCC::Reserved,
      "reserved",
      Attribute::None,
      0,
      {},
      {}}, // Overloads: v
-    {OC::ReservedD2,
-     "ReservedD2",
+    {OC::ReservedE2,
+     "ReservedE2",
      OCC::Reserved,
      "reserved",
      Attribute::None,
      0,
      {},
      {}}, // Overloads: v
-    {OC::ReservedD3,
-     "ReservedD3",
+    {OC::ReservedE3,
+     "ReservedE3",
      OCC::Reserved,
      "reserved",
      Attribute::None,
@@ -3947,8 +3946,7 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
         SFLAG(Library) | SFLAG(RayGeneration) | SFLAG(ClosestHit) | SFLAG(Miss);
     return;
   }
-  // Instructions: MatVecMul=305, MatVecMulAdd=306, OuterProductAccumulate=307,
-  // VectorAccumulate=308, ExperimentalNop=2147483648,
+  // Instructions: ExperimentalNop=2147483648,
   // RayQuery_CandidateClusterID=2147483652,
   // RayQuery_CommittedClusterID=2147483653,
   // RayQuery_CandidateTriangleObjectPosition=2147483656,
@@ -3959,8 +3957,7 @@ void OP::GetMinShaderModelAndMask(OpCode C, bool bWithTranslation,
   // LinAlgMatrixAccumulateToDescriptor=2147483675,
   // LinAlgMatrixOuterProduct=2147483677, DebugBreak=2147483681,
   // IsDebuggerPresent=2147483682
-  if ((305 <= op && op <= 308) || op == 2147483648 ||
-      (2147483652 <= op && op <= 2147483653) ||
+  if (op == 2147483648 || (2147483652 <= op && op <= 2147483653) ||
       (2147483656 <= op && op <= 2147483657) || op == 2147483662 ||
       op == 2147483670 || (2147483673 <= op && op <= 2147483675) ||
       op == 2147483677 || (2147483681 <= op && op <= 2147483682)) {
@@ -6436,59 +6433,22 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
     A(pI32);
     break;
 
-    // Linear Algebra Operations
-  case OpCode::MatVecMul:
-    A(EXT(0));
-    A(pI32);
-    A(EXT(1));
-    A(pI1);
-    A(pI32);
-    A(pRes);
-    A(pI32);
-    A(pI32);
-    A(pI32);
-    A(pI32);
-    A(pI32);
-    A(pI1);
+    //
+  case OpCode::ReservedD0:
+    A(pV);
     A(pI32);
-    A(pI1);
     break;
-  case OpCode::MatVecMulAdd:
-    A(EXT(0));
-    A(pI32);
-    A(EXT(1));
-    A(pI1);
-    A(pI32);
-    A(pRes);
-    A(pI32);
-    A(pI32);
-    A(pI32);
-    A(pI32);
-    A(pI32);
-    A(pI1);
-    A(pI32);
-    A(pRes);
-    A(pI32);
+  case OpCode::ReservedD1:
+    A(pV);
     A(pI32);
-    A(pI1);
     break;
-  case OpCode::OuterProductAccumulate:
+  case OpCode::ReservedD2:
     A(pV);
     A(pI32);
-    A(EXT(0));
-    A(EXT(1));
-    A(pRes);
-    A(pI32);
-    A(pI32);
-    A(pI32);
-    A(pI32);
     break;
-  case OpCode::VectorAccumulate:
+  case OpCode::ReservedD3:
     A(pV);
     A(pI32);
-    A(pETy);
-    A(pRes);
-    A(pI32);
     break;
 
     // Vector reduce to scalar
@@ -6715,15 +6675,15 @@ Function *OP::GetOpFunc(OpCode opCode, Type *pOverloadType) {
     break;
 
     //
-  case OpCode::ReservedD1:
+  case OpCode::ReservedE1:
     A(pV);
     A(pI32);
     break;
-  case OpCode::ReservedD2:
+  case OpCode::ReservedE2:
     A(pV);
     A(pI32);
     break;
-  case OpCode::ReservedD3:
+  case OpCode::ReservedE3:
     A(pV);
     A(pI32);
     break;
@@ -6909,7 +6869,6 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) {
   case OpCode::WaveActiveAllEqual:
   case OpCode::CreateHandleForLib:
   case OpCode::WaveMatch:
-  case OpCode::VectorAccumulate:
   case OpCode::VectorReduceAnd:
   case OpCode::VectorReduceOr:
   case OpCode::FDot:
@@ -7034,14 +6993,18 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) {
   case OpCode::ReservedC7:
   case OpCode::ReservedC8:
   case OpCode::ReservedC9:
+  case OpCode::ReservedD0:
+  case OpCode::ReservedD1:
+  case OpCode::ReservedD2:
+  case OpCode::ReservedD3:
   case OpCode::ExperimentalNop:
   case OpCode::GetGroupWaveIndex:
   case OpCode::GetGroupWaveCount:
   case OpCode::ClusterID:
   case OpCode::LinAlgMatrixQueryAccumulatorLayout:
-  case OpCode::ReservedD1:
-  case OpCode::ReservedD2:
-  case OpCode::ReservedD3:
+  case OpCode::ReservedE1:
+  case OpCode::ReservedE2:
+  case OpCode::ReservedE3:
   case OpCode::DebugBreak:
   case OpCode::IsDebuggerPresent:
     return Type::getVoidTy(Ctx);
@@ -7074,8 +7037,13 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) {
   case OpCode::HitObject_TriangleObjectPosition:
     // These return native vectors directly
     return cast<VectorType>(Ty)->getElementType();
-  case OpCode::MatVecMul:
-  case OpCode::MatVecMulAdd:
+  case OpCode::LinAlgMatrixMultiplyAccumulate:
+    if (FT->getNumParams() < 4)
+      return nullptr;
+    return llvm::StructType::get(Ctx,
+                                 {FT->getReturnType(), FT->getParamType(1),
+                                  FT->getParamType(2), FT->getParamType(3)});
+
   case OpCode::LinAlgFillMatrix:
   case OpCode::LinAlgCopyConvertMatrix:
   case OpCode::LinAlgMatrixGetElement:
@@ -7084,19 +7052,6 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) {
     return llvm::StructType::get(Ctx,
                                  {FT->getReturnType(), FT->getParamType(1)});
 
-  case OpCode::OuterProductAccumulate:
-    if (FT->getNumParams() < 3)
-      return nullptr;
-    return llvm::StructType::get(Ctx,
-                                 {FT->getParamType(1), FT->getParamType(2)});
-
-  case OpCode::LinAlgMatrixMultiplyAccumulate:
-    if (FT->getNumParams() < 4)
-      return nullptr;
-    return llvm::StructType::get(Ctx,
-                                 {FT->getReturnType(), FT->getParamType(1),
-                                  FT->getParamType(2), FT->getParamType(3)});
-
   case OpCode::LinAlgMatrixLoadFromMemory:
     if (FT->getNumParams() < 2)
       return nullptr;
diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp
index e8eb2b9c74..ab9dd697d9 100644
--- a/lib/DxilValidation/DxilValidation.cpp
+++ b/lib/DxilValidation/DxilValidation.cpp
@@ -974,293 +974,6 @@ static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode Opcode,
   }
 }
 
-static bool CheckLinalgInterpretation(uint32_t Input, bool InRegister) {
-  using CT = DXIL::ComponentType;
-  switch (static_cast<CT>(Input)) {
-  case CT::I16:
-  case CT::U16:
-  case CT::I32:
-  case CT::U32:
-  case CT::F16:
-  case CT::F32:
-  case CT::U8:
-  case CT::I8:
-  case CT::F8_E4M3:
-  case CT::F8_E5M2:
-    return true;
-  case CT::PackedS8x32:
-  case CT::PackedU8x32:
-    return InRegister;
-  default:
-    return false;
-  }
-}
-
-static bool CheckMatrixLayoutForMatVecMulOps(unsigned Layout) {
-  return Layout <=
-         static_cast<unsigned>(DXIL::LinalgMatrixLayout::OuterProductOptimal);
-}
-
-std::string GetMatrixLayoutStr(unsigned Layout) {
-  switch (static_cast<DXIL::LinalgMatrixLayout>(Layout)) {
-  case DXIL::LinalgMatrixLayout::RowMajor:
-    return "RowMajor";
-  case DXIL::LinalgMatrixLayout::ColumnMajor:
-    return "ColumnMajor";
-  case DXIL::LinalgMatrixLayout::MulOptimal:
-    return "MulOptimal";
-  case DXIL::LinalgMatrixLayout::OuterProductOptimal:
-    return "OuterProductOptimal";
-  default:
-    DXASSERT_NOMSG(false);
-    return "Invalid";
-  }
-}
-
-static bool CheckTransposeForMatrixLayout(unsigned Layout, bool Transposed) {
-  switch (static_cast<DXIL::LinalgMatrixLayout>(Layout)) {
-  case DXIL::LinalgMatrixLayout::RowMajor:
-  case DXIL::LinalgMatrixLayout::ColumnMajor:
-    return !Transposed;
-
-  default:
-    return true;
-  }
-}
-
-static bool CheckUnsignedFlag(Type *VecTy, bool IsUnsigned) {
-  Type *ElemTy = VecTy->getScalarType();
-  if (ElemTy->isFloatingPointTy())
-    return !IsUnsigned;
-
-  return true;
-}
-
-static Value *GetMatVecOpIsOutputUnsigned(CallInst *CI, DXIL::OpCode OpCode) {
-  switch (OpCode) {
-  case DXIL::OpCode::MatVecMul:
-    return CI->getOperand(DXIL::OperandIndex::kMatVecMulIsOutputUnsignedIdx);
-  case DXIL::OpCode::MatVecMulAdd:
-    return CI->getOperand(DXIL::OperandIndex::kMatVecMulAddIsOutputUnsignedIdx);
-
-  default:
-    DXASSERT_NOMSG(false);
-    return nullptr;
-  }
-}
-
-static void ValidateImmOperandsForMatVecOps(CallInst *CI, DXIL::OpCode OpCode,
-                                            ValidationContext &ValCtx) {
-
-  llvm::Value *IsInputUnsigned =
-      CI->getOperand(DXIL::OperandIndex::kMatVecMulIsInputUnsignedIdx);
-  ConstantInt *IsInputUnsignedConst =
-      dyn_cast<llvm::ConstantInt>(IsInputUnsigned);
-  if (!IsInputUnsignedConst) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrMatVecOpIsUnsignedFlagsAreConst,
-        {"IsInputUnsigned"});
-    return;
-  }
-
-  llvm::Value *IsOutputUnsigned = GetMatVecOpIsOutputUnsigned(CI, OpCode);
-  ConstantInt *IsOutputUnsignedConst =
-      dyn_cast<llvm::ConstantInt>(IsOutputUnsigned);
-  if (!IsOutputUnsignedConst) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrMatVecOpIsUnsignedFlagsAreConst,
-        {"IsOutputUnsigned"});
-    return;
-  }
-
-  llvm::Value *InputInterpretation =
-      CI->getOperand(DXIL::OperandIndex::kMatVecMulInputInterpretationIdx);
-  ConstantInt *II = dyn_cast<ConstantInt>(InputInterpretation);
-  if (!II) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrLinalgInterpretationParamAreConst,
-        {"InputInterpretation"});
-    return;
-  }
-  uint64_t IIValue = II->getLimitedValue();
-  if (!CheckLinalgInterpretation(IIValue, true)) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrLinalgInvalidRegisterInterpValue,
-        {std::to_string(IIValue), "Input"});
-    return;
-  }
-
-  llvm::Value *MatrixInterpretation =
-      CI->getOperand(DXIL::OperandIndex::kMatVecMulMatrixInterpretationIdx);
-  ConstantInt *MI = dyn_cast<ConstantInt>(MatrixInterpretation);
-  if (!MI) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrLinalgInterpretationParamAreConst,
-        {"MatrixInterpretation"});
-    return;
-  }
-  uint64_t MIValue = MI->getLimitedValue();
-  if (!CheckLinalgInterpretation(MIValue, false)) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrLinalgInvalidMemoryInterpValue,
-        {std::to_string(MIValue), "Matrix"});
-    return;
-  }
-
-  llvm::Value *MatrixM =
-      CI->getOperand(DXIL::OperandIndex::kMatVecMulMatrixMIdx);
-  if (!llvm::isa<llvm::Constant>(MatrixM)) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrLinalgMatrixShapeParamsAreConst,
-        {"Matrix M dimension"});
-    return;
-  }
-
-  llvm::Value *MatrixK =
-      CI->getOperand(DXIL::OperandIndex::kMatVecMulMatrixKIdx);
-  if (!llvm::isa<llvm::Constant>(MatrixK)) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrLinalgMatrixShapeParamsAreConst,
-        {"Matrix K dimension"});
-    return;
-  }
-
-  llvm::Value *MatrixLayout =
-      CI->getOperand(DXIL::OperandIndex::kMatVecMulMatrixLayoutIdx);
-
-  ConstantInt *MatrixLayoutConst = dyn_cast<ConstantInt>(MatrixLayout);
-  if (!MatrixLayoutConst) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrLinalgMatrixShapeParamsAreConst,
-        {"Matrix Layout"});
-    return;
-  }
-  uint64_t MLValue = MatrixLayoutConst->getLimitedValue();
-  if (!CheckMatrixLayoutForMatVecMulOps(MLValue)) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrLinalgInvalidMatrixLayoutValueForMatVecOps,
-        {std::to_string(MLValue),
-         std::to_string(
-             static_cast<unsigned>(DXIL::LinalgMatrixLayout::RowMajor)),
-         std::to_string(static_cast<unsigned>(
-             DXIL::LinalgMatrixLayout::OuterProductOptimal))});
-    return;
-  }
-
-  llvm::Value *MatrixTranspose =
-      CI->getOperand(DXIL::OperandIndex::kMatVecMulMatrixTransposeIdx);
-  ConstantInt *MatrixTransposeConst = dyn_cast<ConstantInt>(MatrixTranspose);
-  if (!MatrixTransposeConst) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrLinalgMatrixShapeParamsAreConst,
-        {"MatrixTranspose"});
-    return;
-  }
-
-  if (!CheckTransposeForMatrixLayout(MLValue,
-                                     MatrixTransposeConst->getLimitedValue())) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrLinalgMatrixLayoutNotTransposable,
-        {GetMatrixLayoutStr(MLValue)});
-    return;
-  }
-
-  llvm::Value *InputVector =
-      CI->getOperand(DXIL::OperandIndex::kMatVecMulInputVectorIdx);
-  if (!CheckUnsignedFlag(InputVector->getType(),
-                         IsInputUnsignedConst->getLimitedValue())) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrLinalgNotAnUnsignedType, {"Input"});
-    return;
-  }
-
-  if (!CheckUnsignedFlag(CI->getType(),
-                         IsOutputUnsignedConst->getLimitedValue())) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrLinalgNotAnUnsignedType, {"Output"});
-    return;
-  }
-
-  switch (OpCode) {
-  case DXIL::OpCode::MatVecMulAdd: {
-    llvm::Value *BiasInterpretation =
-        CI->getOperand(DXIL::OperandIndex::kMatVecMulAddBiasInterpretation);
-    ConstantInt *BI = cast<ConstantInt>(BiasInterpretation);
-    if (!BI) {
-      ValCtx.EmitInstrFormatError(
-          CI, ValidationRule::InstrLinalgInterpretationParamAreConst,
-          {"BiasInterpretation"});
-      return;
-    }
-    uint64_t BIValue = BI->getLimitedValue();
-    if (!CheckLinalgInterpretation(BIValue, false)) {
-      ValCtx.EmitInstrFormatError(
-          CI, ValidationRule::InstrLinalgInvalidMemoryInterpValue,
-          {std::to_string(BIValue), "Bias vector"});
-      return;
-    }
-  } break;
-  default:
-    break;
-  }
-}
-
-static void ValidateImmOperandsForOuterProdAcc(CallInst *CI,
-                                               ValidationContext &ValCtx) {
-
-  llvm::Value *MatrixInterpretation =
-      CI->getOperand(DXIL::OperandIndex::kOuterProdAccMatrixInterpretation);
-  ConstantInt *MI = cast<ConstantInt>(MatrixInterpretation);
-  if (!MI) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrLinalgInterpretationParamAreConst,
-        {"MatrixInterpretation"});
-    return;
-  }
-  uint64_t MIValue = MI->getLimitedValue();
-  if (!CheckLinalgInterpretation(MIValue, false)) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrLinalgInvalidMemoryInterpValue,
-        {std::to_string(MIValue), "Matrix"});
-    return;
-  }
-
-  llvm::Value *MatrixLayout =
-      CI->getOperand(DXIL::OperandIndex::kOuterProdAccMatrixLayout);
-  if (!llvm::isa<llvm::Constant>(MatrixLayout)) {
-    ValCtx.EmitInstrFormatError(
-        CI, ValidationRule::InstrLinalgMatrixShapeParamsAreConst,
-        {"MatrixLayout"});
-    return;
-  }
-  ConstantInt *ML = cast<ConstantInt>(MatrixLayout);
-  uint64_t MLValue = ML->getLimitedValue();
-  if (MLValue !=
-      static_cast<unsigned>(DXIL::LinalgMatrixLayout::OuterProductOptimal))
-    ValCtx.EmitInstrFormatError(
-        CI,
-        ValidationRule::
-            InstrLinalgInvalidMatrixLayoutValueForOuterProductAccumulate,
-        {GetMatrixLayoutStr(MLValue),
-         GetMatrixLayoutStr(static_cast<unsigned>(
-             DXIL::LinalgMatrixLayout::OuterProductOptimal))});
-
-  llvm::Value *MatrixStride =
-      CI->getOperand(DXIL::OperandIndex::kOuterProdAccMatrixStride);
-  if (!llvm::isa<llvm::Constant>(MatrixStride)) {
-    ValCtx.EmitInstrError(
-        CI, ValidationRule::InstrLinalgMatrixStrideZeroForOptimalLayouts);
-    return;
-  }
-  ConstantInt *MS = cast<ConstantInt>(MatrixStride);
-  uint64_t MSValue = MS->getLimitedValue();
-  if (MSValue != 0) {
-    ValCtx.EmitInstrError(
-        CI, ValidationRule::InstrLinalgMatrixStrideZeroForOptimalLayouts);
-    return;
-  }
-}
-
 // Validate the type-defined mask compared to the store value mask which
 // indicates which parts were defined returns true if caller should continue
 // validation
@@ -2454,16 +2167,6 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI,
                                 {"FlattenedThreadIdInGroup", "SV_GroupIndex",
                                  GetLaunchTypeStr(NodeLaunchType)});
 
-    break;
-  case DXIL::OpCode::MatVecMul:
-  case DXIL::OpCode::MatVecMulAdd:
-    ValidateImmOperandsForMatVecOps(CI, Opcode, ValCtx);
-    break;
-  case DXIL::OpCode::OuterProductAccumulate:
-    ValidateImmOperandsForOuterProdAcc(CI, ValCtx);
-    break;
-  case DXIL::OpCode::VectorAccumulate:
-
     break;
   case DXIL::OpCode::IsInf:
   case DXIL::OpCode::IsNaN:
diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp
index 6cd2f83ad7..a005c03fbb 100644
--- a/lib/HLSL/HLOperationLower.cpp
+++ b/lib/HLSL/HLOperationLower.cpp
@@ -6715,199 +6715,6 @@ Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode,
   return Builder.CreateSelect(cond, t, f);
 }
 
-Value *TranslateMatVecMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
-                          HLOperationLowerHelper &Helper,
-                          HLObjectOperationLowerHelper *ObjHelper,
-                          bool &Translated) {
-
-  hlsl::OP *HlslOp = &Helper.hlslOP;
-  IRBuilder<> Builder(CI);
-
-  Constant *OpArg = HlslOp->GetU32Const(static_cast<unsigned>(OpCode));
-
-  // Input parameters
-  Value *InputVector =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulInputVectorIdx);
-  Value *InputIsUnsigned =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulIsInputUnsignedIdx);
-  Value *InputInterpretation =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulInputInterpretationIdx);
-
-  // Matrix parameters
-  Value *MatrixBuffer =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixBufferIdx);
-  Value *MatrixOffset =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixOffsetIdx);
-  Value *MatrixInterpretation =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixInterpretationIdx);
-  Value *MatrixM = CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixMIdx);
-  Value *MatrixK = CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixKIdx);
-  Value *MatrixLayout =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixLayoutIdx);
-  Value *MatrixTranspose =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixTransposeIdx);
-  Value *MatrixStride =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixStrideIdx);
-
-  // Output parameters
-  Value *OutputIsUnsigned =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulIsOutputUnsignedIdx);
-
-  // Get the DXIL function for the operation
-  Function *DxilFunc = HlslOp->GetOpFunc(
-      OpCode, {CI->getArgOperand(HLOperandIndex::kMatVecMulOutputVectorIdx)
-                   ->getType()
-                   ->getPointerElementType(),
-               InputVector->getType()});
-
-  // Create a call to the DXIL function
-  Value *NewCI = Builder.CreateCall(
-      DxilFunc,
-      {OpArg, InputVector, InputIsUnsigned, InputInterpretation, MatrixBuffer,
-       MatrixOffset, MatrixInterpretation, MatrixM, MatrixK, MatrixLayout,
-       MatrixTranspose, MatrixStride, OutputIsUnsigned});
-
-  // Get the output parameter and store the result
-  Value *OutParam =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulOutputVectorIdx);
-
-  Builder.CreateStore(NewCI, OutParam);
-
-  return nullptr;
-}
-
-Value *TranslateMatVecMulAdd(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode,
-                             HLOperationLowerHelper &Helper,
-                             HLObjectOperationLowerHelper *ObjHelper,
-                             bool &Translated) {
-
-  hlsl::OP *HlslOp = &Helper.hlslOP;
-  IRBuilder<> Builder(CI);
-
-  Constant *OpArg = HlslOp->GetU32Const(static_cast<unsigned>(OpCode));
-
-  // Input vector parameters
-  Value *InputVector =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulAddInputVectorIdx);
-  Value *InputIsUnsigned =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulAddIsInputUnsignedIdx);
-  Value *InputInterpretation =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulAddInputInterpretationIdx);
-
-  // Matrix parameters
-  Value *MatrixBuffer =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixBufferIdx);
-  Value *MatrixOffset =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixOffsetIdx);
-  Value *MatrixInterpretation =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixInterpretationIdx);
-  Value *MatrixM = CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixMIdx);
-  Value *MatrixK = CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixKIdx);
-  Value *MatrixLayout =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixLayoutIdx);
-  Value *MatrixTranspose =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixTransposeIdx);
-  Value *MatrixStride =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixStrideIdx);
-
-  // Bias parameters
-  Value *BiasBuffer =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulAddBiasBufferIdx);
-  Value *BiasOffset =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulAddBiasOffsetIdx);
-  Value *BiasInterpretation =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulAddBiasInterpretationIdx);
-
-  // Output parameters
-  Value *OutputIsUnsigned =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulAddIsOutputUnsignedIdx);
-
-  // Get the DXIL function for the operation
-  Function *DxilFunc = HlslOp->GetOpFunc(
-      OpCode, {CI->getArgOperand(HLOperandIndex::kMatVecMulAddOutputVectorIdx)
-                   ->getType()
-                   ->getPointerElementType(),
-               InputVector->getType()});
-
-  // Create a call to the DXIL function
-  Value *NewCI = Builder.CreateCall(
-      DxilFunc, {OpArg, InputVector, InputIsUnsigned, InputInterpretation,
-                 MatrixBuffer, MatrixOffset, MatrixInterpretation, MatrixM,
-                 MatrixK, MatrixLayout, MatrixTranspose, MatrixStride,
-                 BiasBuffer, BiasOffset, BiasInterpretation, OutputIsUnsigned});
-
-  // Store the result in the output parameter
-  Value *OutParam =
-      CI->getArgOperand(HLOperandIndex::kMatVecMulAddOutputVectorIdx);
-  Builder.CreateStore(NewCI, OutParam);
-
-  return nullptr;
-}
-
-Value *TranslateOuterProductAccumulate(CallInst *CI, IntrinsicOp IOP,
-                                       OP::OpCode OpCode,
-                                       HLOperationLowerHelper &Helper,
-                                       HLObjectOperationLowerHelper *ObjHelper,
-                                       bool &Translated) {
-
-  hlsl::OP *HlslOp = &Helper.hlslOP;
-  IRBuilder<> Builder(CI);
-
-  Constant *OpArg = HlslOp->GetU32Const(static_cast<unsigned>(OpCode));
-
-  // Input vector parameters
-  Value *InputVector1 =
-      CI->getArgOperand(HLOperandIndex::kOuterProdAccInputVec1Idx);
-  Value *InputVector2 =
-      CI->getArgOperand(HLOperandIndex::kOuterProdAccInputVec2Idx);
-
-  // Matrix parameters
-  Value *MatrixBuffer =
-      CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixIdx);
-  Value *MatrixOffset =
-      CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixOffsetIdx);
-  Value *MatrixInterpretation =
-      CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixInterpretationIdx);
-  Value *MatrixLayout =
-      CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixLayoutIdx);
-  Value *MatrixStride =
-      CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixStrideIdx);
-
-  // Get the DXIL function for the operation
-  Function *DxilFunc = HlslOp->GetOpFunc(
-      OpCode, {InputVector1->getType(), InputVector2->getType()});
-
-  return Builder.CreateCall(
-      DxilFunc, {OpArg, InputVector1, InputVector2, MatrixBuffer, MatrixOffset,
-                 MatrixInterpretation, MatrixLayout, MatrixStride});
-}
-
-Value *TranslateVectorAccumulate(CallInst *CI, IntrinsicOp IOP,
-                                 OP::OpCode OpCode,
-                                 HLOperationLowerHelper &Helper,
-                                 HLObjectOperationLowerHelper *ObjHelper,
-                                 bool &Translated) {
-
-  hlsl::OP *HlslOp = &Helper.hlslOP;
-  IRBuilder<> Builder(CI);
-
-  Constant *OpArg = HlslOp->GetU32Const(static_cast<unsigned>(OpCode));
-
-  // Input vector parameter
-  Value *InputVector = CI->getArgOperand(HLOperandIndex::kVectorAccInputVecIdx);
-
-  // Matrix parameters
-  Value *MatrixBuffer = CI->getArgOperand(HLOperandIndex::kVectorAccMatrixIdx);
-  Value *MatrixOffset =
-      CI->getArgOperand(HLOperandIndex::kVectorAccMatrixOffsetIdx);
-
-  // Get the DXIL function for the operation
-  Function *DxilFunc = HlslOp->GetOpFunc(OpCode, InputVector->getType());
-
-  return Builder.CreateCall(DxilFunc,
-                            {OpArg, InputVector, MatrixBuffer, MatrixOffset});
-}
-
 Value *TranslateLinAlgFillMatrix(CallInst *CI, IntrinsicOp IOP,
                                  OP::OpCode OpCode,
                                  HLOperationLowerHelper &Helper,
@@ -7994,15 +7801,6 @@ constexpr IntrinsicLower gLowerTable[] = {
     {IntrinsicOp::MOP_DxHitObject_TraceRay, TranslateHitObjectTraceRay,
      DXIL::OpCode::HitObject_TraceRay},
 
-    {IntrinsicOp::IOP___builtin_MatVecMul, TranslateMatVecMul,
-     DXIL::OpCode::MatVecMul},
-    {IntrinsicOp::IOP___builtin_MatVecMulAdd, TranslateMatVecMulAdd,
-     DXIL::OpCode::MatVecMulAdd},
-    {IntrinsicOp::IOP___builtin_OuterProductAccumulate,
-     TranslateOuterProductAccumulate, DXIL::OpCode::OuterProductAccumulate},
-    {IntrinsicOp::IOP___builtin_VectorAccumulate, TranslateVectorAccumulate,
-     DXIL::OpCode::VectorAccumulate},
-
     {IntrinsicOp::IOP_isnormal, TrivialIsSpecialFloat, DXIL::OpCode::IsNormal},
 
     {IntrinsicOp::IOP_GetGroupWaveCount, TranslateWaveToVal,
diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f2e2f7c4ed..ce6d6b17cc 100644
--- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8058,27 +8058,6 @@ def err_hlsl_linalg_attributed_matrix_required
 def err_hlsl_linalg_unsupported_stage : Error<
    "builtin unavailable in shader stage '%0' (requires 'compute', 'mesh' or 'amplification')">;
 
-def err_hlsl_linalg_mul_muladd_output_vector_size_not_equal_to_matrix_M : Error<
-  "output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation">;
-def err_hlsl_linalg_mul_muladd_unpacked_input_vector_size_not_equal_to_matrix_K : Error<
-  "unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation">;
-def err_hlsl_linalg_mul_muladd_packed_input_vector_size_incorrect : Error<
-  "packed input vector length must be the smallest number that can hold matrix dim K values of the "
-  "packed(smaller) type in linalg mul/muladd operations">;
-def err_hlsl_linalg_mul_muladd_isUnsigned_for_packed_input_must_be_true : Error<
-  "IsInputUnsigned must be true for packed input interpretations in linalg mul/muladd operations">;
-def err_hlsl_linalg_mul_muladd_packed_input_vector_must_be_uint : Error<
-  "packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations">;
-def err_hlsl_linalg_mul_muladd_invalid_dim: Error<
-  "matrix dimension %select{M|K when using unpacked input vectors|K "
-  "when using packed input vectors}0 must be less than %1, in a linalg "
-  "Mul/MulAdd operation">;
-
-def err_hlsl_linalg_outer_prod_acc_vector_type_mismatch : Error<
-  "input vectors of outerproductaccumulate must have the same element type">;
-def err_hlsl_linalg_outer_prod_acc_matrix_layout_must_be_outer_prod_acc_optimal : Error<
-  "matrix layout for outerproductaccumulate must be %0">;
-
 // HLSL Change Ends
 
 // SPIRV Change Starts
diff --git a/tools/clang/lib/Headers/hlsl/dx/coopvec.h b/tools/clang/lib/Headers/hlsl/dx/coopvec.h
deleted file mode 100644
index b99e1dbca4..0000000000
--- a/tools/clang/lib/Headers/hlsl/dx/coopvec.h
+++ /dev/null
@@ -1,198 +0,0 @@
-// Header for cooperative vectors APIs.
-
-#if __spirv__
-#error "Cooperative vectors not (yet) supported for SPIRV"
-#endif
-
-#if ((__SHADER_TARGET_MAJOR > 6) ||                                            \
-     (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 9)) &&            \
-    (__HLSL_VERSION >= 2021)
-
-namespace dx {
-namespace coopvec {
-
-// NOTE: can't be an enum class because we get this error:
-//     error: non-type template argument of type 'dx::coopvec::DataType' is not
-//     an integral constant expression
-//
-enum DataType {
-  DATA_TYPE_SINT16 = 2,           // ComponentType::I16
-  DATA_TYPE_UINT16 = 3,           // ComponentType::U16
-  DATA_TYPE_SINT32 = 4,           // ComponentType::I32
-  DATA_TYPE_UINT32 = 5,           // ComponentType::U32
-  DATA_TYPE_FLOAT16 = 8,          // ComponentType::F16
-  DATA_TYPE_FLOAT32 = 9,          // ComponentType::F32
-  DATA_TYPE_SINT8_T4_PACKED = 17, // ComponentType::PackedS8x32
-  DATA_TYPE_UINT8_T4_PACKED = 18, // ComponentType::PackedU8x32
-  DATA_TYPE_SINT8 = 19,           // ComponentType::I8
-  DATA_TYPE_UINT8 = 20,           // ComponentType::U8
-  DATA_TYPE_FLOAT8_E4M3 = 21,     // ComponentType::F8_E4M3
-                                  // (1 sign, 4 exp, 3 mantissa bits)
-  DATA_TYPE_FLOAT8_E5M2 = 22,     // ComponentType::F8_E5M2
-                                  // (1 sign, 5 exp, 2 mantissa bits)
-};
-
-enum MatrixLayout {
-  MATRIX_LAYOUT_ROW_MAJOR = 0,
-  MATRIX_LAYOUT_COLUMN_MAJOR = 1,
-  MATRIX_LAYOUT_MUL_OPTIMAL = 2,
-  MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL = 3
-};
-
-//
-// Helper for signedness
-//
-namespace details {
-
-template <typename T> struct IsUnsigned {};
-
-#define _SPECIALIZE_ISUNSIGNED(type, value)                                    \
-  template <> struct IsUnsigned<type> {                                        \
-    static const bool Value = value;                                           \
-  }
-
-_SPECIALIZE_ISUNSIGNED(uint8_t4_packed, true);
-_SPECIALIZE_ISUNSIGNED(int8_t4_packed, true);
-_SPECIALIZE_ISUNSIGNED(uint32_t, true);
-_SPECIALIZE_ISUNSIGNED(int32_t, false);
-_SPECIALIZE_ISUNSIGNED(float32_t, false);
-
-#ifdef __HLSL_ENABLE_16_BIT
-_SPECIALIZE_ISUNSIGNED(uint16_t, true);
-_SPECIALIZE_ISUNSIGNED(int16_t, false);
-_SPECIALIZE_ISUNSIGNED(float16_t, false);
-#else  // //__HLSL_ENABLE_16_BIT
-_SPECIALIZE_ISUNSIGNED(half, false);
-#endif //__HLSL_ENABLE_16_BIT
-
-#undef _SPECIALIZE_ISUNSIGNED
-
-} // namespace details
-
-//
-// (RW)MatrixRef
-//
-
-template <typename BufferTy, DataType DT, uint M, uint K, MatrixLayout ML,
-          bool Transpose>
-struct MatrixRefImpl {
-  BufferTy Buffer;
-  uint StartOffset;
-  uint Stride;
-};
-
-template <DataType DT, uint M, uint K, MatrixLayout ML, bool Transpose = false>
-using MatrixRef = MatrixRefImpl<ByteAddressBuffer, DT, M, K, ML, Transpose>;
-
-template <DataType DT, uint M, uint K, MatrixLayout ML, bool Transpose = false>
-using RWMatrixRef = MatrixRefImpl<RWByteAddressBuffer, DT, M, K, ML, Transpose>;
-
-//
-// (RW)VectorRef
-//
-
-template <typename BufferTy, DataType DT> struct VectorRefImpl {
-  BufferTy Buffer;
-  uint StartOffset;
-};
-
-template <DataType DT> using VectorRef = VectorRefImpl<ByteAddressBuffer, DT>;
-
-template <DataType DT>
-using RWVectorRef = VectorRefImpl<RWByteAddressBuffer, DT>;
-
-//
-// Vector
-//
-
-template <typename T, int N, DataType DT> struct InterpretedVector {
-  vector<T, N> Data;
-};
-
-template <DataType DT, typename T, int N>
-InterpretedVector<T, N, DT> MakeInterpretedVector(vector<T, N> Vec) {
-  InterpretedVector<T, N, DT> IV = {Vec};
-  return IV;
-}
-
-//
-// Mul
-//
-
-template <typename OutputElTy, typename InputElTy, int InputElCount,
-          typename MatrixBufferTy, DataType InputDT, DataType MatrixDT,
-          uint MatrixM, uint MatrixK, MatrixLayout MatrixLayout,
-          bool MatrixTranspose>
-vector<OutputElTy, MatrixM>
-Mul(MatrixRefImpl<MatrixBufferTy, MatrixDT, MatrixM, MatrixK, MatrixLayout,
-                  MatrixTranspose>
-        Matrix,
-    InterpretedVector<InputElTy, InputElCount, InputDT> InputVector) {
-
-  vector<OutputElTy, MatrixM> OutputVector;
-
-  __builtin_MatVecMul(
-      /*out*/ OutputVector, details::IsUnsigned<OutputElTy>::Value,
-      InputVector.Data, details::IsUnsigned<InputElTy>::Value, InputDT,
-      Matrix.Buffer, Matrix.StartOffset, MatrixDT, MatrixM, MatrixK,
-      MatrixLayout, MatrixTranspose, Matrix.Stride);
-
-  return OutputVector;
-}
-
-//
-// MulAdd
-//
-
-template <typename OutputElTy, typename InputElTy, int InputElCount,
-          typename MatrixBufferTy, DataType InputDT, DataType MatrixDT,
-          uint MatrixM, uint MatrixK, MatrixLayout MatrixLayout,
-          bool MatrixTranspose, typename BiasVectorBufferTy,
-          DataType BiasVectorDT>
-vector<OutputElTy, MatrixM>
-MulAdd(MatrixRefImpl<MatrixBufferTy, MatrixDT, MatrixM, MatrixK, MatrixLayout,
-                     MatrixTranspose>
-           Matrix,
-       InterpretedVector<InputElTy, InputElCount, InputDT> InputVector,
-       VectorRefImpl<BiasVectorBufferTy, BiasVectorDT> BiasVector) {
-
-  vector<OutputElTy, MatrixM> OutputVector;
-
-  __builtin_MatVecMulAdd(
-      /*out*/ OutputVector, details::IsUnsigned<OutputElTy>::Value,
-      InputVector.Data, details::IsUnsigned<InputElTy>::Value, InputDT,
-      Matrix.Buffer, Matrix.StartOffset, MatrixDT, MatrixM, MatrixK,
-      MatrixLayout, MatrixTranspose, Matrix.Stride, BiasVector.Buffer,
-      BiasVector.StartOffset, BiasVectorDT);
-
-  return OutputVector;
-}
-
-//
-// OuterProductAccumulate
-//
-
-template <typename ElTy, int MatrixM, int MatrixN, DataType MatrixDT,
-          MatrixLayout MatrixLayout>
-void OuterProductAccumulate(
-    vector<ElTy, MatrixM> InputVector1, vector<ElTy, MatrixN> InputVector2,
-    RWMatrixRef<MatrixDT, MatrixM, MatrixN, MatrixLayout, false> Matrix) {
-  __builtin_OuterProductAccumulate(InputVector1, InputVector2, Matrix.Buffer,
-                                   Matrix.StartOffset, MatrixDT, MatrixLayout,
-                                   Matrix.Stride);
-}
-
-//
-// VectorAccumulate
-//
-
-template <typename ElTy, int ElCount>
-void VectorAccumulate(vector<ElTy, ElCount> InputVector,
-                      RWByteAddressBuffer Buffer, uint Offset) {
-  __builtin_VectorAccumulate(InputVector, Buffer, Offset);
-}
-
-} // namespace coopvec
-} // namespace dx
-
-#endif // SM 6.9 check and HV version check
diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp
index f1dd0fc42e..30f27fcb6f 100644
--- a/tools/clang/lib/Sema/SemaHLSL.cpp
+++ b/tools/clang/lib/Sema/SemaHLSL.cpp
@@ -11921,537 +11921,6 @@ static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE,
   return false;
 }
 
-// MatVec Ops
-static const unsigned kMatVecMulOutputVectorIdx = 0;
-static const unsigned kMatVecMulOutputIsUnsignedIdx = 1;
-static const unsigned kMatVecMulInputVectorIdx = 2;
-static const unsigned kMatVecMulIsInputUnsignedIdx = 3;
-static const unsigned kMatVecMulInputInterpretationIdx = 4;
-// static const unsigned kMatVecMulMatrixBufferIdx = 5;
-// static const unsigned kMatVecMulMatrixOffsetIdx = 6;
-static const unsigned kMatVecMulMatrixInterpretationIdx = 7;
-static const unsigned kMatVecMulMatrixMIdx = 8;
-static const unsigned kMatVecMulMatrixKIdx = 9;
-static const unsigned kMatVecMulMatrixLayoutIdx = 10;
-static const unsigned kMatVecMulMatrixTransposeIdx = 11;
-static const unsigned kMatVecMulMatrixStrideIdx = 12;
-
-// MatVecAdd
-const unsigned kMatVecMulAddBiasInterpretation = 15;
-
-static bool IsValidMatrixLayoutForMulAndMulAddOps(unsigned Layout) {
-  return Layout <=
-         static_cast<unsigned>(DXIL::LinalgMatrixLayout::OuterProductOptimal);
-}
-
-static bool IsOptimalTypeMatrixLayout(unsigned Layout) {
-  return (
-      Layout == (static_cast<unsigned>(DXIL::LinalgMatrixLayout::MulOptimal)) ||
-      (Layout ==
-       (static_cast<unsigned>(DXIL::LinalgMatrixLayout::OuterProductOptimal))));
-}
-
-static bool IsValidTransposeForMatrixLayout(unsigned Layout, bool Transposed) {
-  switch (static_cast<DXIL::LinalgMatrixLayout>(Layout)) {
-  case DXIL::LinalgMatrixLayout::RowMajor:
-  case DXIL::LinalgMatrixLayout::ColumnMajor:
-    return !Transposed;
-
-  default:
-    return true;
-  }
-}
-
-static bool IsPackedType(unsigned type) {
-  return (type == static_cast<unsigned>(DXIL::ComponentType::PackedS8x32) ||
-          type == static_cast<unsigned>(DXIL::ComponentType::PackedU8x32));
-}
-
-static bool IsValidLinalgTypeInterpretation(uint32_t Input, bool InRegister) {
-
-  switch (static_cast<DXIL::ComponentType>(Input)) {
-  case DXIL::ComponentType::I16:
-  case DXIL::ComponentType::U16:
-  case DXIL::ComponentType::I32:
-  case DXIL::ComponentType::U32:
-  case DXIL::ComponentType::F16:
-  case DXIL::ComponentType::F32:
-  case DXIL::ComponentType::U8:
-  case DXIL::ComponentType::I8:
-  case DXIL::ComponentType::F8_E4M3:
-  case DXIL::ComponentType::F8_E5M2:
-    return true;
-  case DXIL::ComponentType::PackedS8x32:
-  case DXIL::ComponentType::PackedU8x32:
-    return InRegister;
-  default:
-    return false;
-  }
-}
-
-static bool IsValidVectorAndMatrixDimensions(Sema &S, CallExpr *CE,
-                                             unsigned InputVectorSize,
-                                             unsigned OutputVectorSize,
-                                             unsigned MatrixK, unsigned MatrixM,
-                                             bool isInputPacked) {
-  // Check if output vector size equals to matrix dimension M
-  if (OutputVectorSize != MatrixM) {
-    Expr *OutputVector = CE->getArg(kMatVecMulOutputVectorIdx);
-    S.Diags.Report(
-        OutputVector->getExprLoc(),
-        diag::
-            err_hlsl_linalg_mul_muladd_output_vector_size_not_equal_to_matrix_M);
-    return false;
-  }
-
-  // Check if input vector size equals to matrix dimension K in the unpacked
-  // case.
-  // Check if input vector size equals the smallest number that can hold
-  // matrix dimension K values
-  const unsigned PackingFactor = isInputPacked ? 4 : 1;
-  unsigned MinInputVectorSize = (MatrixK + PackingFactor - 1) / PackingFactor;
-  if (InputVectorSize != MinInputVectorSize) {
-    Expr *InputVector = CE->getArg(kMatVecMulInputVectorIdx);
-    if (isInputPacked) {
-      S.Diags.Report(
-          InputVector->getExprLoc(),
-          diag::err_hlsl_linalg_mul_muladd_packed_input_vector_size_incorrect);
-      return false;
-    } else {
-      S.Diags.Report(
-          InputVector->getExprLoc(),
-          diag::
-              err_hlsl_linalg_mul_muladd_unpacked_input_vector_size_not_equal_to_matrix_K);
-      return false;
-    }
-  }
-
-  return true;
-}
-
-static void CheckCommonMulAndMulAddParameters(Sema &S, CallExpr *CE,
-                                              const hlsl::ShaderModel *SM) {
-  // Check if IsOutputUnsigned is a const parameter
-  bool IsOutputUnsignedFlagValue = false;
-  Expr *IsOutputUnsignedExpr = CE->getArg(kMatVecMulOutputIsUnsignedIdx);
-  llvm::APSInt IsOutputUnsignedExprVal;
-  if (IsOutputUnsignedExpr->isIntegerConstantExpr(IsOutputUnsignedExprVal,
-                                                  S.Context)) {
-    IsOutputUnsignedFlagValue = IsOutputUnsignedExprVal.getBoolValue();
-  } else {
-    S.Diags.Report(IsOutputUnsignedExpr->getExprLoc(), diag::err_expr_not_ice)
-        << 0;
-    return;
-  }
-
-  Expr *OutputVectorExpr = CE->getArg(kMatVecMulOutputVectorIdx);
-  unsigned OutputVectorSizeValue = 0;
-  if (IsHLSLVecType(OutputVectorExpr->getType())) {
-    OutputVectorSizeValue = GetHLSLVecSize(OutputVectorExpr->getType());
-    QualType OutputVectorType =
-        GetHLSLVecElementType(OutputVectorExpr->getType());
-    const Type *OutputVectorTypePtr = OutputVectorType.getTypePtr();
-
-    // Check if IsOutputUnsigned flag matches output vector type.
-    // Must be true for unsigned int outputs, false for signed int/float
-    // outputs.
-    if (IsOutputUnsignedFlagValue &&
-        !OutputVectorTypePtr->isUnsignedIntegerType()) {
-      DXASSERT_NOMSG(OutputVectorTypePtr->isSignedIntegerType() ||
-                     OutputVectorTypePtr->isFloatingType());
-      S.Diags.Report(IsOutputUnsignedExpr->getExprLoc(),
-                     diag::err_hlsl_linalg_isunsigned_incorrect_for_given_type)
-          << "IsOuputUnsigned" << false
-          << (OutputVectorTypePtr->isSignedIntegerType() ? 1 : 0);
-      return;
-    } else if (!IsOutputUnsignedFlagValue &&
-               OutputVectorTypePtr->isUnsignedIntegerType()) {
-      S.Diags.Report(IsOutputUnsignedExpr->getExprLoc(),
-                     diag::err_hlsl_linalg_isunsigned_incorrect_for_given_type)
-          << "IsOuputUnsigned" << true << 2;
-      return;
-    }
-  }
-
-  // Check if isInputUnsigned parameter is a constant
-  bool IsInputUnsignedFlagValue = false;
-  Expr *IsInputUnsignedExpr = CE->getArg(kMatVecMulIsInputUnsignedIdx);
-  llvm::APSInt IsInputUnsignedExprVal;
-  if (IsInputUnsignedExpr->isIntegerConstantExpr(IsInputUnsignedExprVal,
-                                                 S.Context)) {
-    IsInputUnsignedFlagValue = IsInputUnsignedExprVal.getBoolValue();
-  } else {
-    S.Diags.Report(IsInputUnsignedExpr->getExprLoc(), diag::err_expr_not_ice)
-        << 0;
-    return;
-  }
-
-  // Get InputInterpretation, check if it is constant
-  Expr *InputInterpretationExpr = CE->getArg(kMatVecMulInputInterpretationIdx);
-  llvm::APSInt InputInterpretationExprVal;
-  unsigned InputInterpretationValue = 0;
-  if (InputInterpretationExpr->isIntegerConstantExpr(InputInterpretationExprVal,
-                                                     S.Context)) {
-    InputInterpretationValue = InputInterpretationExprVal.getLimitedValue();
-    const bool InRegisterInterpretation = true;
-    if (!IsValidLinalgTypeInterpretation(InputInterpretationValue,
-                                         InRegisterInterpretation)) {
-      S.Diags.Report(InputInterpretationExpr->getExprLoc(),
-                     diag::err_hlsl_linalg_interpretation_value_incorrect)
-          << std::to_string(InputInterpretationValue)
-          << InRegisterInterpretation;
-      return;
-    }
-  } else {
-    S.Diags.Report(InputInterpretationExpr->getExprLoc(),
-                   diag::err_expr_not_ice)
-        << 0;
-    return;
-  }
-
-  bool IsInputVectorPacked = IsPackedType(InputInterpretationValue);
-
-  // For packed types input vector type must be uint and isUnsigned must be
-  // true. The signedness is determined from the InputInterpretation
-  Expr *InputVectorExpr = CE->getArg(kMatVecMulInputVectorIdx);
-  unsigned InputVectorSizeValue = 0;
-  if (IsHLSLVecType(InputVectorExpr->getType())) {
-    InputVectorSizeValue = GetHLSLVecSize(InputVectorExpr->getType());
-    QualType InputVectorType =
-        GetHLSLVecElementType(InputVectorExpr->getType());
-    unsigned BitWidth = S.Context.getTypeSize(InputVectorType);
-    bool Is32Bit = (BitWidth == 32);
-    const Type *InputVectorTypePtr = InputVectorType.getTypePtr();
-
-    // Check if the isUnsigned flag setting
-    if (IsInputVectorPacked) {
-      // Check that the input vector element type is "32bit"
-      if (!Is32Bit) {
-        S.Diags.Report(
-            InputVectorExpr->getExprLoc(),
-            diag::err_hlsl_linalg_mul_muladd_packed_input_vector_must_be_uint);
-        return;
-      }
-
-      // Check that the input vector element type is an unsigned int
-      if (!InputVectorTypePtr->isUnsignedIntegerType()) {
-        S.Diags.Report(
-            InputVectorExpr->getExprLoc(),
-            diag::err_hlsl_linalg_mul_muladd_packed_input_vector_must_be_uint);
-        return;
-      }
-
-      // Check that isInputUnsigned is always true
-      // Actual signedness is inferred from the InputInterpretation
-      if (!IsInputUnsignedFlagValue) {
-        S.Diags.Report(
-            IsInputUnsignedExpr->getExprLoc(),
-            diag::
-                err_hlsl_linalg_mul_muladd_isUnsigned_for_packed_input_must_be_true);
-        return;
-      }
-    } else {
-      if (IsInputUnsignedFlagValue &&
-          !InputVectorTypePtr->isUnsignedIntegerType()) {
-        DXASSERT_NOMSG(InputVectorTypePtr->isSignedIntegerType() ||
-                       InputVectorTypePtr->isFloatingType());
-        S.Diags.Report(
-            IsInputUnsignedExpr->getExprLoc(),
-            diag::err_hlsl_linalg_isunsigned_incorrect_for_given_type)
-            << "IsInputUnsigned" << false
-            << (InputVectorTypePtr->isSignedIntegerType() ? 1 : 0);
-        return;
-      } else if (!IsInputUnsignedFlagValue &&
-                 InputVectorTypePtr->isUnsignedIntegerType()) {
-        S.Diags.Report(
-            IsInputUnsignedExpr->getExprLoc(),
-            diag::err_hlsl_linalg_isunsigned_incorrect_for_given_type)
-            << "IsInputUnsigned" << true << 2;
-        return;
-      }
-    }
-  }
-
-  // Get Matrix Dimensions M and K, check if they are constants
-  Expr *MatrixKExpr = CE->getArg(kMatVecMulMatrixKIdx);
-  llvm::APSInt MatrixKExprVal;
-  unsigned MatrixKValue = 0;
-  if (MatrixKExpr->isIntegerConstantExpr(MatrixKExprVal, S.Context)) {
-    MatrixKValue = MatrixKExprVal.getLimitedValue();
-  } else {
-    S.Diags.Report(MatrixKExpr->getExprLoc(), diag::err_expr_not_ice) << 0;
-    return;
-  }
-
-  Expr *MatrixMExpr = CE->getArg(kMatVecMulMatrixMIdx);
-  llvm::APSInt MatrixMExprVal;
-  unsigned MatrixMValue = 0;
-  if (MatrixMExpr->isIntegerConstantExpr(MatrixMExprVal, S.Context)) {
-    MatrixMValue = MatrixMExprVal.getLimitedValue();
-  } else {
-    S.Diags.Report(MatrixMExpr->getExprLoc(), diag::err_expr_not_ice) << 0;
-    return;
-  }
-
-  // Check MatrixM and MatrixK values are non-zero
-  if (MatrixMValue == 0) {
-    S.Diags.Report(MatrixMExpr->getExprLoc(),
-                   diag::err_hlsl_linalg_matrix_dim_must_be_greater_than_zero)
-        << std::to_string(DXIL::kSM69MaxVectorLength);
-    return;
-  }
-
-  if (MatrixKValue == 0) {
-    S.Diags.Report(MatrixKExpr->getExprLoc(),
-                   diag::err_hlsl_linalg_matrix_dim_must_be_greater_than_zero)
-        << std::to_string(DXIL::kSM69MaxVectorLength);
-    return;
-  }
-
-  // Check MatrixM and MatrixK values are less than max
-  // Matrix dimension cannot exceed largest vector length in a Mul/MulAdd
-  // operation.
-  if (MatrixMValue > DXIL::kSM69MaxVectorLength) {
-    S.Diags.Report(MatrixMExpr->getExprLoc(),
-                   diag::err_hlsl_linalg_mul_muladd_invalid_dim)
-        << 0 << std::to_string(DXIL::kSM69MaxVectorLength);
-    return;
-  }
-
-  // For packed input vectors 4 values are packed in a uint, so max Matrix K
-  // can be 4096
-  if (IsInputVectorPacked) {
-    const unsigned PackingFactor =
-        4; // Only supported packed formats: DATA_TYPE_(U)SINT8_T4_PACKED
-    if (MatrixKValue > DXIL::kSM69MaxVectorLength * PackingFactor) {
-      S.Diags.Report(MatrixKExpr->getExprLoc(),
-                     diag::err_hlsl_linalg_mul_muladd_invalid_dim)
-          << 2 << std::to_string(DXIL::kSM69MaxVectorLength * PackingFactor);
-      return;
-    }
-  } else {
-    if (MatrixKValue > DXIL::kSM69MaxVectorLength) {
-      S.Diags.Report(MatrixKExpr->getExprLoc(),
-                     diag::err_hlsl_linalg_mul_muladd_invalid_dim)
-          << 1 << std::to_string(DXIL::kSM69MaxVectorLength);
-      return;
-    }
-  }
-
-  if (!IsValidVectorAndMatrixDimensions(S, CE, InputVectorSizeValue,
-                                        OutputVectorSizeValue, MatrixKValue,
-                                        MatrixMValue, IsInputVectorPacked)) {
-    return;
-  }
-
-  // Get MatrixInterpretation, check if it is constant
-  // Make sure it is a valid value
-  Expr *MatrixInterpretationExpr =
-      CE->getArg(kMatVecMulMatrixInterpretationIdx);
-  llvm::APSInt MatrixInterpretationExprVal;
-  unsigned MatrixInterpretationValue = 0;
-  if (MatrixInterpretationExpr->isIntegerConstantExpr(
-          MatrixInterpretationExprVal, S.Context)) {
-    MatrixInterpretationValue = MatrixInterpretationExprVal.getLimitedValue();
-    const bool InRegisterInterpretation = false;
-    if (!IsValidLinalgTypeInterpretation(MatrixInterpretationValue,
-                                         InRegisterInterpretation)) {
-      S.Diags.Report(MatrixInterpretationExpr->getExprLoc(),
-                     diag::err_hlsl_linalg_interpretation_value_incorrect)
-          << std::to_string(MatrixInterpretationValue)
-          << InRegisterInterpretation;
-      return;
-    }
-  } else {
-    S.Diags.Report(MatrixInterpretationExpr->getExprLoc(),
-                   diag::err_expr_not_ice)
-        << 0;
-    return;
-  }
-
-  // Get MatrixLayout, check if it is constant and valid value
-  Expr *MatrixLayoutExpr = CE->getArg(kMatVecMulMatrixLayoutIdx);
-  llvm::APSInt MatrixLayoutExprVal;
-  unsigned MatrixLayoutValue = 0;
-  if (MatrixLayoutExpr->isIntegerConstantExpr(MatrixLayoutExprVal, S.Context)) {
-    MatrixLayoutValue = MatrixLayoutExprVal.getLimitedValue();
-    if (!IsValidMatrixLayoutForMulAndMulAddOps(MatrixLayoutValue)) {
-      S.Diags.Report(MatrixLayoutExpr->getExprLoc(),
-                     diag::err_hlsl_linalg_matrix_layout_invalid)
-          << std::to_string(MatrixLayoutValue)
-          << std::to_string(
-                 static_cast<unsigned>(DXIL::LinalgMatrixLayout::RowMajor))
-          << std::to_string(static_cast<unsigned>(
-                 DXIL::LinalgMatrixLayout::OuterProductOptimal));
-      return;
-    }
-  } else {
-    S.Diags.Report(MatrixLayoutExpr->getExprLoc(), diag::err_expr_not_ice) << 0;
-    return;
-  }
-
-  // Get MatrixTranspose, check if it is constant
-  Expr *MatrixTransposeExpr = CE->getArg(kMatVecMulMatrixTransposeIdx);
-  llvm::APSInt MatrixTransposeExprVal;
-  unsigned MatrixTransposeValue = 0;
-  if (MatrixTransposeExpr->isIntegerConstantExpr(MatrixTransposeExprVal,
-                                                 S.Context)) {
-    MatrixTransposeValue = MatrixTransposeExprVal.getBoolValue();
-    if (!IsValidTransposeForMatrixLayout(MatrixLayoutValue,
-                                         MatrixTransposeValue)) {
-
-      S.Diags.Report(MatrixTransposeExpr->getExprLoc(),
-                     diag::err_hlsl_linalg_matrix_layout_is_not_transposable);
-      return;
-    }
-  } else {
-    S.Diags.Report(MatrixTransposeExpr->getExprLoc(), diag::err_expr_not_ice)
-        << 0;
-    return;
-  }
-
-  // Get MatrixStride, check if it is constant, if yes it should be zero
-  // for optimal layouts
-  Expr *MatrixStrideExpr = CE->getArg(kMatVecMulMatrixStrideIdx);
-  llvm::APSInt MatrixStrideExprVal;
-  unsigned MatrixStrideValue = 0;
-  if (MatrixStrideExpr->isIntegerConstantExpr(MatrixStrideExprVal, S.Context)) {
-    MatrixStrideValue = MatrixStrideExprVal.getLimitedValue();
-    if (IsOptimalTypeMatrixLayout(MatrixLayoutValue) &&
-        MatrixStrideValue != 0) {
-      S.Diags.Report(
-          MatrixStrideExpr->getExprLoc(),
-          diag::
-              err_hlsl_linalg_optimal_matrix_layout_matrix_stride_must_be_zero);
-      return;
-    }
-  }
-}
-
-static void CheckMulCall(Sema &S, FunctionDecl *FD, CallExpr *CE,
-                         const hlsl::ShaderModel *SM) {
-  CheckCommonMulAndMulAddParameters(S, CE, SM);
-}
-
-static void CheckMulAddCall(Sema &S, FunctionDecl *FD, CallExpr *CE,
-                            const hlsl::ShaderModel *SM) {
-  CheckCommonMulAndMulAddParameters(S, CE, SM);
-
-  // Check if BiasInterpretation is constant and a valid value
-  Expr *BiasInterpretationExpr = CE->getArg(kMatVecMulAddBiasInterpretation);
-  llvm::APSInt BiasInterpretationExprVal;
-  unsigned BiasInterpretationValue = 0;
-  if (BiasInterpretationExpr->isIntegerConstantExpr(BiasInterpretationExprVal,
-                                                    S.Context)) {
-    BiasInterpretationValue = BiasInterpretationExprVal.getLimitedValue();
-    const bool InRegisterInterpretation = false;
-    if (!IsValidLinalgTypeInterpretation(BiasInterpretationValue,
-                                         InRegisterInterpretation)) {
-      S.Diags.Report(BiasInterpretationExpr->getExprLoc(),
-                     diag::err_hlsl_linalg_interpretation_value_incorrect)
-          << std::to_string(BiasInterpretationValue)
-          << InRegisterInterpretation;
-      return;
-    }
-  } else {
-    S.Diags.Report(BiasInterpretationExpr->getExprLoc(), diag::err_expr_not_ice)
-        << 0;
-    return;
-  }
-}
-
-// Linalg Outer Product Accumulate
-// OuterProductAccumulate builtin function parameters
-static const unsigned kOuterProdAccInputVector1Idx = 0;
-static const unsigned kOuterProdAccInputVector2Idx = 1;
-// static const unsigned kOuterProdAccMatrixBufferIdx = 2;
-// static const unsigned kOuterProdAccMatrixOffsetIdx = 3;
-static const unsigned kOuterProdAccMatrixInterpretationIdx = 4;
-static const unsigned kOuterProdAccMatrixLayoutIdx = 5;
-static const unsigned kOuterProdAccMatrixStrideIdx = 6;
-
-static void CheckOuterProductAccumulateCall(Sema &S, FunctionDecl *FD,
-                                            CallExpr *CE) {
-  // Check InputVector1 and InputVector2 are the same type
-  const Expr *InputVector1Expr = CE->getArg(kOuterProdAccInputVector1Idx);
-  const Expr *InputVector2Expr = CE->getArg(kOuterProdAccInputVector2Idx);
-  QualType InputVector1Type = InputVector1Expr->getType();
-  QualType InputVector2Type = InputVector2Expr->getType();
-
-  // Get the element types of the vectors
-  const QualType InputVector1ElementType =
-      GetHLSLVecElementType(InputVector1Type);
-  const QualType InputVector2ElementType =
-      GetHLSLVecElementType(InputVector2Type);
-
-  if (!S.Context.hasSameType(InputVector1ElementType,
-                             InputVector2ElementType)) {
-    S.Diags.Report(InputVector2Expr->getExprLoc(),
-                   diag::err_hlsl_linalg_outer_prod_acc_vector_type_mismatch);
-    return;
-  }
-
-  // Check Matrix Interpretation is a constant and a valid value
-  Expr *MatrixInterpretationExpr =
-      CE->getArg(kOuterProdAccMatrixInterpretationIdx);
-  llvm::APSInt MatrixInterpretationExprVal;
-  unsigned MatrixInterpretationValue = 0;
-  if (MatrixInterpretationExpr->isIntegerConstantExpr(
-          MatrixInterpretationExprVal, S.Context)) {
-    MatrixInterpretationValue = MatrixInterpretationExprVal.getLimitedValue();
-    const bool InRegisterInterpretation = false;
-    if (!IsValidLinalgTypeInterpretation(MatrixInterpretationValue,
-                                         InRegisterInterpretation)) {
-      S.Diags.Report(MatrixInterpretationExpr->getExprLoc(),
-                     diag::err_hlsl_linalg_interpretation_value_incorrect)
-          << std::to_string(MatrixInterpretationValue)
-          << InRegisterInterpretation;
-      return;
-    }
-  } else {
-    S.Diags.Report(MatrixInterpretationExpr->getExprLoc(),
-                   diag::err_expr_not_ice)
-        << 0;
-    return;
-  }
-
-  // Check Matrix Layout must be a constant and Training Optimal
-  Expr *MatrixLayoutExpr = CE->getArg(kOuterProdAccMatrixLayoutIdx);
-  llvm::APSInt MatrixLayoutExprVal;
-  unsigned MatrixLayoutValue = 0;
-  if (MatrixLayoutExpr->isIntegerConstantExpr(MatrixLayoutExprVal, S.Context)) {
-    MatrixLayoutValue = MatrixLayoutExprVal.getLimitedValue();
-    if (MatrixLayoutValue !=
-        static_cast<unsigned>(DXIL::LinalgMatrixLayout::OuterProductOptimal)) {
-      S.Diags.Report(
-          MatrixLayoutExpr->getExprLoc(),
-          diag::
-              err_hlsl_linalg_outer_prod_acc_matrix_layout_must_be_outer_prod_acc_optimal)
-          << std::to_string(static_cast<unsigned>(
-                 DXIL::LinalgMatrixLayout::OuterProductOptimal));
-      return;
-    }
-  } else {
-    S.Diags.Report(MatrixLayoutExpr->getExprLoc(), diag::err_expr_not_ice) << 0;
-    return;
-  }
-
-  // Matrix Stride must be zero (Training Optimal matrix layout)
-  Expr *MatrixStrideExpr = CE->getArg(kOuterProdAccMatrixStrideIdx);
-  llvm::APSInt MatrixStrideExprVal;
-  unsigned MatrixStrideValue = 0;
-  if (MatrixStrideExpr->isIntegerConstantExpr(MatrixStrideExprVal, S.Context)) {
-    MatrixStrideValue = MatrixStrideExprVal.getLimitedValue();
-    if (MatrixStrideValue != 0) {
-      S.Diags.Report(
-          MatrixStrideExpr->getExprLoc(),
-          diag::
-              err_hlsl_linalg_optimal_matrix_layout_matrix_stride_must_be_zero);
-      return;
-    }
-  }
-}
-
 #ifdef ENABLE_SPIRV_CODEGEN
 static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE,
                                      bool isStatic) {
@@ -12568,15 +12037,6 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) {
   case hlsl::IntrinsicOp::IOP_Barrier:
     CheckBarrierCall(*this, FDecl, TheCall, SM);
     break;
-  case hlsl::IntrinsicOp::IOP___builtin_MatVecMul:
-    CheckMulCall(*this, FDecl, TheCall, SM);
-    break;
-  case hlsl::IntrinsicOp::IOP___builtin_MatVecMulAdd:
-    CheckMulAddCall(*this, FDecl, TheCall, SM);
-    break;
-  case hlsl::IntrinsicOp::IOP___builtin_OuterProductAccumulate:
-    CheckOuterProductAccumulateCall(*this, FDecl, TheCall);
-    break;
   case hlsl::IntrinsicOp::IOP_GetAttributeAtVertex:
     // See #hlsl-specs/issues/181. Feature is broken. For SPIR-V we want
     // to limit the scope, and fail gracefully in some cases.
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/check-shader-stages.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/check-shader-stages.hlsl
deleted file mode 100644
index 324e8414b5..0000000000
--- a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/check-shader-stages.hlsl
+++ /dev/null
@@ -1,135 +0,0 @@
-// RUN: %dxc -T lib_6_10 %s | FileCheck %s
- 
-ByteAddressBuffer matrix_buffer;
-ByteAddressBuffer bias_buffer;
-RWByteAddressBuffer rw_matrix_buffer;
-ByteAddressBuffer input_vector_buffer;
-RWByteAddressBuffer output_vector_buffer;
-
-void UseCoopVec() {
-    vector<float, 4> output_vector;
-    static const uint is_output_unsigned = 0;
-
-    vector<float, 4> input_vector = input_vector_buffer.Load<vector<float, 4> >(0);
-    const uint is_input_unsigned = 0;
-    const uint input_interpretation = 9; /*F32*/
-
-    const uint matrix_offset = 0;
-    const uint matrix_interpretation = 9; /*F32*/
-    const uint matrix_dimM = 4;
-    const uint matrix_dimK = 4;
-    const uint matrix_layout = 0; /*RowMajor*/
-    const bool matrix_is_transposed = false;
-    const uint matrix_stride = 64;
-
-    __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-      is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
-      matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
-      matrix_is_transposed, matrix_stride);
-    output_vector_buffer.Store(0, output_vector);
-
-    const uint bias_offset = 0;
-    const uint bias_interpretation = 9; /*F32*/
-
-    __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-      is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
-      matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
-      matrix_is_transposed, matrix_stride, bias_buffer, bias_offset,
-      bias_interpretation);
-    output_vector_buffer.Store(1024, output_vector);
-
-    vector<uint, 8> input_vector1;
-    vector<uint, 8> input_vector2;
-    const uint opa_matrix_offset = 0;
-    const uint opa_matrix_interpretation = 5; /*U32*/
-    const uint opa_matrix_layout = 3; /*OuterProductOptimal*/
-    const uint opa_matrix_stride = 0;
-
-    __builtin_OuterProductAccumulate(input_vector1, input_vector2,
-      rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation,
-      opa_matrix_layout, opa_matrix_stride);
-
-    const uint va_matrix_offset = 0;
-
-     __builtin_VectorAccumulate(input_vector1, rw_matrix_buffer,
-       va_matrix_offset);
-}
-
-// CHECK: define void @ps_main()
-// CHECK: call <4 x float> @dx.op.matVecMul
-// CHECK: call <4 x float> @dx.op.matVecMulAdd
-// CHECK: call void @dx.op.outerProductAccumulate
-// CHECK: call void @dx.op.vectorAccumulate
-
-[Shader("pixel")]
-void ps_main()
-{
-    UseCoopVec();
-}
-
-// CHECK: define void @cs_main()
-// CHECK: call <4 x float> @dx.op.matVecMul
-// CHECK: call <4 x float> @dx.op.matVecMulAdd
-// CHECK: call void @dx.op.outerProductAccumulate
-// CHECK: call void @dx.op.vectorAccumulate
-
-[Shader("compute")]
-[NumThreads(1,1,1)]
-void cs_main()
-{
-    UseCoopVec();
-}
-
-// CHECK: define void @vs_main()
-// CHECK: call <4 x float> @dx.op.matVecMul
-// CHECK: call <4 x float> @dx.op.matVecMulAdd
-// CHECK: call void @dx.op.outerProductAccumulate
-// CHECK: call void @dx.op.vectorAccumulate
-
-[Shader("vertex")]
-void vs_main()
-{
-    UseCoopVec();
-}
-
-struct MyRecord{
-    uint a;
-};
-
-// CHECK: define void @ns_main()
-// CHECK: call <4 x float> @dx.op.matVecMul
-// CHECK: call <4 x float> @dx.op.matVecMulAdd
-// CHECK: call void @dx.op.outerProductAccumulate
-// CHECK: call void @dx.op.vectorAccumulate
-
-[Shader("node")]
-[NodeLaunch("thread")]
-void ns_main(ThreadNodeInputRecord<MyRecord> input)
-{
-    UseCoopVec();
-}
-
-// Vertex shader output structure
-struct VS_OUT {
-    float3 Color : COLOR0;
-};
-
-// Geometry shader output structure
-struct GS_OUT {
-    float3 Color : COLOR0;
-    float2 TexCoord : TEXCOORD0;
-};
-
-// CHECK: define void @gs_main()
-// CHECK:  call <4 x float> @dx.op.matVecMul
-// CHECK: call <4 x float> @dx.op.matVecMulAdd
-// CHECK: call void @dx.op.outerProductAccumulate
-// CHECK: call void @dx.op.vectorAccumulate
-
-[shader("geometry")]
-[maxvertexcount(3)]
-void gs_main(point VS_OUT input[1], 
-    inout TriangleStream<GS_OUT> OutputStream)
-{
-    UseCoopVec();
-}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/linalg-builtins.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/linalg-builtins.hlsl
deleted file mode 100644
index 44b04e456d..0000000000
--- a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/linalg-builtins.hlsl
+++ /dev/null
@@ -1,79 +0,0 @@
-// RUN: %dxc -fcgl -T cs_6_10 -E cs_main %s | FileCheck %s
-
-ByteAddressBuffer input_vector_buffer;
-ByteAddressBuffer opa_input_buffer;
-ByteAddressBuffer matrix_buffer;
-ByteAddressBuffer bias_buffer;
-RWByteAddressBuffer rw_matrix_buffer;
-RWByteAddressBuffer output_vector_buffer;
-
-[Shader("compute")]
-[NumThreads(1,1,1)]
-void cs_main()
-{    
-    vector<float, 4> output_vector;
-    static const uint is_output_unsigned = 0;
-    
-    vector<float, 4> input_vector = input_vector_buffer.Load<vector<float, 4> >(0);
-    const uint is_input_unsigned = 0;
-    const uint input_interpretation = 9; /*F32*/
-    
-    const uint matrix_offset = 0;
-    const uint matrix_interpretation = 9; /*F32*/
-    const uint matrix_dimM = 4;
-    const uint matrix_dimK = 4;
-    const uint matrix_layout = 0; /*RowMajor*/
-    const bool matrix_is_transposed = false; 
-    const uint matrix_stride = 64;
-
-    // CHECK: %[[MLD0:[^ ]+]] = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A"
-    // CHECK: %[[MCH0:[^ ]+]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %[[MLD0]])
-    // CHECK: %[[MAH0:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %[[MCH0]], %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer undef)
-    // CHECK: call void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x float>* %{{[^ ]+}}, i1 false, <4 x float> %{{[^ ]+}}, i1 false, i32 9, %dx.types.Handle %[[MAH0]], i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64)
-    __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-      is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
-      matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
-      matrix_is_transposed, matrix_stride);
-    output_vector_buffer.Store(0, output_vector);
-
-    const uint bias_offset = 0;
-    const uint bias_interpretation = 9; /*F32*/
-
-    // CHECK: %[[MLD1:[^ ]+]] = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A"
-    // CHECK: %[[MCH1:[^ ]+]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %[[MLD1]])
-    // CHECK: %[[MAH1:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %[[MCH1]], %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer undef)
-    // CHECK-NEXT: %[[BLD1:[^ ]+]] = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?bias_buffer@@3UByteAddressBuffer@@A"
-    // CHECK-NEXT: %[[BCH1:[^ ]+]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %[[BLD1]])
-    // CHECK-NEXT: %[[BAH1:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %[[BCH1]], %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer undef)
-    // CHECK-NEXT: call void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x float>* %{{[^ ]+}}, i1 false, <4 x float> %{{[^ ]+}}, i1 false, i32 9, %dx.types.Handle %[[MAH1]], i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64, %dx.types.Handle %[[BAH1]], i32 0, i32 9)
-    __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-      is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
-      matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
-      matrix_is_transposed, matrix_stride, bias_buffer, bias_offset,
-      bias_interpretation);
-    output_vector_buffer.Store(1024, output_vector);
-
-    vector<uint, 8> input_vector1 = opa_input_buffer.Load<vector<uint, 8> >(0);
-    vector<uint, 8> input_vector2 = opa_input_buffer.Load<vector<uint, 8> >(128);
-    const uint opa_matrix_offset = 0;
-    const uint opa_matrix_interpretation = 5; /*U32*/
-    const uint opa_matrix_layout = 3; /*OuterProductOptimal*/
-    const uint opa_matrix_stride = 0;
-
-    // CHECK: %[[MLD2:[^ ]+]] = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A"
-    // CHECK: %[[MCH2:[^ ]+]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %[[MLD2]])
-    // CHECK: %[[MAH2:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %[[MCH2]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef)
-    // CHECK: call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %[[MAH2]], i32 0, i32 5, i32 3, i32 0)
-    __builtin_OuterProductAccumulate(input_vector1, input_vector2,
-      rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation,
-      opa_matrix_layout, opa_matrix_stride);
-
-    const uint va_matrix_offset = 0;
-
-    // CHECK: %[[MLD3:[^ ]+]] = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A"
-    // CHECK: %[[MCH3:[^ ]+]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %[[MLD3]])
-    // CHECK: %[[MAH3:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %[[MCH3]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef)
-    // CHECK: call void @"dx.hl.op..void (i32, <8 x i32>, %dx.types.Handle, i32)"(i32 393, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %[[MAH3]], i32 0)
-    __builtin_VectorAccumulate(input_vector1, rw_matrix_buffer,
-      va_matrix_offset); 
-}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/lit.local.cfg b/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/lit.local.cfg
deleted file mode 100644
index 4d63444a4c..0000000000
--- a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.unsupported = 'dxil-1-10' not in config.available_features
\ No newline at end of file
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/mat-vec-mul-add_multioverload.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/mat-vec-mul-add_multioverload.hlsl
deleted file mode 100644
index b00dd4e223..0000000000
--- a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/mat-vec-mul-add_multioverload.hlsl
+++ /dev/null
@@ -1,122 +0,0 @@
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=F16 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-0
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -DBI=F16 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-1
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -DBI=F16 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-2
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=2 -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -DBI=I32 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-3
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -DBI=I32 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-4
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=I8 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-5
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=8 -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=0 -DBI=I8 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-6
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DINUM=8 -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 -DBI=I8 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-7
-
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=F16 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -DBI=F16 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-1
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -DBI=F16 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-2
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=2 -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -DBI=I32 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-3
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -DBI=I32 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-4
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=I8 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-5
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=8 -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=0 -DBI=I8 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-6
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DINUM=8 -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 -DBI=I8 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-7
-
-
-// COMMON: define void @main()
-
-// Test minimum support set of combinations for matVecMul
-// HLOP-0: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 8, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8)
-
-// DXIL-0: call <8 x half> @dx.op.matVecMulAdd.v8f16.v8f16(i32 306, <8 x half> {{[^ ]+}}, i1 false, i32 8, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i1 false)  ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned)
-
-// HLOP-1: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 21, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 0, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8)
-
-// DXIL-1: call <8 x half> @dx.op.matVecMulAdd.v8f16.v8f16(i32 306, <8 x half> {{[^ ]+}}, i1 false, i32 21, %dx.types.Handle {{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 0, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i1 false)  ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned)
-
-// HLOP-2: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 22, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 0, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8)
-
-// DXIL-2: call <8 x half> @dx.op.matVecMulAdd.v8f16.v8f16(i32 306, <8 x half> {{[^ ]+}}, i1 false, i32 22, %dx.types.Handle {{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 0, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i1 false)  ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned)
-
-// HLOP-3: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <2 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x i32>* %output_vector, i1 false, <2 x i32> %{{[^ ]+}}, i1 true, i32 17, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 0, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 4)
-
-// DXIL-3: call <8 x i32> @dx.op.matVecMulAdd.v8i32.v2i32(i32 306, <2 x i32> {{[^ ]+}}, i1 true, i32 17, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 0, %dx.types.Handle {{[^ ]+}}, i32 0, i32 4, i1 false)  ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned)
-
-// HLOP-4: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x i32>* %output_vector, i1 false, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 4)
-
-// DXIL-4: call <8 x i32> @dx.op.matVecMulAdd.v8i32.v8f32(i32 306, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 4, i1 false)  ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned)
-
-// Test unsigned variations
-// HLOP-5: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x i32>* %output_vector, i1 true, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20)
-
-// DXIL-5: call <8 x i32> @dx.op.matVecMulAdd.v8i32.v8f32(i32 306, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i1 true)  ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned)
-
-// HLOP-6: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 true, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20)
-
-// DXIL-6: call <8 x i32> @dx.op.matVecMulAdd.v8i32.v8i32(i32 306, <8 x i32> {{[^ ]+}}, i1 true, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i1 false)  ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned)
-
-// HLOP-7: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 false, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 0, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20)
-
-// DXIL-7: call <8 x i32> @dx.op.matVecMulAdd.v8i32.v8i32(i32 306, <8 x i32> {{[^ ]+}}, i1 false, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 0, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i1 false)  ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned)
-
-
-ByteAddressBuffer input_vector_buffer; 
-ByteAddressBuffer matrix_buffer;
-ByteAddressBuffer bias_buffer;
-RWByteAddressBuffer rw_matrix_buffer;
-RWByteAddressBuffer output_vector_buffer;
-
-enum CompType {
-  Invalid = 0,
-  I1 = 1,
-  I16 = 2,
-  U16 = 3,
-  I32 = 4,
-  U32 = 5,
-  I64 = 6,
-  U64 = 7,
-  F16 = 8,
-  F32 = 9,
-  F64 = 10,
-  SNormF16 = 11,
-  UNormF16 = 12,
-  SNormF32 = 13,
-  UNormF32 = 14,
-  SNormF64 = 15,
-  UNormF64 = 16,
-  PackedS8x32 = 17,
-  PackedU8x32 = 18,
-
-  // BEGIN NEW FOR SM 6.9
-  U8 = 19,
-  I8 = 20,
-  F8_E4M3 = 21,
-  F8_E5M2 = 22,
-};
-
-enum MatLayout {
-  RowMajor = 0,
-  ColumnMajor = 1,
-  MulOptimal = 2,
-  OuterProductOptimal = 3,
-};
-
-[NumThreads(1,1,1)]
-void main()
-{    
-    vector<OTY, 8> output_vector;
-    static const uint is_output_unsigned = OU;
-    
-    vector<ITY, INUM> input_vector = input_vector_buffer.Load<vector<ITY, INUM> >(0);
-    const uint is_input_unsigned = IU;
-    const uint input_interpretation = II;
-    
-    const uint matrix_offset = 0;
-    const uint matrix_interpretation = MI;
-    const uint matrix_dimM = 8;
-    const uint matrix_dimK = 8;
-    const uint matrix_layout = ML;
-    const bool matrix_is_transposed = (bool) MT; 
-    const uint matrix_stride = MST;
-
-    const uint bias_offset = 0;
-    const uint bias_interpretation = BI;
-
-    __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, matrix_interpretation, 
-        matrix_dimM, matrix_dimK, matrix_layout, matrix_is_transposed, matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-    output_vector_buffer.Store(0, output_vector);
-}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/mat-vec-mul_multioverload.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/mat-vec-mul_multioverload.hlsl
deleted file mode 100644
index ebe76a04c4..0000000000
--- a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/mat-vec-mul_multioverload.hlsl
+++ /dev/null
@@ -1,118 +0,0 @@
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-0
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-1
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -DMST=0| FileCheck %s --check-prefixes COMMON,DXIL-2
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DII=PackedS8x32 -DINUM=2 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-3
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-4
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=F16 -DINUM=8 -DML=RowMajor -DMT=0 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-5
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=8 -DII=U8 -DMI=I8 -DINUM=8 -DML=ColumnMajor -DMT=0 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-6
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DINUM=8 -DII=U8 -DMI=U8 -DINUM=8 -DML=MulOptimal -DMT=1 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-7
-
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-1
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-2
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=2 -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-3
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-4
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-5
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=8 -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=0 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-6
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DINUM=8 -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-7
-
-// COMMON: define void @main()
-
-// Test minimum support set of combinations for matVecMul
-// HLOP-0: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 8, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64)
-
-// DXIL-0: call <8 x half> @dx.op.matVecMul.v8f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 8, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, i1 false)  ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned)
-
-// HLOP-1: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 21, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 0)
-
-// DXIL-1: call <8 x half> @dx.op.matVecMul.v8f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 21, %dx.types.Handle {{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 0, i1 false)  ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned)
-
-// HLOP-2: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 22, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 0)
-
-// DXIL-2: call <8 x half> @dx.op.matVecMul.v8f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 22, %dx.types.Handle {{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 0, i1 false)  ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned)
-
-// HLOP-3: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <2 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x i32>* %output_vector, i1 false, <2 x i32> %{{[^ ]+}}, i1 true, i32 17, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 0)
-
-// DXIL-3: call <8 x i32>  @dx.op.matVecMul.v8i32.v2i32(i32 305, <2 x i32> {{[^ ]+}}, i1 true, i32 17, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 0, i1 false)  ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned)
-
-// HLOP-4: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x i32>* %output_vector, i1 false, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64)
-
-// DXIL-4: call <8 x i32> @dx.op.matVecMul.v8i32.v8f32(i32 305, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64, i1 false)  ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned)
-
-// Test unsigned variations
-// HLOP-5: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x i32>* %output_vector, i1 true, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64)
-
-// DXIL-5: call <8 x i32> @dx.op.matVecMul.v8i32.v8f32(i32 305, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, i1 true)  ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned)
-
-// HLOP-6: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 true, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64)
-
-// DXIL-6: call <8 x i32> @dx.op.matVecMul.v8i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 true, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64, i1 false)  ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned)
-
-// HLOP-7: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 false, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 0)
-
-// DXIL-7: call <8 x i32> @dx.op.matVecMul.v8i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 false, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 0, i1 false)  ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned)
-
-
-ByteAddressBuffer input_vector_buffer; 
-ByteAddressBuffer matrix_buffer;
-ByteAddressBuffer bias_buffer;
-RWByteAddressBuffer rw_matrix_buffer;
-RWByteAddressBuffer output_vector_buffer;
-
-enum CompType {
-  Invalid = 0,
-  I1 = 1,
-  I16 = 2,
-  U16 = 3,
-  I32 = 4,
-  U32 = 5,
-  I64 = 6,
-  U64 = 7,
-  F16 = 8,
-  F32 = 9,
-  F64 = 10,
-  SNormF16 = 11,
-  UNormF16 = 12,
-  SNormF32 = 13,
-  UNormF32 = 14,
-  SNormF64 = 15,
-  UNormF64 = 16,
-  PackedS8x32 = 17,
-  PackedU8x32 = 18,
-
-  // BEGIN NEW FOR SM 6.9
-  U8 = 19,
-  I8 = 20,
-  F8_E4M3 = 21,
-  F8_E5M2 = 22,
-};
-
-enum MatLayout {
-  RowMajor = 0,
-  ColumnMajor = 1,
-  MulOptimal = 2,
-  OuterProductOptimal = 3,
-};
-
-[NumThreads(1,1,1)]
-void main()
-{    
-    vector<OTY, 8> output_vector;
-    static const uint is_output_unsigned = OU;
-    
-    vector<ITY, INUM> input_vector = input_vector_buffer.Load<vector<ITY, INUM> >(0);
-    const uint is_input_unsigned = IU;
-    const uint input_interpretation = II;
-    
-    const uint matrix_offset = 0;
-    const uint matrix_interpretation = MI;
-    const uint matrix_dimM = 8;
-    const uint matrix_dimK = 8;
-    const uint matrix_layout = ML;
-    const bool matrix_is_transposed = (bool) MT; 
-    const uint matrix_stride = MST;
-
-    __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, matrix_interpretation, 
-        matrix_dimM, matrix_dimK, matrix_layout, matrix_is_transposed, matrix_stride);
-    output_vector_buffer.Store(0, output_vector);
-}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/outer-product-accumulate-multioverload.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/outer-product-accumulate-multioverload.hlsl
deleted file mode 100644
index 9feb48e6be..0000000000
--- a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/outer-product-accumulate-multioverload.hlsl
+++ /dev/null
@@ -1,75 +0,0 @@
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DITY=float16_t -DMI=F16 -DML=OuterProductOptimal | FileCheck %s --check-prefixes COMMON,DXIL-0
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DITY=float16_t -DMI=F8_E4M3 -DML=OuterProductOptimal | FileCheck %s --check-prefixes COMMON,DXIL-1
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DITY=uint -DMI=U8 -DML=OuterProductOptimal | FileCheck %s --check-prefixes COMMON,DXIL-2
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DITY=float16_t -DMI=F16 -DML=OuterProductOptimal -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DITY=float16_t -DMI=F8_E4M3 -DML=OuterProductOptimal -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-1
-// RUN: %dxc -T cs_6_10 %s -enable-16bit-types -DITY=uint -DMI=U8 -DML=OuterProductOptimal -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-2
-
-ByteAddressBuffer input_vector_buffer;
-ByteAddressBuffer input_vector_buffer2;
-RWByteAddressBuffer matrix_buffer;
-
-// COMMON: define void @main()
-
-// DXIL-0: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 3, i32 0)  ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride)
-
-// HLOP-0: call void @"dx.hl.op..void (i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 3, i32 0)
-
-// DXIL-1: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 3, i32 0)  ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride)
-
-// HLOP-1: call void @"dx.hl.op..void (i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 3, i32 0)
-
-// DXIL-2: call void @dx.op.outerProductAccumulate.v8i32.v8i32(i32 307, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 3, i32 0)  ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride)
-
-// HLOP-2: call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 3, i32 0)
-
-enum CompType {
-  Invalid = 0,
-  I1 = 1,
-  I16 = 2,
-  U16 = 3,
-  I32 = 4,
-  U32 = 5,
-  I64 = 6,
-  U64 = 7,
-  F16 = 8,
-  F32 = 9,
-  F64 = 10,
-  SNormF16 = 11,
-  UNormF16 = 12,
-  SNormF32 = 13,
-  UNormF32 = 14,
-  SNormF64 = 15,
-  UNormF64 = 16,
-  PackedS8x32 = 17,
-  PackedU8x32 = 18,
-
-  // BEGIN NEW FOR SM 6.9
-  U8 = 19,
-  I8 = 20,
-  F8_E4M3 = 21,
-  F8_E5M2 = 22,
-};
-
-enum MatLayout {
-  RowMajor = 0,
-  ColumnMajor = 1,
-  MulOptimal = 2,
-  OuterProductOptimal = 3,
-};
-
-
-[Numthreads(1,1,1)]
-void main()
-{
-    vector<ITY, 8> input_vector1 = input_vector_buffer.Load<vector<ITY, 8> >(0);
-    vector<ITY, 8> input_vector2 = input_vector_buffer2.Load<vector<ITY, 8> >(0);
-
-    const uint matrix_interpretation = MI;
-    const uint matrix_layout = ML;
-    const uint matrix_offset = 0;
-    const uint matrix_stride = 0;
-
-    __builtin_OuterProductAccumulate(input_vector1, input_vector2, matrix_buffer, matrix_offset, matrix_interpretation, matrix_layout, matrix_stride);
-
-}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/vector-accumulate.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/vector-accumulate.hlsl
deleted file mode 100644
index 779a371af1..0000000000
--- a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/builtins/vector-accumulate.hlsl
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: %dxc -T cs_6_10 %s | FileCheck %s
-
-RWByteAddressBuffer matrix_buffer;
-
-// Test use of __builtin_VectorAccumulate in compute shader
-// CHECK: define void @main()
-// CHECK: call void @dx.op.vectorAccumulate.v2i32(i32 {{[0-9]+}}, <2 x i32> <i32 5, i32 5>, %dx.types.Handle {{%[0-9]+}}, i32 0)
-
-[NumThreads(1,1,1)]
-void main()
-{
-    vector<uint, 2> input_vector1 = 5;
-    const uint matrix_offset = 0;
-
-     __builtin_VectorAccumulate(input_vector1, matrix_buffer, matrix_offset);
-}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/mat-vec-mul.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/coopvec/mat-vec-mul.hlsl
deleted file mode 100644
index 1a7dfabfe2..0000000000
--- a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/mat-vec-mul.hlsl
+++ /dev/null
@@ -1,93 +0,0 @@
-// REQUIRES: dxil-1-10
-// RUN: %dxc -I %hlsl_headers -T lib_6_10 -enable-16bit-types %s | FileCheck %s
-
-#include <dx/coopvec.h>
-
-ByteAddressBuffer Buf;
-
-export float4 Test1(vector<float, 4> Input) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_FLOAT16, 4, 4, MATRIX_LAYOUT_MUL_OPTIMAL, true> Matrix = {
-      Buf, 0, 0};
-
-  // CHECK: %{{.+}} = call <4 x float> @dx.op.matVecMul.v4f32.v4f32(i32 305, <4 x float> %{{.+}}, i1 false, i32 8, %dx.types.Handle %{{.+}}, i32 0, i32 8, i32 4, i32 4, i32 2, i1 true, i32 0, i1 false)
-  return Mul<float>(    
-      Matrix, MakeInterpretedVector<DATA_TYPE_FLOAT16>(Input));
-}
-
-export vector<float, 8> Test2(vector<uint, 6> Input) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_UINT8, 8, 6 * 4, MATRIX_LAYOUT_MUL_OPTIMAL> Matrix = {
-      Buf, 0, 0};
-
-  // note the stride argument is dropped.
-  // CHECK: %{{.+}} = call <8 x float> @dx.op.matVecMul.v8f32.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 20, i32 8, i32 24, i32 2, i1 false, i32 0, i1 false)
-  return Mul<float>(Matrix,
-                    MakeInterpretedVector<DATA_TYPE_UINT8_T4_PACKED>(Input));
-}
-
-// test that "stride" isn't ignored in non-optimal layouts
-export vector<float, 8> Test3(vector<uint, 6> Input) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_UINT8, 8, 6 * 4, MATRIX_LAYOUT_ROW_MAJOR> Matrix = {
-      Buf, 0, 6 * 4 * 8};
-
-  // CHECK: %{{.+}} = call <8 x float> @dx.op.matVecMul.v8f32.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 20, i32 8, i32 24, i32 0, i1 false, i32 192, i1 false)
-  return Mul<float>(Matrix,
-                    MakeInterpretedVector<DATA_TYPE_UINT8_T4_PACKED>(Input));
-}
-
-// test that isUnsigned is set correctly for uint16_t
-export vector<uint16_t, 8> Test4(vector<uint, 6> Input) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_UINT8, 8, 6 * 4, MATRIX_LAYOUT_ROW_MAJOR> Matrix = {
-      Buf, 0, 6 * 4 * 8};
-
-  // CHECK: %{{.+}} = call <8 x i16> @dx.op.matVecMul.v8i16.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 20, i32 8, i32 24, i32 0, i1 false, i32 192, i1 true)
-  return Mul<uint16_t>(Matrix,
-                    MakeInterpretedVector<DATA_TYPE_UINT8_T4_PACKED>(Input));  
-
-}
-
-// test that isUnsigned is set correctly for uint32_t
-export vector<uint, 8> Test5(vector<uint, 6> Input) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_UINT8, 8, 6 * 4, MATRIX_LAYOUT_ROW_MAJOR> Matrix = {
-      Buf, 0, 6 * 4 * 8};
-
-  // CHECK: %{{.+}} = call <8 x i32> @dx.op.matVecMul.v8i32.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 20, i32 8, i32 24, i32 0, i1 false, i32 192, i1 true)
-  return Mul<uint>(Matrix,
-                    MakeInterpretedVector<DATA_TYPE_UINT8_T4_PACKED>(Input));  
-
-}
-
-// test that isUnsigned is set correctly for uint8_t4_packed
-export vector<uint, 8> Test5(vector<uint8_t4_packed, 6> Input) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_UINT8, 8, 6 * 4, MATRIX_LAYOUT_ROW_MAJOR> Matrix = {
-      Buf, 0, 6 * 4 * 8};
-
-  // CHECK: %{{.+}} = call <8 x i32> @dx.op.matVecMul.v8i32.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 20, i32 8, i32 24, i32 0, i1 false, i32 192, i1 true)
-  return Mul<uint>(Matrix,
-                    MakeInterpretedVector<DATA_TYPE_UINT8_T4_PACKED>(Input));  
-
-}
-
-// test that isUnsigned is set correctly for int8_t4_packed
-export vector<uint, 8> Test5(vector<int8_t4_packed, 6> Input) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_UINT8, 8, 6 * 4, MATRIX_LAYOUT_ROW_MAJOR> Matrix = {
-      Buf, 0, 6 * 4 * 8};
-
-  // CHECK: %{{.+}} = call <8 x i32> @dx.op.matVecMul.v8i32.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 17, %dx.types.Handle %{{.+}}, i32 0, i32 20, i32 8, i32 24, i32 0, i1 false, i32 192, i1 true)
-  return Mul<uint>(Matrix,
-                    MakeInterpretedVector<DATA_TYPE_SINT8_T4_PACKED>(Input));  
-
-}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/mat-vec-muladd.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/coopvec/mat-vec-muladd.hlsl
deleted file mode 100644
index 74c448c3cc..0000000000
--- a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/mat-vec-muladd.hlsl
+++ /dev/null
@@ -1,91 +0,0 @@
-// REQUIRES: dxil-1-10
-// RUN: %dxc -I %hlsl_headers -T lib_6_10 %s | FileCheck %s
-
-#include <dx/coopvec.h>
-
-ByteAddressBuffer Buf;
-
-export float4 Test1(float4 input) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_FLOAT16, 4, 4, MATRIX_LAYOUT_MUL_OPTIMAL> matrix = {Buf,
-                                                                          0, 0};
-  VectorRef<DATA_TYPE_FLOAT16> biasVector = {Buf, 256};
-
-  InterpretedVector<float, 4, DATA_TYPE_FLOAT16> theVector = {input};
-
-  // CHECK: %{{.+}} = call <4 x float> @dx.op.matVecMulAdd.v4f32.v4f32(i32 306, <4 x float> %{{.+}}, i1 false, i32 8, %dx.types.Handle [[RES:%.+]], i32 0, i32 8, i32 4, i32 4, i32 2, i1 false, i32 0, %dx.types.Handle [[RES]], i32 256, i32 8, i1 false)
-  return MulAdd<float>(
-      matrix, theVector,
-      biasVector);
-}
-
-export float4 Test2(float4 input) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_FLOAT16, 4, 4, MATRIX_LAYOUT_MUL_OPTIMAL, true> matrix = {
-      Buf, 0, 0};
-  VectorRef<DATA_TYPE_FLOAT16> biasVector = {Buf, 256};
-
-  InterpretedVector<float, 4, DATA_TYPE_FLOAT16> theVector = {input};
-
-  // CHECK: %{{.+}} = call <4 x float> @dx.op.matVecMulAdd.v4f32.v4f32(i32 306, <4 x float> %{{.+}}, i1 false, i32 8, %dx.types.Handle [[RES:%.+]], i32 0, i32 8, i32 4, i32 4, i32 2, i1 true, i32 0, %dx.types.Handle [[RES]], i32 256, i32 8, i1 false)
-  return MulAdd<float>(
-      matrix, theVector,
-      biasVector);
-}
-
-export float4 Test3(float4 input) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_FLOAT16, 4, 4, MATRIX_LAYOUT_MUL_OPTIMAL, true> matrix = {
-      Buf, 0, 0};
-  VectorRef<DATA_TYPE_FLOAT16> biasVector = {Buf, 256};
-
-  // CHECK: %{{.+}} = call <4 x float> @dx.op.matVecMulAdd.v4f32.v4f32(i32 306, <4 x float> %{{.+}}, i1 false, i32 8, %dx.types.Handle [[RES:%.+]], i32 0, i32 8, i32 4, i32 4, i32 2, i1 true, i32 0, %dx.types.Handle [[RES]], i32 256, i32 8, i1 false)
-  return MulAdd<float>(
-      matrix, MakeInterpretedVector<DATA_TYPE_FLOAT16>(input),
-      biasVector);
-}
-
-namespace ProposalExample {
-
-ByteAddressBuffer model;
-
-vector<float, 3> ApplyNeuralMaterial(vector<half, 8> inputVector) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_FLOAT8_E4M3, 32, 8, MATRIX_LAYOUT_MUL_OPTIMAL> matrix0 = {
-      model, 0, 0};
-
-  VectorRef<DATA_TYPE_FLOAT16> biasVector0 = {model, 1024};
-
-  MatrixRef<DATA_TYPE_FLOAT8_E4M3, 32, 32, MATRIX_LAYOUT_MUL_OPTIMAL> matrix1 =
-      {model, 2048, 0};
-
-  VectorRef<DATA_TYPE_FLOAT16> biasVector1 = {model, 3072};
-
-  MatrixRef<DATA_TYPE_FLOAT8_E4M3, 3, 32, MATRIX_LAYOUT_MUL_OPTIMAL> matrix2 = {
-      model, 4096, 0};
-
-  VectorRef<DATA_TYPE_FLOAT16> biasVector2 = {model, 5120};
-
-  vector<half, 32> layer0 = MulAdd<half>(
-      matrix0, MakeInterpretedVector<DATA_TYPE_FLOAT8_E4M3>(inputVector),
-      biasVector0);
-  layer0 = max(layer0, 0);
-
-  vector<half, 32> layer1 = MulAdd<half>(
-      matrix1, MakeInterpretedVector<DATA_TYPE_FLOAT8_E4M3>(layer0),
-      biasVector1);
-  layer1 = max(layer1, 0);
-
-  vector<float, 3> output = MulAdd<float>(
-      matrix2, MakeInterpretedVector<DATA_TYPE_FLOAT8_E4M3>(layer1),
-      biasVector2);
-  output = exp(output);
-
-  return output;
-}
-
-} // namespace ProposalExample
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/outer-product-accumulate-matrix-layout.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/coopvec/outer-product-accumulate-matrix-layout.hlsl
deleted file mode 100644
index d7e1176b04..0000000000
--- a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/outer-product-accumulate-matrix-layout.hlsl
+++ /dev/null
@@ -1,29 +0,0 @@
-// REQUIRES: dxil-1-10
-// RUN: %dxc -I %hlsl_headers -T cs_6_10 %s -enable-16bit-types -DML=MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL -DSTRIDE=0 2>&1 | FileCheck %s
-
-//Source file for the IR in \tools\clang\test\LitDXILValidation\outer-product-accumulate-matrix-layout-failing.ll
-//Source file for the IR in \tools\clang\test\LitDXILValidation\outer-product-accumulate-matrix-layout-passing.ll
-
-ByteAddressBuffer input_vector_buffer;
-ByteAddressBuffer input_vector_buffer2;
-RWByteAddressBuffer matrix_buffer;
-
-#include <dx/coopvec.h>
-
-// CHECK: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 3, i32 0)
-using namespace dx::coopvec;
-
-[Numthreads(1,1,1)]
-[shader("compute")]
-void main()
-{
-  vector<half, 8> input_vector1 = input_vector_buffer.Load<vector<half, 8> >(0);
-  vector<half, 8> input_vector2 = input_vector_buffer2.Load<vector<half, 8> >(0);
-
-  const uint matrix_interpretation = DATA_TYPE_FLOAT16;
-  const uint matrix_layout = ML;
-  const uint matrix_offset = 0;
-  const uint matrix_stride = STRIDE;
-
-  __builtin_OuterProductAccumulate(input_vector1, input_vector2, matrix_buffer, matrix_offset, matrix_interpretation, matrix_layout, matrix_stride);
-}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/outerproductaccumulate.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/coopvec/outerproductaccumulate.hlsl
deleted file mode 100644
index 6ec3fbdcab..0000000000
--- a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/outerproductaccumulate.hlsl
+++ /dev/null
@@ -1,17 +0,0 @@
-// REQUIRES: dxil-1-10
-// RUN: %dxc -I %hlsl_headers -T lib_6_10 -enable-16bit-types %s | FileCheck %s
-
-#include <dx/coopvec.h>
-
-RWByteAddressBuffer RWBuf;
-
-export void Test4(vector<half, 128> Input1, vector<half, 64> Input2) {
-  using namespace dx::coopvec;
-
-  RWMatrixRef<DATA_TYPE_FLOAT16, 128, 64, MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL>
-      matrix = {RWBuf, 0, 0};
-
-  // CHECK: call void @dx.op.outerProductAccumulate.v128f16.v64f16(i32 307, <128 x half> %{{.+}}, <64 x half> %{{.+}}, %dx.types.Handle %{{.+}}, i32 0, i32 8, i32 3, i32 0)
-
-  OuterProductAccumulate(Input1, Input2, matrix);  
-}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/vectoraccumulate.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/coopvec/vectoraccumulate.hlsl
deleted file mode 100644
index 5cfbff72e9..0000000000
--- a/tools/clang/test/CodeGenDXIL/hlsl/coopvec/vectoraccumulate.hlsl
+++ /dev/null
@@ -1,15 +0,0 @@
-// REQUIRES: dxil-1-10
-// RUN: %dxc -I %hlsl_headers -T lib_6_10 %s | FileCheck %s
-
-#include <dx/coopvec.h>
-
-RWByteAddressBuffer RWBuf;
-
-export void Test5(vector<half, 128> Input) {
-  using namespace dx::coopvec;
-
-  RWBuf.Store<vector<half, 128> >(0, Input);
-
-  // CHECK: call void @dx.op.vectorAccumulate.v128f32(i32 308, <128 x float> %{{.*}}, %dx.types.Handle %{{.*}}, i32 0)
-  VectorAccumulate(Input, RWBuf, 0);
-}
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/clusterid.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/clusterid.hlsl
index 947da6a29b..d95b5ed143 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/clusterid.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/clusterid.hlsl
@@ -7,24 +7,24 @@
 
 // AST: `-CXXMethodDecl {{.*}} used GetClusterID 'unsigned int ()' extern
 // AST-NEXT: {{.*}}|-TemplateArgument type 'unsigned int'
-// AST-NEXT: {{.*}}|-HLSLIntrinsicAttr {{.*}} Implicit "op" "" 400
+// AST-NEXT: {{.*}}|-HLSLIntrinsicAttr {{.*}} Implicit "op" "" 396
 // AST-NEXT: {{.*}}|-ConstAttr {{.*}} Implicit
 // AST-NEXT: {{.*}}`-AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
 
 // AST: `-CXXMethodDecl {{.*}} used CandidateClusterID 'unsigned int ()' extern
 // AST-NEXT: {{.*}}|-TemplateArgument type 'unsigned int'
-// AST-NEXT: {{.*}}|-HLSLIntrinsicAttr {{.*}} Implicit "op" "" 398
+// AST-NEXT: {{.*}}|-HLSLIntrinsicAttr {{.*}} Implicit "op" "" 394
 // AST-NEXT: {{.*}}|-PureAttr {{.*}} Implicit
 // AST-NEXT: {{.*}}`-AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
 
 // AST: `-CXXMethodDecl {{.*}} used CommittedClusterID 'unsigned int ()' extern
 // AST-NEXT: {{.*}}|-TemplateArgument type 'unsigned int'
-// AST-NEXT: {{.*}}|-HLSLIntrinsicAttr {{.*}} Implicit "op" "" 399
+// AST-NEXT: {{.*}}|-HLSLIntrinsicAttr {{.*}} Implicit "op" "" 395
 // AST-NEXT: {{.*}}|-PureAttr {{.*}} Implicit
 // AST-NEXT: {{.*}}`-AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
 
 // AST: -FunctionDecl {{.*}} implicit used ClusterID 'unsigned int ()' extern
-// AST-NEXT: {{.*}}|-HLSLIntrinsicAttr {{.*}} Implicit "op" "" 397
+// AST-NEXT: {{.*}}|-HLSLIntrinsicAttr {{.*}} Implicit "op" "" 393
 // AST-NEXT: {{.*}}|-ConstAttr {{.*}} Implicit
 // AST-NEXT: {{.*}}|-AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
 // AST-NEXT: {{.*}}`-HLSLBuiltinCallAttr {{.*}} Implicit
@@ -41,7 +41,7 @@ struct [raypayload] Payload {
 // CHECK: call void @dx.op.rawBufferStore.i32
 
 // FCGL-LABEL: define void @{{.*}}test_cluster_id{{.*}}(
-// FCGL: call i32 @"dx.hl.op.rn.i32 (i32)"(i32 397)
+// FCGL: call i32 @"dx.hl.op.rn.i32 (i32)"(i32 393)
 [shader("closesthit")]
 void test_cluster_id(inout Payload payload, in BuiltInTriangleIntersectionAttributes attr) {
   uint cid = ClusterID();
@@ -54,7 +54,7 @@ void test_cluster_id(inout Payload payload, in BuiltInTriangleIntersectionAttrib
 // CHECK: call void @dx.op.rawBufferStore.i32
 
 // FCGL-LABEL: define void @{{.*}}test_rayquery_candidate_cluster_id{{.*}}(
-// FCGL: call i32 @"dx.hl.op.ro.i32 (i32, %{{.*}}"(i32 398
+// FCGL: call i32 @"dx.hl.op.ro.i32 (i32, %{{.*}}"(i32 394
 [shader("raygeneration")]
 void test_rayquery_candidate_cluster_id() {
   RayQuery<RAY_FLAG_NONE> rq;
@@ -77,7 +77,7 @@ void test_rayquery_candidate_cluster_id() {
 // CHECK: call void @dx.op.rawBufferStore.i32
 
 // FCGL-LABEL: define void @{{.*}}test_rayquery_committed_cluster_id{{.*}}(
-// FCGL: call i32 @"dx.hl.op.ro.i32 (i32, %{{.*}}"(i32 399
+// FCGL: call i32 @"dx.hl.op.ro.i32 (i32, %{{.*}}"(i32 395
 [shader("raygeneration")]
 void test_rayquery_committed_cluster_id() {
   RayQuery<RAY_FLAG_NONE> rq;
@@ -99,7 +99,7 @@ void test_rayquery_committed_cluster_id() {
 // CHECK: call void @dx.op.rawBufferStore.i32
 
 // FCGL-LABEL: define void @{{.*}}test_hitobject_cluster_id{{.*}}(
-// FCGL: call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject{{.*}}"(i32 400
+// FCGL: call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject{{.*}}"(i32 396
 [shader("raygeneration")]
 void test_hitobject_cluster_id() {
   dx::HitObject ho = dx::HitObject::MakeNop();
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/triangle_positions.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/triangle_positions.hlsl
index b7fca3ebf3..431080f956 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/triangle_positions.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/triangle_positions.hlsl
@@ -5,24 +5,24 @@
 
 // AST: `-CXXMethodDecl {{.*}} <<invalid sloc>> <invalid sloc> used TriangleObjectPositions 'BuiltInTrianglePositions &()' extern
 // AST-NEXT:   |-TemplateArgument type 'BuiltInTrianglePositions'
-// AST-NEXT:   |-HLSLIntrinsicAttr {{.*}} <<invalid sloc>> Implicit "op" "" 404
+// AST-NEXT:   |-HLSLIntrinsicAttr {{.*}} <<invalid sloc>> Implicit "op" "" 400
 // AST-NEXT:   |-ConstAttr {{.*}} <<invalid sloc>> Implicit
 // AST-NEXT:   `-AvailabilityAttr {{.*}} <<invalid sloc>> Implicit  6.10 0 0 ""
 
 // AST: `-CXXMethodDecl {{.*}} <<invalid sloc>> <invalid sloc> used CandidateTriangleObjectPositions 'BuiltInTrianglePositions &()' extern
 // AST:   |-TemplateArgument type 'BuiltInTrianglePositions'
-// AST:   |-HLSLIntrinsicAttr {{.*}} <<invalid sloc>> Implicit "op" "" 402
+// AST:   |-HLSLIntrinsicAttr {{.*}} <<invalid sloc>> Implicit "op" "" 398
 // AST:   |-PureAttr {{.*}} <<invalid sloc>> Implicit
 // AST:   `-AvailabilityAttr {{.*}} <<invalid sloc>> Implicit  6.10 0 0 ""
 
 // AST `-CXXMethodDecl {{.*}} <<invalid sloc>> <invalid sloc> used CommittedTriangleObjectPositions 'BuiltInTrianglePositions &()' extern
 // AST   |-TemplateArgument type 'BuiltInTrianglePositions'
-// AST   |-HLSLIntrinsicAttr {{.*}} <<invalid sloc>> Implicit "op" "" 403
+// AST   |-HLSLIntrinsicAttr {{.*}} <<invalid sloc>> Implicit "op" "" 399
 // AST   |-PureAttr {{.*}} <<invalid sloc>> Implicit
 // AST   `-AvailabilityAttr {{.*}} <<invalid sloc>> Implicit  6.10 0 0 ""
 
 // AST: -FunctionDecl {{.*}} <<invalid sloc>> <invalid sloc> implicit used TriangleObjectPositions 'BuiltInTrianglePositions ()' extern
-// AST:  |-HLSLIntrinsicAttr {{.*}} <<invalid sloc>> Implicit "op" "" 401
+// AST:  |-HLSLIntrinsicAttr {{.*}} <<invalid sloc>> Implicit "op" "" 397
 // AST:  |-ConstAttr {{.*}} <<invalid sloc>> Implicit
 // AST:  |-AvailabilityAttr {{.*}} <<invalid sloc>> Implicit  6.10 0 0 ""
 // AST:  `-HLSLBuiltinCallAttr {{.*}} <<invalid sloc>> Implicit
@@ -42,7 +42,7 @@ struct [raypayload] Payload {
 // CHECK:   %{{.*}} = call <9 x float> @dx.op.triangleObjectPosition.f32(i32 -2147483641) ; TriangleObjectPosition()
   
 // FCGL-LABEL: define void {{.*}}ClosestHit
-// FCGL: call void @"dx.hl.op..void (i32, %struct.BuiltInTrianglePositions*)"(i32 401, %struct.BuiltInTrianglePositions* %{{.*}})
+// FCGL: call void @"dx.hl.op..void (i32, %struct.BuiltInTrianglePositions*)"(i32 397, %struct.BuiltInTrianglePositions* %{{.*}})
 [shader("closesthit")]
 void ClosestHit(inout Payload payload, in BuiltInTriangleIntersectionAttributes attr) {
     BuiltInTrianglePositions positions = TriangleObjectPositions();
@@ -54,7 +54,7 @@ void ClosestHit(inout Payload payload, in BuiltInTriangleIntersectionAttributes
 // CHECK:   %{{.*}} = call <9 x float> @dx.op.triangleObjectPosition.f32(i32 -2147483641) ; TriangleObjectPosition()
 
 // FCGL-LABEL: define void {{.*}}AnyHit
-// FCGL: call void @"dx.hl.op..void (i32, %struct.BuiltInTrianglePositions*)"(i32 401, %struct.BuiltInTrianglePositions* %{{.*}})
+// FCGL: call void @"dx.hl.op..void (i32, %struct.BuiltInTrianglePositions*)"(i32 397, %struct.BuiltInTrianglePositions* %{{.*}})
 [shader("anyhit")]
 void AnyHit(inout Payload payload, in BuiltInTriangleIntersectionAttributes attr) {
     BuiltInTrianglePositions positions = TriangleObjectPositions();
@@ -68,8 +68,8 @@ void AnyHit(inout Payload payload, in BuiltInTriangleIntersectionAttributes attr
 // CHECK: %{{.*}} = call <9 x float> @dx.op.rayQuery_CommittedTriangleObjectPosition.f32(i32 -2147483639, i32 %{{.*}})  ; RayQuery_CommittedTriangleObjectPosition(rayQueryHandle)
 
 // FCGL-LABEL: define void {{.*}}RayQueryTest
-// FCGL: %{{.*}} = call %struct.BuiltInTrianglePositions* @"dx.hl.op.ro.%struct.BuiltInTrianglePositions* (i32, %{{.*}}"(i32 402,
-// FCGL: %{{.*}} = call %struct.BuiltInTrianglePositions* @"dx.hl.op.ro.%struct.BuiltInTrianglePositions* (i32, %{{.*}}"(i32 403,
+// FCGL: %{{.*}} = call %struct.BuiltInTrianglePositions* @"dx.hl.op.ro.%struct.BuiltInTrianglePositions* (i32, %{{.*}}"(i32 398,
+// FCGL: %{{.*}} = call %struct.BuiltInTrianglePositions* @"dx.hl.op.ro.%struct.BuiltInTrianglePositions* (i32, %{{.*}}"(i32 399,
 [shader("compute")]
 [numthreads(1, 1, 1)]
 void RayQueryTest() {
@@ -101,7 +101,7 @@ void RayQueryTest() {
 // CHECK: %{{.*}} = call <9 x float> @dx.op.hitObject_TriangleObjectPosition.f32(i32 -2147483638, %dx.types.HitObject %{{.*}})  ; HitObject_TriangleObjectPosition(hitObject)
 
 // FCGL-LABEL: define void {{.*}}HitObjectTest
-// FCGL: %{{.*}} = call %struct.BuiltInTrianglePositions* @"dx.hl.op.rn.%struct.BuiltInTrianglePositions* (i32, %dx.types.HitObject*)"(i32 404, %dx.types.HitObject* %{{.*}})
+// FCGL: %{{.*}} = call %struct.BuiltInTrianglePositions* @"dx.hl.op.rn.%struct.BuiltInTrianglePositions* (i32, %dx.types.HitObject*)"(i32 400, %dx.types.HitObject* %{{.*}})
 [shader("raygeneration")]
 void HitObjectTest() {
     RayDesc ray;
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/copyconvertmatrix/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/copyconvertmatrix/nominal.hlsl
index 7b2edd4106..5b6d8d95e5 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/copyconvertmatrix/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/copyconvertmatrix/nominal.hlsl
@@ -11,7 +11,7 @@ void main() {
   // CHECK-SAME: ; LinAlgCopyConvertMatrix(srcMatrix,transpose)
 
   // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2*, %dx.types.LinAlgMatrixC2M5N4U1S2, i1)"
-  // CHECK2-SAME: (i32 405, %dx.types.LinAlgMatrixC4M5N4U1S2* {{.*}}, %dx.types.LinAlgMatrixC2M5N4U1S2 {{.*}}, i1 false)
+  // CHECK2-SAME: (i32 401, %dx.types.LinAlgMatrixC4M5N4U1S2* {{.*}}, %dx.types.LinAlgMatrixC2M5N4U1S2 {{.*}}, i1 false)
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(2, 5, 4, 1, 2)]] mat1;
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat2;
   __builtin_LinAlg_CopyConvertMatrix(mat2, mat1, false);
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/fillmatrix/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/fillmatrix/nominal.hlsl
index 3ff7357f08..1c3ab9ac4e 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/fillmatrix/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/fillmatrix/nominal.hlsl
@@ -10,7 +10,7 @@ void main() {
   // CHECK-SAME: (i32 -2147483636, i32 {{.*}})  ; LinAlgFillMatrix(value)
 
   // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2*, i32)"
-  // CHECK2-SAME: (i32 406, %dx.types.LinAlgMatrixC4M5N4U1S2* {{.*}}, i32 5),
+  // CHECK2-SAME: (i32 402, %dx.types.LinAlgMatrixC4M5N4U1S2* {{.*}}, i32 5),
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat1;
   __builtin_LinAlg_FillMatrix(mat1, 5);
   
@@ -18,7 +18,7 @@ void main() {
   // CHECK-SAME: (i32 -2147483636, float {{.*}})  ; LinAlgFillMatrix(value)
   
   // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC5M3N4U0S0*, float)"
-  // CHECK2-SAME: (i32 406, %dx.types.LinAlgMatrixC5M3N4U0S0* {{.*}}, float 0x40091EB860000000)
+  // CHECK2-SAME: (i32 402, %dx.types.LinAlgMatrixC5M3N4U0S0* {{.*}}, float 0x40091EB860000000)
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(5, 3, 4, 0, 0)]] mat2;
   __builtin_LinAlg_FillMatrix(mat2, 3.14);
 }
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulate/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulate/nominal.hlsl
index 56bde166c6..6a9490e624 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulate/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulate/nominal.hlsl
@@ -14,7 +14,7 @@ void main() {
   // CHECK-SAME: (i32 -2147483624, %dx.types.LinAlgMatrixC1M1N1U0S0 {{.*}}, %dx.types.LinAlgMatrixC5M3N4U0S0 {{.*}}) ; LinAlgMatrixAccumulate(matrixLHS,matrixRHS)
 
   // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC2M2N2U2S2*, %dx.types.LinAlgMatrixC1M1N1U0S0,
-  // CHECK2-SAME:  %dx.types.LinAlgMatrixC5M3N4U0S0)"(i32 415, %dx.types.LinAlgMatrixC2M2N2U2S2* %mat3,
+  // CHECK2-SAME:  %dx.types.LinAlgMatrixC5M3N4U0S0)"(i32 411, %dx.types.LinAlgMatrixC2M2N2U2S2* %mat3,
   // CHECK2-SAME: %dx.types.LinAlgMatrixC1M1N1U0S0 %{{[0-9]+}}, %dx.types.LinAlgMatrixC5M3N4U0S0 %{{[0-9]+}})
   __builtin_LinAlg_MatrixAccumulate(mat3, mat2, mat1);
 }
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/nominal.hlsl
index 39de2ed2c9..3c9c4ab3ef 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetodescriptor/nominal.hlsl
@@ -13,7 +13,7 @@ void main() {
   // CHECK-SAME: ; LinAlgMatrixAccumulateToDescriptor(matrix,handle,offset,stride,layout,align)
 
   // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2, %dx.types.Handle, i32, i32, i32, i32)"
-  // CHECK2-SAME: (i32 419, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle {{.*}}, i32 5, i32 5, i32 5, i32 4)
+  // CHECK2-SAME: (i32 415, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle {{.*}}, i32 5, i32 5, i32 5, i32 4)
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat;
   __builtin_LinAlg_MatrixAccumulateToDescriptor(mat, outbuf, 5, 5, 5, 4);
 }
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl
index 07222e0fbe..bf6ebb6e77 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixaccumulatetomemory/nominal.hlsl
@@ -15,7 +15,7 @@ void main() {
   // CHECK-SAME: ; LinAlgMatrixAccumulateToMemory(matrix,memory,offset,stride,layout)
 
   // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2,
-  // CHECK2-SAME: [64 x float] addrspace(3)*, i32, i32, i32)"(i32 420, 
+  // CHECK2-SAME: [64 x float] addrspace(3)*, i32, i32, i32)"(i32 416, 
   // CHECK2-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA",
   // CHECK2-SAME: i32 1, i32 2, i32 3)
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat;
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetcoordinate/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetcoordinate/nominal.hlsl
index 43de77d686..3c44ae5370 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetcoordinate/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetcoordinate/nominal.hlsl
@@ -11,7 +11,7 @@ void main() {
   // CHECK-SAME: ; LinAlgMatrixGetCoordinate(matrix,threadLocalIndex)
 
   // CHECK2: call <2 x i32> @"dx.hl.op..<2 x i32> (i32, %dx.types.LinAlgMatrixC4M5N4U1S2, i32)"
-  // CHECK2-SAME: (i32 407, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 1)
+  // CHECK2-SAME: (i32 403, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 1)
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat;
   uint2 coord = __builtin_LinAlg_MatrixGetCoordinate(mat, 1);
 }
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetelement/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetelement/nominal.hlsl
index f3a057737c..91de8cab31 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetelement/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixgetelement/nominal.hlsl
@@ -13,7 +13,7 @@ void main() {
   // CHECK-SAME: ; LinAlgMatrixGetElement(matrix,threadLocalIndex)
 
   // CHECK2: call void @"dx.hl.op..void (i32, i32*, %dx.types.LinAlgMatrixC4M5N4U1S2, i32)"
-  // CHECK2-SAME: (i32 408, i32* %elem1, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 0)
+  // CHECK2-SAME: (i32 404, i32* %elem1, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 0)
   uint elem1;
   __builtin_LinAlg_MatrixGetElement(elem1, mat, 0);
 
@@ -22,7 +22,7 @@ void main() {
   // CHECK-SAME: ; LinAlgMatrixGetElement(matrix,threadLocalIndex)
 
   // CHECK2: call void @"dx.hl.op..void (i32, float*, %dx.types.LinAlgMatrixC4M5N4U1S2, i32)"
-  // CHECK2-SAME: (i32 408, float* %elem2, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 1)
+  // CHECK2-SAME: (i32 404, float* %elem2, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 1)
   float elem2;
   __builtin_LinAlg_MatrixGetElement(elem2, mat, 1);
 }
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixlength/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixlength/nominal.hlsl
index 5b79dfd237..44c2ce6136 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixlength/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixlength/nominal.hlsl
@@ -10,7 +10,7 @@ void main() {
   // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}})  ; LinAlgMatrixLength(matrix)
 
   // CHECK2: call i32 @"dx.hl.op..i32 (i32, %dx.types.LinAlgMatrixC4M5N4U1S2)"
-  // CHECK2-SAME: (i32 409, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}})
+  // CHECK2-SAME: (i32 405, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}})
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat;
   uint len = __builtin_LinAlg_MatrixLength(mat);
 }
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfromdescriptor/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfromdescriptor/nominal.hlsl
index a58ac98117..727ec19ca8 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfromdescriptor/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfromdescriptor/nominal.hlsl
@@ -13,7 +13,7 @@ void main() {
   // CHECK-SAME: ; LinAlgMatrixLoadFromDescriptor(handle,offset,stride,layout,align)
 
   // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC1M1N1U0S0*, %dx.types.Handle, i32, i32, i32, i32)
-  // CHECK2-SAME: "(i32 410, %dx.types.LinAlgMatrixC1M1N1U0S0* %mat, %dx.types.Handle {{.*}}, i32 0, i32 0, i32 0, i32 4)
+  // CHECK2-SAME: "(i32 406, %dx.types.LinAlgMatrixC1M1N1U0S0* %mat, %dx.types.Handle {{.*}}, i32 0, i32 0, i32 0, i32 4)
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(1, 1, 1, 0, 0)]] mat;
   __builtin_LinAlg_MatrixLoadFromDescriptor(mat, inbuf, 0, 0, 0, 4);
 }
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl
index 3a0e114390..f3ef819052 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixloadfrommemory/nominal.hlsl
@@ -15,7 +15,7 @@ void main() {
   // CHECK-SAME: ; LinAlgMatrixLoadFromMemory(memory,offset,stride,layout)
 
   // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2*, [64 x float] addrspace(3)*,
-  // CHECK2-SAME: i32, i32, i32)"(i32 411, %dx.types.LinAlgMatrixC4M5N4U1S2* %mat, [64 x float] addrspace(3)*
+  // CHECK2-SAME: i32, i32, i32)"(i32 407, %dx.types.LinAlgMatrixC4M5N4U1S2* %mat, [64 x float] addrspace(3)*
   // CHECK2-SAME: @"\01?SharedArr@@3PAMA", i32 1, i32 2, i32 3)
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat;
   __builtin_LinAlg_MatrixLoadFromMemory(mat, SharedArr, 1, 2, 3);
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixmatrixmultiply/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixmatrixmultiply/nominal.hlsl
index 60fabd8337..73b901a17a 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixmatrixmultiply/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixmatrixmultiply/nominal.hlsl
@@ -10,7 +10,7 @@ void main() {
   // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}) ; LinAlgMatrixMultiply(matrixA,matrixB)
 
   // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2*, %dx.types.LinAlgMatrixC4M5N4U1S2,
-  // CHECK2-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2)"(i32 416, %dx.types.LinAlgMatrixC4M5N4U1S2* %mat2,
+  // CHECK2-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2)"(i32 412, %dx.types.LinAlgMatrixC4M5N4U1S2* %mat2,
   // CHECK2-SAME: %dx.types.LinAlgMatrixC4M5N4U1S2 %{{[0-9]+}}, %dx.types.LinAlgMatrixC4M5N4U1S2 %{{[0-9]+}})
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat1;
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat2;
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixmatrixmultiplyaccumulate/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixmatrixmultiplyaccumulate/nominal.hlsl
index 87d03d0a30..86c2dd4db7 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixmatrixmultiplyaccumulate/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixmatrixmultiplyaccumulate/nominal.hlsl
@@ -12,7 +12,7 @@ void main() {
   // CHECK-SAME: %dx.types.LinAlgMatrixC4M5N3U1S2 undef) ; LinAlgMatrixMultiplyAccumulate(matrixA,matrixB,matrixC)
   
   // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N3U1S2*, %dx.types.LinAlgMatrixC4M5N4U1S2,
-  // CHECK2-SAME: %dx.types.LinAlgMatrixC4M4N3U1S2, %dx.types.LinAlgMatrixC4M5N3U1S2)"(i32 417,
+  // CHECK2-SAME: %dx.types.LinAlgMatrixC4M4N3U1S2, %dx.types.LinAlgMatrixC4M5N3U1S2)"(i32 413,
   // CHECK2-SAME: %dx.types.LinAlgMatrixC4M5N3U1S2* {{.*}}, %dx.types.LinAlgMatrixC4M5N4U1S2 %{{[0-9]+}},
   // CHECK2-SAME: %dx.types.LinAlgMatrixC4M4N3U1S2 %{{[0-9]+}}, %dx.types.LinAlgMatrixC4M5N3U1S2 %{{[0-9]+}})
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat1;
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixouterproduct/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixouterproduct/nominal.hlsl
index 1dfc5bd28e..92fa7c777f 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixouterproduct/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixouterproduct/nominal.hlsl
@@ -13,7 +13,7 @@ void main() {
   // CHECK-SAME: (i32 -2147483619, <4 x float> {{.*}}, <4 x float> {{.*}})  ; LinAlgMatrixOuterProduct(vectorA,vectorB)
 
   // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC2M2N2U2S2*, <4 x float>, <4 x float>)"
-  // CHECK2: (i32 421, %dx.types.LinAlgMatrixC2M2N2U2S2* {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}})
+  // CHECK2: (i32 417, %dx.types.LinAlgMatrixC2M2N2U2S2* {{.*}}, <4 x float> {{.*}}, <4 x float> {{.*}})
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(2, 2, 2, 2, 2)]] mat;
   __builtin_LinAlg_MatrixOuterProduct(mat, lhs, rhs);
 }
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixqueryaccumulatorlayout/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixqueryaccumulatorlayout/nominal.hlsl
index 9d0cdaf097..86ca447ff8 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixqueryaccumulatorlayout/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixqueryaccumulatorlayout/nominal.hlsl
@@ -8,6 +8,6 @@ void main() {
 
   // CHECK: call i32 @dx.op.linAlgMatrixQueryAccumulatorLayout(i32 -2147483626)  ; LinAlgMatrixQueryAccumulatorLayout()
 
-  // CHECK2: call i32 @"dx.hl.op..i32 (i32)"(i32 418)
+  // CHECK2: call i32 @"dx.hl.op..i32 (i32)"(i32 414)
   uint layout = __builtin_LinAlg_MatrixQueryAccumulatorLayout();
 }
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixsetelement/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixsetelement/nominal.hlsl
index 0ab34c4a8d..c1ee5f168e 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixsetelement/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixsetelement/nominal.hlsl
@@ -14,7 +14,7 @@ void main() {
   // CHECK-SAME: ; LinAlgMatrixSetElement(matrix,threadLocalIndex,value)
 
   // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2*, %dx.types.LinAlgMatrixC4M5N4U1S2, i32, i32)
-  // CHECK2-SAME: "(i32 412, %dx.types.LinAlgMatrixC4M5N4U1S2* {{.*}}, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 1, i32 5)
+  // CHECK2-SAME: "(i32 408, %dx.types.LinAlgMatrixC4M5N4U1S2* {{.*}}, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, i32 1, i32 5)
 
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat1;
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat2;
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretodescriptor/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretodescriptor/nominal.hlsl
index 84018dce7b..352a34ac1a 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretodescriptor/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretodescriptor/nominal.hlsl
@@ -13,7 +13,7 @@ void main() {
   // CHECK-SAME:  ; LinAlgMatrixStoreToDescriptor(matrix,handle,offset,stride,layout,align)
 
   // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2, %dx.types.Handle, i32, i32, i32, i32)
-  // CHECK2-SAME: "(i32 413, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle {{.*}}, i32 1, i32 1, i32 0, i32 4)
+  // CHECK2-SAME: "(i32 409, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, %dx.types.Handle {{.*}}, i32 1, i32 1, i32 0, i32 4)
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat1;
   __builtin_LinAlg_MatrixStoreToDescriptor(mat1, outbuf, 1, 1, 0, 4);
 }
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl
index 7d6f3b120c..18e3c1cbc3 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixstoretomemory/nominal.hlsl
@@ -15,7 +15,7 @@ void main() {
   // CHECK-SAME: i32 0, i32 0), i32 1, i32 2, i32 3)  ; LinAlgMatrixStoreToMemory(matrix,memory,offset,stride,layout)
 
   // CHECK2: call void @"dx.hl.op..void (i32, %dx.types.LinAlgMatrixC4M5N4U1S2, [64 x float] addrspace(3)*, i32, i32, i32)"
-  // CHECK2-SAME: (i32 414, %dx.types.LinAlgMatrixC4M5N4U1S2 %{{.*}}, [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA",
+  // CHECK2-SAME: (i32 410, %dx.types.LinAlgMatrixC4M5N4U1S2 %{{.*}}, [64 x float] addrspace(3)* @"\01?SharedArr@@3PAMA",
   // CHECK2-SAME: i32 1, i32 2, i32 3)
   __builtin_LinAlgMatrix [[__LinAlgMatrix_Attributes(4, 5, 4, 1, 2)]] mat;
   __builtin_LinAlg_MatrixStoreToMemory(mat, SharedArr, 1, 2, 3);
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiply/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiply/nominal.hlsl
index 203026f9ed..d1cfae76c4 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiply/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiply/nominal.hlsl
@@ -15,6 +15,6 @@ void main() {
   // CHECK-SAME: float 3.000000e+00, float 4.000000e+00>, i32 1)  ; LinAlgMatVecMul(matrix,inputVector,interpretation)
 
   // CHECK2: call void @"dx.hl.op..void (i32, <4 x float>*, %dx.types.LinAlgMatrixC4M5N4U1S2, <4 x float>, i32)
-  // CHECK2-SAME: "(i32 422, <4 x float>* %result, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, <4 x float> {{.*}}, i32 1)
+  // CHECK2-SAME: "(i32 418, <4 x float>* %result, %dx.types.LinAlgMatrixC4M5N4U1S2 {{.*}}, <4 x float> {{.*}}, i32 1)
   __builtin_LinAlg_MatrixVectorMultiply(result, mat, vec, 1);
 }
diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiplyadd/nominal.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiplyadd/nominal.hlsl
index d0be14a452..fc7b6d0762 100644
--- a/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiplyadd/nominal.hlsl
+++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/builtins/matrixvectormultiplyadd/nominal.hlsl
@@ -15,7 +15,7 @@ void main() {
   // CHECK-SAME: ; LinAlgMatVecMulAdd(matrix,inputVector,inputInterpretation,biasVector,biasInterpretation)
 
   // CHECK2: call void @"dx.hl.op..void (i32, <4 x float>*, %dx.types.LinAlgMatrixC5M3N4U0S0, <4 x float>,
-  // CHECK2-SAME: i32, <4 x float>, i32)"(i32 423, <4 x float>* %result, %dx.types.LinAlgMatrixC5M3N4U0S0 %{{[0-9]+}},
+  // CHECK2-SAME: i32, <4 x float>, i32)"(i32 419, <4 x float>* %result, %dx.types.LinAlgMatrixC5M3N4U0S0 %{{[0-9]+}},
   // CHECK2-SAME: <4 x float> %{{[0-9]+}}, i32 1, <4 x float> %{{[0-9]+}}, i32 0)
 
   __builtin_LinAlg_MatrixVectorMultiplyAdd(result, mat, vec, 1, result, 0);
diff --git a/tools/clang/test/CodeGenSPIRV/coopvec/outerproductaccumulate-spirv-errors.hlsl b/tools/clang/test/CodeGenSPIRV/coopvec/outerproductaccumulate-spirv-errors.hlsl
deleted file mode 100644
index e42d6d51b0..0000000000
--- a/tools/clang/test/CodeGenSPIRV/coopvec/outerproductaccumulate-spirv-errors.hlsl
+++ /dev/null
@@ -1,19 +0,0 @@
-// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types -spirv %s -verify
-
-// Tests that the header file cannot be included for spirv compilations
-// This is a copy of \tools\clang\test\CodeGenDXIL\hlsl\linalg\outerproductaccumulate.hlsl
-// except that spirv is targeted
-
-// expected-error@dx/coopvec.h:4{{Cooperative vectors not (yet) supported for SPIRV}}
-#include <dx/coopvec.h>
-
-RWByteAddressBuffer RWBuf;
-
-export void Test4(vector<half, 128> Input1, vector<half, 64> Input2) {
-  using namespace dx::coopvec;
-
-  RWMatrixRef<DATA_TYPE_FLOAT16, 128, 64, MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL>
-      matrix = {RWBuf, 0, 0};
-
-  OuterProductAccumulate(Input1, Input2, matrix);  
-}
diff --git a/tools/clang/test/DXC/Passes/DxilGen/clusterid_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/clusterid_dxilgen.ll
index 20f6d71e4e..0f82fc5d71 100644
--- a/tools/clang/test/DXC/Passes/DxilGen/clusterid_dxilgen.ll
+++ b/tools/clang/test/DXC/Passes/DxilGen/clusterid_dxilgen.ll
@@ -48,7 +48,7 @@ define void @"\01?test_cluster_id@@YAXUPayload@@UBuiltInTriangleIntersectionAttr
 entry:
   %0 = getelementptr inbounds %struct.Payload, %struct.Payload* %payload, i32 0, i32 0
   %1 = load float, float* %0
-  %2 = call i32 @"dx.hl.op.rn.i32 (i32)"(i32 397), !dbg !38 ; line:56 col:14
+  %2 = call i32 @"dx.hl.op.rn.i32 (i32)"(i32 393), !dbg !38 ; line:56 col:14
   %3 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !dbg !42 ; line:57 col:3
   %4 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %3), !dbg !42 ; line:57 col:3
   %5 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %4, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !42 ; line:57 col:3
@@ -72,7 +72,7 @@ entry:
   %1 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %0, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !46 ; line:77 col:3
   call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rq3, %dx.types.Handle %1, i32 0, i32 255, <3 x float> zeroinitializer, float 0.000000e+00, <3 x float> <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, float 1.000000e+03), !dbg !46 ; line:77 col:3
   %2 = call i1 @"dx.hl.op..i1 (i32, i32)"(i32 322, i32 %rq3), !dbg !47 ; line:78 col:3
-  %3 = call i32 @"dx.hl.op.ro.i32 (i32, i32)"(i32 398, i32 %rq3), !dbg !48 ; line:79 col:14
+  %3 = call i32 @"dx.hl.op.ro.i32 (i32, i32)"(i32 394, i32 %rq3), !dbg !48 ; line:79 col:14
   %4 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !dbg !49 ; line:80 col:3
   %5 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %4), !dbg !49 ; line:80 col:3
   %6 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !49 ; line:80 col:3
@@ -87,7 +87,7 @@ entry:
   %0 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !53 ; line:100 col:3
   %1 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %0, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !53 ; line:100 col:3
   call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rq2, %dx.types.Handle %1, i32 0, i32 255, <3 x float> zeroinitializer, float 0.000000e+00, <3 x float> <float 0.000000e+00, float 0.000000e+00, float 1.000000e+00>, float 1.000000e+03), !dbg !53 ; line:100 col:3
-  %2 = call i32 @"dx.hl.op.ro.i32 (i32, i32)"(i32 399, i32 %rq2), !dbg !54 ; line:101 col:14
+  %2 = call i32 @"dx.hl.op.ro.i32 (i32, i32)"(i32 395, i32 %rq2), !dbg !54 ; line:101 col:14
   %3 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !dbg !55 ; line:102 col:3
   %4 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %3), !dbg !55 ; line:102 col:3
   %5 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %4, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !55 ; line:102 col:3
@@ -102,7 +102,7 @@ entry:
   %0 = bitcast %dx.types.HitObject* %ho to i8*, !dbg !57 ; line:114 col:3
   call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !57 ; line:114 col:3
   call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %ho), !dbg !59 ; line:114 col:22
-  %1 = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 400, %dx.types.HitObject* %ho), !dbg !60 ; line:115 col:14
+  %1 = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 396, %dx.types.HitObject* %ho), !dbg !60 ; line:115 col:14
   %2 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !dbg !61 ; line:116 col:3
   %3 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %2), !dbg !61 ; line:116 col:3
   %4 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !61 ; line:116 col:3
diff --git a/tools/clang/test/DXC/Passes/DxilGen/debugbreak.ll b/tools/clang/test/DXC/Passes/DxilGen/debugbreak.ll
index d1c70453d5..5101528d01 100644
--- a/tools/clang/test/DXC/Passes/DxilGen/debugbreak.ll
+++ b/tools/clang/test/DXC/Passes/DxilGen/debugbreak.ll
@@ -18,7 +18,7 @@ entry:
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:
-  call void @"dx.hl.op..void (i32)"(i32 424)
+  call void @"dx.hl.op..void (i32)"(i32 420)
   br label %if.end
 
 if.end:
diff --git a/tools/clang/test/DXC/Passes/DxilGen/group-wave-index.ll b/tools/clang/test/DXC/Passes/DxilGen/group-wave-index.ll
index a019e31a83..78774e1653 100644
--- a/tools/clang/test/DXC/Passes/DxilGen/group-wave-index.ll
+++ b/tools/clang/test/DXC/Passes/DxilGen/group-wave-index.ll
@@ -20,8 +20,8 @@ target triple = "dxil-ms-dx"
 ; Function Attrs: nounwind
 define void @main(<3 x i32> %id) #0 {
 entry:
-  %0 = call i32 @"dx.hl.op.rn.i32 (i32)"(i32 396)
-  %1 = call i32 @"dx.hl.op.rn.i32 (i32)"(i32 395)
+  %0 = call i32 @"dx.hl.op.rn.i32 (i32)"(i32 392)
+  %1 = call i32 @"dx.hl.op.rn.i32 (i32)"(i32 391)
   %2 = load %"class.RWStructuredBuffer<unsigned int>", %"class.RWStructuredBuffer<unsigned int>"* @"\01?output0@@3V?$RWStructuredBuffer@I@@A"
   %3 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer<unsigned int>\22)"(i32 0, %"class.RWStructuredBuffer<unsigned int>" %2)
   %4 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer<unsigned int>\22)"(i32 14, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 4108, i32 4 }, %"class.RWStructuredBuffer<unsigned int>" zeroinitializer)
diff --git a/tools/clang/test/DXC/Passes/DxilGen/isdebuggerpresent.ll b/tools/clang/test/DXC/Passes/DxilGen/isdebuggerpresent.ll
index 1d9e8539a4..8d2bb4a969 100644
--- a/tools/clang/test/DXC/Passes/DxilGen/isdebuggerpresent.ll
+++ b/tools/clang/test/DXC/Passes/DxilGen/isdebuggerpresent.ll
@@ -19,7 +19,7 @@ target triple = "dxil-ms-dx"
 ; Function Attrs: nounwind
 define void @main(<3 x i32> %threadId) #0 {
 entry:
-  %0 = call i1 @"dx.hl.op.ro.i1 (i32)"(i32 425)
+  %0 = call i1 @"dx.hl.op.ro.i1 (i32)"(i32 421)
   br i1 %0, label %if.then, label %if.else
 
 if.then:
diff --git a/tools/clang/test/DXC/Passes/DxilGen/linalg-builtins.ll b/tools/clang/test/DXC/Passes/DxilGen/linalg-builtins.ll
deleted file mode 100644
index ea1be46c4c..0000000000
--- a/tools/clang/test/DXC/Passes/DxilGen/linalg-builtins.ll
+++ /dev/null
@@ -1,189 +0,0 @@
-; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s
-; REQUIRES: dxil-1-9
-
-target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-ms-dx"
-
-%struct.ByteAddressBuffer = type { i32 }
-%struct.RWByteAddressBuffer = type { i32 }
-%dx.types.Handle = type { i8* }
-%dx.types.ResourceProperties = type { i32, i32 }
-
-@"\01?input_vector_buffer@@3UByteAddressBuffer@@A" = external global %struct.ByteAddressBuffer, align 4
-@"\01?opa_input_buffer@@3UByteAddressBuffer@@A" = external global %struct.ByteAddressBuffer, align 4
-@"\01?matrix_buffer@@3UByteAddressBuffer@@A" = external global %struct.ByteAddressBuffer, align 4
-@"\01?bias_buffer@@3UByteAddressBuffer@@A" = external global %struct.ByteAddressBuffer, align 4
-@"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4
-@"\01?output_vector_buffer@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4
-
-; Function Attrs: nounwind
-define void @cs_main() #0 {
-entry:
-  ;CHECK-DAG: %[[MLD:[^ ]+]] = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A"
-  ;CHECK-DAG: %[[BLD:[^ ]+]] = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?bias_buffer@@3UByteAddressBuffer@@A"
-  ;CHECK-DAG: %[[RWMLD0:[^ ]+]] = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A"
-  %output_vector = alloca <4 x float>, align 4
-  %tmp = bitcast <4 x float>* %output_vector to i8*, !dbg !21 ; line:14 col:5
-  call void @llvm.lifetime.start(i64 16, i8* %tmp) #0, !dbg !21 ; line:14 col:5
-  %tmp1 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?input_vector_buffer@@3UByteAddressBuffer@@A", !dbg !25 ; line:17 col:37
-  %tmp2 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp1), !dbg !25 ; line:17 col:37
-  %tmp3 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !25 ; line:17 col:37
-  %tmp4 = call <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp3, i32 0), !dbg !25 ; line:17 col:37
-  %tmp5 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A", !dbg !26 ; line:33 col:5
-  %tmp6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp5), !dbg !26 ; line:33 col:5
-  %tmp7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !26 ; line:33 col:5
-
-  ;CHECK: %[[MCH0:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.ByteAddressBuffer(i32 160, %struct.ByteAddressBuffer %[[MLD]]
-  ;CHECK: %[[MAH0:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[MCH0]]
-  ;CHECK: call <4 x float> @dx.op.matVecMul.v4f32.v4f32(i32 305, <4 x float> %{{[^ ]+}}, i1 false, i32 9, %dx.types.Handle %[[MAH0]], i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64, i1 false) 
-  call void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x float>* %output_vector, i1 false, <4 x float> %tmp4, i1 false, i32 9, %dx.types.Handle %tmp7, i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64), !dbg !26 ; line:33 col:5
-
-  %tmp8 = load <4 x float>, <4 x float>* %output_vector, align 4, !dbg !27, !tbaa !28 ; line:37 col:35
-  %tmp9 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?output_vector_buffer@@3URWByteAddressBuffer@@A", !dbg !31 ; line:37 col:5
-  %tmp10 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp9), !dbg !31 ; line:37 col:5
-  %tmp11 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp10, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !31 ; line:37 col:5
-  call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp11, i32 0, <4 x float> %tmp8), !dbg !31 ; line:37 col:5
-  %tmp12 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A", !dbg !32 ; line:49 col:5
-  %tmp13 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp12), !dbg !32 ; line:49 col:5
-  %tmp14 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp13, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !32 ; line:49 col:5
-  %tmp15 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?bias_buffer@@3UByteAddressBuffer@@A", !dbg !32 ; line:49 col:5
-  %tmp16 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp15), !dbg !32 ; line:49 col:5
-  %tmp17 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp16, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !32 ; line:49 col:5
-
-  ;CHECK: %[[MCH1:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.ByteAddressBuffer(i32 160, %struct.ByteAddressBuffer %[[MLD]]
-  ;CHECK: %[[MAH1:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[MCH1]]
-  ;CHECK: %[[BCH1:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.ByteAddressBuffer(i32 160, %struct.ByteAddressBuffer %[[BLD]]
-  ;CHECK: %[[BAH1:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[BCH1]]
-  ;CHECK: call <4 x float> @dx.op.matVecMulAdd.v4f32.v4f32(i32 306, <4 x float> %{{[^ ]+}}, i1 false, i32 9, %dx.types.Handle %[[MAH1]], i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64, %dx.types.Handle %[[BAH1]], i32 0, i32 9, i1 false)
-  call void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x float>* %output_vector, i1 false, <4 x float> %tmp4, i1 false, i32 9, %dx.types.Handle %tmp14, i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64, %dx.types.Handle %tmp17, i32 0, i32 9), !dbg !32 ; line:49 col:5
-  
-  %tmp18 = load <4 x float>, <4 x float>* %output_vector, align 4, !dbg !33, !tbaa !28 ; line:54 col:38
-  %tmp19 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?output_vector_buffer@@3URWByteAddressBuffer@@A", !dbg !34 ; line:54 col:5
-  %tmp20 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp19), !dbg !34 ; line:54 col:5
-  %tmp21 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp20, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !34 ; line:54 col:5
-  call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp21, i32 1024, <4 x float> %tmp18), !dbg !34 ; line:54 col:5
-  %tmp22 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?opa_input_buffer@@3UByteAddressBuffer@@A", !dbg !35 ; line:56 col:37
-  %tmp23 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp22), !dbg !35 ; line:56 col:37
-  %tmp24 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp23, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !35 ; line:56 col:37
-  %tmp25 = call <8 x i32> @"dx.hl.op.ro.<8 x i32> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp24, i32 0), !dbg !35 ; line:56 col:37
-  %tmp26 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?opa_input_buffer@@3UByteAddressBuffer@@A", !dbg !36 ; line:57 col:37
-  %tmp27 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp26), !dbg !36 ; line:57 col:37
-  %tmp28 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp27, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !36 ; line:57 col:37
-  %tmp29 = call <8 x i32> @"dx.hl.op.ro.<8 x i32> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp28, i32 128), !dbg !36 ; line:57 col:37
-  %tmp30 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A", !dbg !37 ; line:67 col:5
-  %tmp31 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp30), !dbg !37 ; line:67 col:5
-  %tmp32 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp31, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !37 ; line:67 col:5
-
-  ;CHECK: %[[RWMCH0:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer %[[RWMLD0]]
-  ;CHECK: %[[RWMAH0:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RWMCH0]]
-  ;CHECK: call void @dx.op.outerProductAccumulate.v8i32.v8i32(i32 307, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %[[RWMAH0]], i32 0, i32 5, i32 3, i32 0)
-  call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x i32> %tmp25, <8 x i32> %tmp29, %dx.types.Handle %tmp32, i32 0, i32 5, i32 3, i32 0), !dbg !37 ; line:67 col:5
-
-  
-  %tmp33 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A", !dbg !38 ; line:77 col:5
-  %tmp34 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp33), !dbg !38 ; line:77 col:5
-  %tmp35 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp34, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !38 ; line:77 col:5
-
-  ;CHECK: %[[RWMCH1:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer %[[RWMLD0]]
-  ;CHECK: %[[RWMAH1:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RWMCH1]]
-  ;CHECK: call void @dx.op.vectorAccumulate.v8i32(i32 308, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %[[RWMAH1]], i32 0)
-  call void @"dx.hl.op..void (i32, <8 x i32>, %dx.types.Handle, i32)"(i32 393, <8 x i32> %tmp25, %dx.types.Handle %tmp35, i32 0), !dbg !38 ; line:77 col:5
-
-  %tmp36 = bitcast <4 x float>* %output_vector to i8*, !dbg !39 ; line:79 col:1
-  call void @llvm.lifetime.end(i64 16, i8* %tmp36) #0, !dbg !39 ; line:79 col:1
-  ret void, !dbg !39 ; line:79 col:1
-}
-
-; Function Attrs: nounwind
-declare void @llvm.lifetime.start(i64, i8* nocapture) #0
-
-; Function Attrs: nounwind
-declare void @llvm.lifetime.end(i64, i8* nocapture) #0
-
-; Function Attrs: nounwind readonly
-declare <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32, %struct.ByteAddressBuffer) #2
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer) #2
-
-; Function Attrs: nounwind
-declare void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32) #0
-
-; Function Attrs: nounwind
-declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32, %dx.types.Handle, i32, <4 x float>) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #2
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #2
-
-; Function Attrs: nounwind
-declare void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare <8 x i32> @"dx.hl.op.ro.<8 x i32> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind
-declare void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32) #0
-
-; Function Attrs: nounwind
-declare void @"dx.hl.op..void (i32, <8 x i32>, %dx.types.Handle, i32)"(i32, <8 x i32>, %dx.types.Handle, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
-
-!llvm.module.flags = !{!0}
-!pauseresume = !{!1}
-!dx.version = !{!2}
-!dx.valver = !{!2}
-!dx.shaderModel = !{!3}
-!dx.typeAnnotations = !{!4}
-!dx.entryPoints = !{!8}
-!dx.fnprops = !{!18}
-!dx.options = !{!19, !20}
-
-!0 = !{i32 2, !"Debug Info Version", i32 3}
-!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"}
-!2 = !{i32 1, i32 9}
-!3 = !{!"cs", i32 6, i32 9}
-!4 = !{i32 1, void ()* @cs_main, !5}
-!5 = !{!6}
-!6 = !{i32 1, !7, !7}
-!7 = !{}
-!8 = !{void ()* @cs_main, !"cs_main", null, !9, null}
-!9 = !{!10, !15, null, null}
-!10 = !{!11, !12, !13, !14}
-!11 = !{i32 0, %struct.ByteAddressBuffer* @"\01?input_vector_buffer@@3UByteAddressBuffer@@A", !"input_vector_buffer", i32 -1, i32 -1, i32 1, i32 11, i32 0, null}
-!12 = !{i32 1, %struct.ByteAddressBuffer* @"\01?opa_input_buffer@@3UByteAddressBuffer@@A", !"opa_input_buffer", i32 -1, i32 -1, i32 1, i32 11, i32 0, null}
-!13 = !{i32 2, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A", !"matrix_buffer", i32 -1, i32 -1, i32 1, i32 11, i32 0, null}
-!14 = !{i32 3, %struct.ByteAddressBuffer* @"\01?bias_buffer@@3UByteAddressBuffer@@A", !"bias_buffer", i32 -1, i32 -1, i32 1, i32 11, i32 0, null}
-!15 = !{!16, !17}
-!16 = !{i32 0, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A", !"rw_matrix_buffer", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null}
-!17 = !{i32 1, %struct.RWByteAddressBuffer* @"\01?output_vector_buffer@@3URWByteAddressBuffer@@A", !"output_vector_buffer", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null}
-!18 = !{void ()* @cs_main, i32 5, i32 1, i32 1, i32 1}
-!19 = !{i32 -2147483584}
-!20 = !{i32 -1}
-!21 = !DILocation(line: 14, column: 5, scope: !22)
-!22 = !DISubprogram(name: "cs_main", scope: !23, file: !23, line: 12, type: !24, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @cs_main)
-!23 = !DIFile(filename: "DirectXShaderCompiler\5Ctools\5Cclang\5Ctest\5CCodeGenDXIL\5Chlsl\5Cintrinsics\5Clinalg_builtins\5Clinalg-builtins.hlsl", directory: "")
-!24 = !DISubroutineType(types: !7)
-!25 = !DILocation(line: 17, column: 37, scope: !22)
-!26 = !DILocation(line: 33, column: 5, scope: !22)
-!27 = !DILocation(line: 37, column: 35, scope: !22)
-!28 = !{!29, !29, i64 0}
-!29 = !{!"omnipotent char", !30, i64 0}
-!30 = !{!"Simple C/C++ TBAA"}
-!31 = !DILocation(line: 37, column: 5, scope: !22)
-!32 = !DILocation(line: 49, column: 5, scope: !22)
-!33 = !DILocation(line: 54, column: 38, scope: !22)
-!34 = !DILocation(line: 54, column: 5, scope: !22)
-!35 = !DILocation(line: 56, column: 37, scope: !22)
-!36 = !DILocation(line: 57, column: 37, scope: !22)
-!37 = !DILocation(line: 67, column: 5, scope: !22)
-!38 = !DILocation(line: 77, column: 5, scope: !22)
-!39 = !DILocation(line: 79, column: 1, scope: !22)
diff --git a/tools/clang/test/DXC/Passes/DxilGen/triangle_positions_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/triangle_positions_dxilgen.ll
index fcc5517c15..218c4882cf 100644
--- a/tools/clang/test/DXC/Passes/DxilGen/triangle_positions_dxilgen.ll
+++ b/tools/clang/test/DXC/Passes/DxilGen/triangle_positions_dxilgen.ll
@@ -57,7 +57,7 @@ entry:
   %positions = alloca %struct.BuiltInTrianglePositions, align 4
   %0 = bitcast %struct.BuiltInTrianglePositions* %positions to i8*, !dbg !48 ; line:48 col:5
   call void @llvm.lifetime.start(i64 36, i8* %0) #0, !dbg !48 ; line:48 col:5
-  call void @"dx.hl.op..void (i32, %struct.BuiltInTrianglePositions*)"(i32 401, %struct.BuiltInTrianglePositions* %positions), !dbg !52 ; line:48 col:42
+  call void @"dx.hl.op..void (i32, %struct.BuiltInTrianglePositions*)"(i32 397, %struct.BuiltInTrianglePositions* %positions), !dbg !52 ; line:48 col:42
   %p0 = getelementptr inbounds %struct.BuiltInTrianglePositions, %struct.BuiltInTrianglePositions* %positions, i32 0, i32 0, !dbg !53 ; line:49 col:38
   %1 = load <3 x float>, <3 x float>* %p0, align 4, !dbg !54 ; line:49 col:28
   %2 = extractelement <3 x float> %1, i32 0, !dbg !54 ; line:49 col:28
@@ -92,7 +92,7 @@ entry:
   %1 = load <4 x float>, <4 x float>* %0
   %2 = bitcast %struct.BuiltInTrianglePositions* %positions to i8*, !dbg !61 ; line:60 col:5
   call void @llvm.lifetime.start(i64 36, i8* %2) #0, !dbg !61 ; line:60 col:5
-  call void @"dx.hl.op..void (i32, %struct.BuiltInTrianglePositions*)"(i32 401, %struct.BuiltInTrianglePositions* %positions), !dbg !63 ; line:60 col:42
+  call void @"dx.hl.op..void (i32, %struct.BuiltInTrianglePositions*)"(i32 397, %struct.BuiltInTrianglePositions* %positions), !dbg !63 ; line:60 col:42
   %p0 = getelementptr inbounds %struct.BuiltInTrianglePositions, %struct.BuiltInTrianglePositions* %positions, i32 0, i32 0, !dbg !64 ; line:61 col:19
   %3 = load <3 x float>, <3 x float>* %p0, align 4, !dbg !65 ; line:61 col:9
   %4 = extractelement <3 x float> %3, i32 0, !dbg !65 ; line:61 col:9
@@ -130,7 +130,7 @@ while.body:                                       ; preds = %entry, %while.cond.
   br i1 %cmp, label %if.then, label %while.cond.backedge, !dbg !74 ; line:87 col:13
 
 if.then:                                          ; preds = %while.body
-  %5 = call %struct.BuiltInTrianglePositions* @"dx.hl.op.ro.%struct.BuiltInTrianglePositions* (i32, i32)"(i32 402, i32 %q14), !dbg !76 ; line:88 col:53
+  %5 = call %struct.BuiltInTrianglePositions* @"dx.hl.op.ro.%struct.BuiltInTrianglePositions* (i32, i32)"(i32 398, i32 %q14), !dbg !76 ; line:88 col:53
   %6 = getelementptr inbounds %struct.BuiltInTrianglePositions, %struct.BuiltInTrianglePositions* %5, i32 0, i32 0, !dbg !76 ; line:88 col:53
   %7 = load <3 x float>, <3 x float>* %6, !dbg !76 ; line:88 col:53
   %8 = extractelement <3 x float> %7, i32 0, !dbg !77 ; line:89 col:36
@@ -151,7 +151,7 @@ while.end:                                        ; preds = %while.cond.backedge
   br i1 %cmp7, label %if.then.10, label %if.end.13, !dbg !81 ; line:93 col:9
 
 if.then.10:                                       ; preds = %while.end
-  %15 = call %struct.BuiltInTrianglePositions* @"dx.hl.op.ro.%struct.BuiltInTrianglePositions* (i32, i32)"(i32 403, i32 %q14), !dbg !83 ; line:94 col:49
+  %15 = call %struct.BuiltInTrianglePositions* @"dx.hl.op.ro.%struct.BuiltInTrianglePositions* (i32, i32)"(i32 399, i32 %q14), !dbg !83 ; line:94 col:49
   %16 = getelementptr inbounds %struct.BuiltInTrianglePositions, %struct.BuiltInTrianglePositions* %15, i32 0, i32 1, !dbg !83 ; line:94 col:49
   %17 = load <3 x float>, <3 x float>* %16, !dbg !83 ; line:94 col:49
   %18 = extractelement <3 x float> %17, i32 1, !dbg !84 ; line:95 col:32
@@ -185,7 +185,7 @@ entry:
   br i1 %8, label %if.then, label %if.end, !dbg !92 ; line:117 col:9
 
 if.then:                                          ; preds = %entry
-  %9 = call %struct.BuiltInTrianglePositions* @"dx.hl.op.ro.%struct.BuiltInTrianglePositions* (i32, %dx.types.HitObject*)"(i32 404, %dx.types.HitObject* %hit), !dbg !93 ; line:118 col:46
+  %9 = call %struct.BuiltInTrianglePositions* @"dx.hl.op.ro.%struct.BuiltInTrianglePositions* (i32, %dx.types.HitObject*)"(i32 400, %dx.types.HitObject* %hit), !dbg !93 ; line:118 col:46
   %10 = getelementptr inbounds %struct.BuiltInTrianglePositions, %struct.BuiltInTrianglePositions* %9, i32 0, i32 0, !dbg !93 ; line:118 col:46
   %11 = load <3 x float>, <3 x float>* %10, !dbg !93 ; line:118 col:46
   %12 = getelementptr inbounds %struct.BuiltInTrianglePositions, %struct.BuiltInTrianglePositions* %9, i32 0, i32 1, !dbg !93 ; line:118 col:46
diff --git a/tools/clang/test/HLSLFileCheckLit/hlsl/intrinsics/basic/debugbreak.hlsl b/tools/clang/test/HLSLFileCheckLit/hlsl/intrinsics/basic/debugbreak.hlsl
index afa6f34d3b..282384ea3d 100644
--- a/tools/clang/test/HLSLFileCheckLit/hlsl/intrinsics/basic/debugbreak.hlsl
+++ b/tools/clang/test/HLSLFileCheckLit/hlsl/intrinsics/basic/debugbreak.hlsl
@@ -5,7 +5,7 @@
 // RUN: %dxc -T cs_6_10 -ast-dump %s | FileCheck %s --check-prefix=AST
 
 // CHECK: call void @dx.op.debugBreak(i32 -2147483615)  ; DebugBreak()
-// FCGL: call void @"dx.hl.op..void (i32)"(i32 424)
+// FCGL: call void @"dx.hl.op..void (i32)"(i32 420)
 
 // AST: CallExpr {{.*}} 'void'
 // AST-NEXT: `-ImplicitCastExpr {{.*}} 'void (*)()' <FunctionToPointerDecay>
diff --git a/tools/clang/test/HLSLFileCheckLit/hlsl/intrinsics/basic/isdebuggerpresent.hlsl b/tools/clang/test/HLSLFileCheckLit/hlsl/intrinsics/basic/isdebuggerpresent.hlsl
index b9e0096633..aa43b6f253 100644
--- a/tools/clang/test/HLSLFileCheckLit/hlsl/intrinsics/basic/isdebuggerpresent.hlsl
+++ b/tools/clang/test/HLSLFileCheckLit/hlsl/intrinsics/basic/isdebuggerpresent.hlsl
@@ -6,7 +6,7 @@
 
 // CHECK: call i1 @dx.op.isDebuggerPresent(i32 -2147483614)  ; IsDebuggerPresent()
 
-// FCGL: call i1 @"dx.hl.op.ro.i1 (i32)"(i32 425)
+// FCGL: call i1 @"dx.hl.op.ro.i1 (i32)"(i32 421)
 
 // AST: CallExpr {{.*}} 'bool'
 // AST-NEXT: `-ImplicitCastExpr {{.*}} 'bool (*)()' <FunctionToPointerDecay>
diff --git a/tools/clang/test/HLSLFileCheckLit/hlsl/intrinsics/wave/group-wave-index.hlsl b/tools/clang/test/HLSLFileCheckLit/hlsl/intrinsics/wave/group-wave-index.hlsl
index 263f65b2c2..3469dc24a8 100644
--- a/tools/clang/test/HLSLFileCheckLit/hlsl/intrinsics/wave/group-wave-index.hlsl
+++ b/tools/clang/test/HLSLFileCheckLit/hlsl/intrinsics/wave/group-wave-index.hlsl
@@ -3,8 +3,8 @@
 // RUN: %dxc -T cs_6_10 -E main -fcgl %s | FileCheck %s --check-prefix=FCGL
 // RUN: %dxc -T cs_6_10 -E main %s | FileCheck %s
 
-// FCGL: call i32 @"dx.hl.op.rn.i32 (i32)"(i32 396)
-// FCGL: call i32 @"dx.hl.op.rn.i32 (i32)"(i32 395)
+// FCGL: call i32 @"dx.hl.op.rn.i32 (i32)"(i32 392)
+// FCGL: call i32 @"dx.hl.op.rn.i32 (i32)"(i32 391)
 
 // CHECK: %[[Index:[^ ]+]] = call i32 @dx.op.getGroupWaveIndex(i32 -2147483647)  ; GetGroupWaveIndex()
 // CHECK: %[[Count:[^ ]+]] = call i32 @dx.op.getGroupWaveCount(i32 -2147483646)  ; GetGroupWaveCount()
diff --git a/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-failing.ll b/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-failing.ll
deleted file mode 100644
index ed738aacca..0000000000
--- a/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-failing.ll
+++ /dev/null
@@ -1,86 +0,0 @@
-; REQUIRES: dxil-1-10
-; RUN: not %dxv %s 2>&1 | FileCheck %s
-
-; Original Source: \tools\clang\test\CodeGenHLSL\linalg\outer-product-accumulate-matrix-layout.hlsl
-; The failing tests were generated by manually editing the IR produced from the IR from the passing
-; case generated by running the hlsl above (Original Source)
-
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-ms-dx"
-
-%dx.types.Handle = type { i8* }
-%dx.types.ResBind = type { i32, i32, i32, i8 }
-%dx.types.ResourceProperties = type { i32, i32 }
-%dx.types.ResRet.v8f16 = type { <8 x half>, i32 }
-%struct.ByteAddressBuffer = type { i32 }
-%struct.RWByteAddressBuffer = type { i32 }
-
-; As noted in other tests, the validation errors come out in
-; an order different from the IR. So listed them here in the
-; order they appear and added comments for correlation
-
-;CHECK: error: matrix stride must be a constant zero for optimal layouts
-;CHECK: error: matrix stride must be a constant zero for optimal layouts
-;CHECK-NOT: error: matrix layout value 'OuterProductOptimal' is not valid for outerproductaccumulate, must be 'OuterProductOptimal'
-;CHECK: error: matrix layout value 'MulOptimal' is not valid for outerproductaccumulate, must be 'OuterProductOptimal'
-;CHECK: error: matrix layout value 'ColumnMajor' is not valid for outerproductaccumulate, must be 'OuterProductOptimal'
-;CHECK: error: matrix layout value 'RowMajor' is not valid for outerproductaccumulate, must be 'OuterProductOptimal'
-; CHECK: Validation failed.
-
-define void @main() {
-  %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false)  ; CreateHandleFromBinding(bind,index,nonUniformIndex)
-  %2 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false)  ; CreateHandleFromBinding(bind,index,nonUniformIndex)
-  %3 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind zeroinitializer, i32 0, i1 false)  ; CreateHandleFromBinding(bind,index,nonUniformIndex)
-  %4 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 11, i32 0 })  ; AnnotateHandle(res,props)  resource: ByteAddressBuffer
-  %5 = call %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32 303, %dx.types.Handle %4, i32 0, i32 undef, i32 2)  ; RawBufferVectorLoad(buf,index,elementOffset,alignment)
-  %6 = extractvalue %dx.types.ResRet.v8f16 %5, 0
-  %7 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 11, i32 0 })  ; AnnotateHandle(res,props)  resource: ByteAddressBuffer
-  %8 = call %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32 303, %dx.types.Handle %7, i32 0, i32 undef, i32 2)  ; RawBufferVectorLoad(buf,index,elementOffset,alignment)
-  %9 = extractvalue %dx.types.ResRet.v8f16 %8, 0
-  %10 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 })  ; AnnotateHandle(res,props)  resource: RWByteAddressBuffer
-  ; error: matrix layout value 'RowMajor' is not valid for outerproductaccumulate, must be 'OuterProductOptimal'
-  call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 0, i32 0)  ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride)
-  ; error: matrix layout value 'ColumnMajor' is not valid for outerproductaccumulate, must be 'OuterProductOptimal'
-  call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 1, i32 0)  ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride)
-  ; matrix layout value 'MulOptimal' is not valid for outerproductaccumulate, must be 'OuterProductOptimal'
-  call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 2, i32 0)  ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride)
-  ; error: matrix stride must be a constant zero for optimal layouts
-  call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 3, i32 64)  ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride)
-  ; error: matrix stride must be a constant zero for optimal layouts
-  call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 3, i32 63)  ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride)
-  ret void
-}
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32, %dx.types.Handle, i32, i32, i32) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.outerProductAccumulate.v8f16.v8f16(i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #2
-
-attributes #0 = { nounwind readonly }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind readnone }
-
-!dx.version = !{!0}
-!dx.valver = !{!0}
-!dx.shaderModel = !{!1}
-!dx.resources = !{!2}
-!dx.entryPoints = !{!8}
-
-!0 = !{i32 1, i32 9}
-!1 = !{!"cs", i32 6, i32 10}
-!2 = !{!3, !6, null, null}
-!3 = !{!4, !5}
-!4 = !{i32 0, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i32 0, null}
-!5 = !{i32 1, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 1, i32 1, i32 11, i32 0, null}
-!6 = !{!7}
-!7 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null}
-!8 = !{void ()* @main, !"main", null, !2, !9}
-!9 = !{i32 0, i64 8598323216, i32 4, !10}
-!10 = !{i32 1, i32 1, i32 1}
diff --git a/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-passing.ll b/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-passing.ll
deleted file mode 100644
index 74ec587710..0000000000
--- a/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-passing.ll
+++ /dev/null
@@ -1,65 +0,0 @@
-; REQUIRES: dxil-1-10
-; RUN: %dxv %s 2>&1 | FileCheck %s
-
-;Original Source: \tools\clang\test\CodeGenHLSL\linalg\outer-product-accumulate-matrix-layout.hlsl
-
-target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64"
-target triple = "dxil-ms-dx"
-
-%dx.types.Handle = type { i8* }
-%dx.types.ResBind = type { i32, i32, i32, i8 }
-%dx.types.ResourceProperties = type { i32, i32 }
-%dx.types.ResRet.v8f16 = type { <8 x half>, i32 }
-%struct.ByteAddressBuffer = type { i32 }
-%struct.RWByteAddressBuffer = type { i32 }
-
-;CHECK: Validation succeeded.
-
-define void @main() {
-  %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false)  ; CreateHandleFromBinding(bind,index,nonUniformIndex)
-  %2 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false)  ; CreateHandleFromBinding(bind,index,nonUniformIndex)
-  %3 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind zeroinitializer, i32 0, i1 false)  ; CreateHandleFromBinding(bind,index,nonUniformIndex)
-  %4 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 11, i32 0 })  ; AnnotateHandle(res,props)  resource: ByteAddressBuffer
-  %5 = call %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32 303, %dx.types.Handle %4, i32 0, i32 undef, i32 2)  ; RawBufferVectorLoad(buf,index,elementOffset,alignment)
-  %6 = extractvalue %dx.types.ResRet.v8f16 %5, 0
-  %7 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 11, i32 0 })  ; AnnotateHandle(res,props)  resource: ByteAddressBuffer
-  %8 = call %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32 303, %dx.types.Handle %7, i32 0, i32 undef, i32 2)  ; RawBufferVectorLoad(buf,index,elementOffset,alignment)
-  %9 = extractvalue %dx.types.ResRet.v8f16 %8, 0
-  %10 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 })  ; AnnotateHandle(res,props)  resource: RWByteAddressBuffer
-  call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 3, i32 0)  ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride)
-  ret void
-}
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32, %dx.types.Handle, i32, i32, i32) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.outerProductAccumulate.v8f16.v8f16(i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #2
-
-attributes #0 = { nounwind readonly }
-attributes #1 = { nounwind }
-attributes #2 = { nounwind readnone }
-
-!dx.version = !{!0}
-!dx.valver = !{!0}
-!dx.shaderModel = !{!1}
-!dx.resources = !{!2}
-!dx.entryPoints = !{!8}
-
-!0 = !{i32 1, i32 10}
-!1 = !{!"cs", i32 6, i32 10}
-!2 = !{!3, !6, null, null}
-!3 = !{!4, !5}
-!4 = !{i32 0, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i32 0, null}
-!5 = !{i32 1, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 1, i32 1, i32 11, i32 0, null}
-!6 = !{!7}
-!7 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null}
-!8 = !{void ()* @main, !"main", null, !2, !9}
-!9 = !{i32 0, i64 8598323216, i32 4, !10}
-!10 = !{i32 1, i32 1, i32 1}
diff --git a/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/mul_add_invalid.hlsl b/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/mul_add_invalid.hlsl
deleted file mode 100644
index ea19d21dc2..0000000000
--- a/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/mul_add_invalid.hlsl
+++ /dev/null
@@ -1,1398 +0,0 @@
-// RUN: %dxc -I %hlsl_headers -T lib_6_10 -enable-16bit-types %s -verify
-
-#include <dx/coopvec.h>
-
-using namespace dx::coopvec;
-
-ByteAddressBuffer input_vector_buffer;
-ByteAddressBuffer matrix_buffer;
-ByteAddressBuffer bias_buffer;
-RWByteAddressBuffer output_vector_buffer;
-ByteAddressBuffer constants_buffer;
-
-// Output vector, isUnsigned mismatch
-void test_invalid_output_vector_type() {
-
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  vector<uint, 4> output_vector_0;
-  const uint is_output_unsigned_0 = 0;
-
-  // expected-error@+1 {{IsOuputUnsigned must be true for vector of unsigned integer type}}
-  __builtin_MatVecMulAdd(output_vector_0, is_output_unsigned_0, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  vector<int32_t, 4> output_vector_1;
-  const uint is_output_unsigned_1 = 1;
-
-  // expected-error@+1 {{IsOuputUnsigned must be false for vector of signed integer type}}
-  __builtin_MatVecMulAdd(output_vector_1, is_output_unsigned_1, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  vector<float, 4> output_vector_2;
-  const uint is_output_unsigned_2 = 1;
-
-  // expected-error@+1 {{IsOuputUnsigned must be false for vector of floating point type}}
-  __builtin_MatVecMulAdd(output_vector_2, is_output_unsigned_2, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// IsOutputUnsigned is not a constant parameter
-void test_invalid_is_output_unsigned_non_const() {
-
-  vector<uint, 4> output_vector_0;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  const uint is_output_unsigned_0 = constants_buffer.Load<uint>(0);
-
-  // expected-error@+1 {{expression is not an integer constant expression}}
-  __builtin_MatVecMulAdd(output_vector_0, is_output_unsigned_0, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Input vector is incorrect type - 64 bit types
-void test_invalid_input_vector_type() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-    vector<int64_t, 4> input_vector_0 =
-      input_vector_buffer.Load<vector<int64_t, 4> >(0);
-    const uint is_input_unsigned_0 = 0;
-
-// expected-error@+2 {{no matching function for call to '__builtin_MatVecMulAdd'}}
-// expected-note@+1 {{candidate function not viable: no known conversion from 'vector<int64_t, 4>' to 'vector<float, 4>' for 3rd argument}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0,
-                        is_input_unsigned_0, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-    vector<uint64_t, 4> input_vector_1 =
-      input_vector_buffer.Load<vector<uint64_t, 4> >(0);
-    const uint is_input_unsigned_1 = 1;
-
-// expected-error@+2 {{no matching function for call to '__builtin_MatVecMulAdd'}}
-// expected-note@+1 {{candidate function not viable: no known conversion from 'vector<uint64_t, 4>' to 'vector<float, 4>' for 3rd argument}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1,
-                        is_input_unsigned_1, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-    vector<float64_t, 4> input_vector_2 =
-      input_vector_buffer.Load<vector<float64_t, 4> >(0);
-    const uint is_input_unsigned_2 = 0;
-
-// expected-error@+2 {{no matching function for call to '__builtin_MatVecMulAdd'}}
-// expected-note@+1 {{candidate function not viable: no known conversion from 'vector<float64_t, 4>' to 'vector<float, 4>' for 3rd argument}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_2,
-                        is_input_unsigned_2, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Input vector is incorrect type for packed InputInterpretation
-void test_invalid_input_vector_type_packed_input_interpretation() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  const uint input_interpretation_0 = DataType::DATA_TYPE_SINT8_T4_PACKED;
-  vector<int16_t, 2> input_vector_0 =
-      input_vector_buffer.Load<vector<int16_t, 2> >(0);
-  const uint is_input_unsigned_0 = 1;
-
-  // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0,
-                        is_input_unsigned_0, input_interpretation_0, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED;
-  vector<uint16_t, 2> input_vector_1 =
-      input_vector_buffer.Load<vector<uint16_t, 2> >(0);
-  const uint is_input_unsigned_1 = 0;
-
-  // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} 
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1,
-                        is_input_unsigned_1, input_interpretation_1, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_2 = DataType::DATA_TYPE_UINT8_T4_PACKED;
-  vector<int32_t, 1> input_vector_2 =
-      input_vector_buffer.Load<vector<int32_t, 1> >(0);
-  const uint is_input_unsigned_2 = 1;
-  
-  // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_2,
-                        is_input_unsigned_2, input_interpretation_2, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_3 = DataType::DATA_TYPE_SINT8_T4_PACKED;
-  vector<int32_t, 1> input_vector_3 =
-      input_vector_buffer.Load<vector<int32_t, 1> >(0);
-  const uint is_input_unsigned_3 = 0;
-
-  // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_3,
-                        is_input_unsigned_3, input_interpretation_3, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_4 = DataType::DATA_TYPE_SINT8_T4_PACKED;
-  vector<float, 1> input_vector_4 =
-      input_vector_buffer.Load<vector<float, 1> >(0);
-  const uint is_input_unsigned_4 = 0;
-
-  // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_4, 
-                        is_input_unsigned_4, input_interpretation_4, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// IsInputUnsigned must be true for packed input vector type
-void test_invalid_is_input_unsigned_packed_input_vector_type() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;  
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED;  
-  vector<uint, 1> input_vector_0 = 
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint is_input_unsigned_0 = 0;
-
-  // expected-error@+2 {{IsInputUnsigned must be true for packed input interpretations in linalg mul/muladd operations}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0,
-                        is_input_unsigned_0, input_interpretation_0, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,  
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED;
-  vector<uint, 1> input_vector_1 =
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint is_input_unsigned_1 = 0;
-  
-  // expected-error@+2 {{IsInputUnsigned must be true for packed input interpretations in linalg mul/muladd operations}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1,
-                        is_input_unsigned_1, input_interpretation_1, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check packed input vector dimension
-void test_invalid_packed_input_vector_dimension() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_UINT8_T4_PACKED;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_UINT8;
-  const uint matrix_dimM = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 0;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_UINT32;
-
-  vector<uint, 4> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 4> >(0);
-  const uint matrix_dimK_0 = 4;
-
-  // expected-error@+1 {{packed input vector length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK_0, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  vector<uint, 1> input_vector_1 =
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint matrix_dimK_1 = 7;
-
-  // expected-error@+1 {{packed input vector length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1,
-                        is_input_unsigned, input_interpretation, matrix_buffer, 
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK_1, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  vector<uint, 3> input_vector_2 =
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint matrix_dimK_2 = 7;
-
-  // expected-error@+1 {{packed input vector length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_2,
-                        is_input_unsigned, input_interpretation, matrix_buffer, 
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK_2, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-}
-
-// Check is Input vector type/isInputUnsigned matched
-void test_invalid_input_vector_type_mismatch() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  vector<uint, 4> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 4> >(0);    
-  const uint is_input_unsigned_0 = 0;
-
-  // expected-error@+2 {{IsInputUnsigned must be true for vector of unsigned integer type}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0,
-                        is_input_unsigned_0, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  vector<int32_t, 4> input_vector_1 =
-      input_vector_buffer.Load<vector<int32_t, 4> >(0);
-  const uint is_input_unsigned_1 = 1;
-
-  // expected-error@+2 {{IsInputUnsigned must be false for vector of signed integer type}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1,
-                        is_input_unsigned_1, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  vector<float16_t, 4> input_vector_2 =
-      input_vector_buffer.Load<vector<float16_t, 4> >(0);
-  const uint is_input_unsigned_2 = 1;
-
-  // expected-error@+2 {{IsInputUnsigned must be false for vector of floating point type}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_2,
-                        is_input_unsigned_2, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-//  Check is Matrix M dimension is a constant parameter
-void test_invalid_matrix_M_dimension() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64; 
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  const uint matrix_dimM = constants_buffer.Load<uint>(0);   
-  
-  // expected-error@+3 {{expression is not an integer constant expression}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-//  Check is Matrix K dimension is a constant parameter
-void test_invalid_matrix_K_dimension() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0; 
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  const uint matrix_dimK = constants_buffer.Load<uint>(0);
-  
-  // expected-error@+4 {{expression is not an integer constant expression}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check is Matrix M dimension is non-zero
-void test_invalid_matrix_M_dimension_non_zero() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  const uint matrix_dimM = 0;
-  // expected-error@+3 {{matrix dimension must be greater than 0}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check is Matrix K dimension is non-zero
-void test_invalid_matrix_K_dimension_non_zero() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  const uint matrix_dimK = 0;
-  // expected-error@+4 {{matrix dimension must be greater than 0}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check if Matrix M dimension is less than Max
-void test_invalid_matrix_M_dimension_less_than_Max() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 1;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = matrix_dimK * 4;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  vector<uint, 4> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 4> >(0);
-  const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM_0 = 1025;
-
-  // expected-error@+3 {{matrix dimension M must be less than 1024, in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0,
-                        is_input_unsigned, input_interpretation_0, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM_0,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  vector<uint, 1> input_vector_1 =
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED;
-  const uint matrix_dimM_1 = 4097;
-
-  // expected-error@+3 {{matrix dimension M must be less than 1024, in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1,
-                        is_input_unsigned, input_interpretation_1, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM_1,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check if Matrix K dimension is less than Max in unpacked input vector case
-void test_invalid_matrix_K_dimension_less_than_Max_unpacked_input_vector() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 1;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  vector<uint, 4> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 4> >(0);
-  const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimK_0 = 1025;
-
-  // expected-error@+4 {{matrix dimension K when using unpacked input vectors must be less than 1024, in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0,
-                        is_input_unsigned, input_interpretation_0, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK_0, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  vector<uint, 4> input_vector_1 =
-      input_vector_buffer.Load<vector<uint, 4> >(0);
-  const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8;
-  const uint matrix_dimK_1 = 4096;
-  // expected-error@+4 {{matrix dimension K when using unpacked input vectors must be less than 1024, in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, 
-                        is_input_unsigned, input_interpretation_1, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK_1, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-}
-
-// Check if Matrix M dimension is less than Max in packed input vector case
-void test_invalid_matrix_M_dimension_less_than_Max_packed_input_vector() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 1;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 1024;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 4096;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  vector<uint, 1024> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 1024> >(0);
-  const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED;
-  const uint matrix_dimK_0 = 4097;
-
-  // expected-error@+4 {{matrix dimension K when using packed input vectors must be less than 4096, in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0,
-                        is_input_unsigned, input_interpretation_0, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK_0, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-void test_invalid_input_interpretation_non_const() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  const uint input_interpretation = constants_buffer.Load<uint>(0);
-
-  // expected-error@+2 {{expression is not an integer constant expression}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check if InputInterpretation is a valid value
-void test_invalid_input_interpretation_value() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);   
-  const uint is_input_unsigned = 0;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  const uint input_interpretation_0 = 0;
-
-  // expected-error@+2 {{0 is an invalid register interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation_0, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_1 = 1;
-
-  // expected-error@+2 {{1 is an invalid register interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation_1, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_2 = 6;
-
-  // expected-error@+2 {{6 is an invalid register interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation_2, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_3 = 7;
-
-  // expected-error@+2 {{7 is an invalid register interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation_3, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);   
-
-  const uint input_interpretation_4 = 10;
-
-  // expected-error@+2 {{10 is an invalid register interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation_4, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,    
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_5 = 11;       
-
-  // expected-error@+2 {{11 is an invalid register interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation_5, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_6 = 12;
-
-  // expected-error@+2 {{12 is an invalid register interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation_6, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_7 = 13;
-
-  // expected-error@+2 {{13 is an invalid register interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation_7, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_8 = 14;
-
-  // expected-error@+2 {{14 is an invalid register interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation_8, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_9 = 15;
-
-  // expected-error@+2 {{15 is an invalid register interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation_9, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_10 = 16;
-
-  // expected-error@+2 {{16 is an invalid register interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation_10, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_11 = 23;
-
-  // expected-error@+2 {{23 is an invalid register interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation_11, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_12 = 100;
-
-  // expected-error@+2 {{100 is an invalid register interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation_12, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-// Check if Input and Output vector dimensions are valid -non packed
-void test_invalid_input_output_vector_dimensions_non_packed_square_matrix() {
-
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 32;
-  const uint matrix_dimK = 32;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  vector<uint, 32> output_vector_0;
-  vector<float, 30> input_vector_0 =   
-      input_vector_buffer.Load<vector<float, 30> >(0);
-
-  // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMulAdd(output_vector_0, is_output_unsigned, input_vector_0,  
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  vector<uint, 30> output_vector_1;
-  vector<float, 32> input_vector_1 =   
-      input_vector_buffer.Load<vector<float, 32> >(0);
-
-  // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMulAdd(output_vector_1, is_output_unsigned, input_vector_1,    
-                        is_input_unsigned, input_interpretation, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check if Input and Output vector dimensions are valid -non packed
-void test_invalid_input_output_vector_dimensions_non_packed_rectangle_matrix() {
-
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 16;
-  const uint matrix_dimK = 32;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  // Use dimension of Matrix K to trigger error
-  vector<uint, 32> output_vector_0;
-  vector<float, 32> input_vector_0 =   
-      input_vector_buffer.Load<vector<float, 32> >(0);
-
-  // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMulAdd(output_vector_0, is_output_unsigned, input_vector_0,  
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride, bias_buffer, bias_offset, bias_interpretation);
- 
- // Check off by 1 errors
-  vector<uint, 17> output_vector_1;
-  vector<float, 16> input_vector_1 =   
-      input_vector_buffer.Load<vector<float, 16> >(0);
-
-  // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMulAdd(output_vector_1, is_output_unsigned, input_vector_1,    
-                        is_input_unsigned, input_interpretation, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
- // Check off by 1 errors
- vector<uint, 15> output_vector_2;
- vector<float, 16> input_vector_2 =   
-     input_vector_buffer.Load<vector<float, 16> >(0);
-
- // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}}         
- __builtin_MatVecMulAdd(output_vector_2, is_output_unsigned, input_vector_2,    
-                        is_input_unsigned, input_interpretation, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  // Use dimension of Matrix M to trigger error 
-  vector<uint, 16> output_vector_3;
-  vector<float, 16> input_vector_3 =   
-      input_vector_buffer.Load<vector<float, 16> >(0);
-
-  // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMulAdd(output_vector_3, is_output_unsigned, input_vector_3,  
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  // Check off by 1 errors
-  vector<uint, 16> output_vector_4;
-  vector<float, 31> input_vector_4 =   
-      input_vector_buffer.Load<vector<float, 31> >(0);
-
-  // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}}    
-  __builtin_MatVecMulAdd(output_vector_4, is_output_unsigned, input_vector_4,  
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  // Check off by 1 errors
-  vector<uint, 16> output_vector_5;
-  vector<float, 33> input_vector_5 =   
-      input_vector_buffer.Load<vector<float, 33> >(0);
-
-  // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}}    
-  __builtin_MatVecMulAdd(output_vector_5, is_output_unsigned, input_vector_5,  
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-    // Swap dimensions to trigger error
-    vector<uint, 32> output_vector_6;
-    vector<float, 16> input_vector_6 =   
-        input_vector_buffer.Load<vector<float, 16> >(0);
-
-    // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}}    
-    __builtin_MatVecMulAdd(output_vector_6, is_output_unsigned, input_vector_6,  
-                          is_input_unsigned, input_interpretation, matrix_buffer,
-                          matrix_offset, matrix_interpretation, matrix_dimM,
-                          matrix_dimK, matrix_layout, matrix_is_transposed,
-                          matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check if matrtrix  interpretation is a constant value
-void test_invalid_matrix_interpretation_constant_value() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  const uint matrix_interpretation_0 = constants_buffer.Load<uint>(0);
-
-  // expected-error@+3 {{expression is not an integer constant expression}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation_0, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check for invalid matrix interpretation value
-void test_invalid_matrix_interpretation_value() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  const uint matrix_interpretation_0 = 0;
-
-  // expected-error@+3 {{0 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation_0, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint matrix_interpretation_1 = 1;
-
-  // expected-error@+3 {{1 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation_1, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint matrix_interpretation_2 = 6;
-
-  // expected-error@+3 {{6 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation_2, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint matrix_interpretation_3 = 7;
-
-  // expected-error@+3 {{7 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,   
-                        matrix_offset, matrix_interpretation_3, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint matrix_interpretation_4 = 10;
-
-  // expected-error@+3 {{10 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation_4, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint matrix_interpretation_5 = 11;
-
-  // expected-error@+3 {{11 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation_5, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint matrix_interpretation_6 = 12;
-
-  // expected-error@+3 {{12 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation_6, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint matrix_interpretation_7 = 13;
-
-  // expected-error@+3 {{13 is an invalid memory interpretation value}} 
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation_7, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint matrix_interpretation_8 = 14;
-
-  // expected-error@+3 {{14 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation_8, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint matrix_interpretation_9 = 15;
-
-  // expected-error@+3 {{15 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation_9, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint matrix_interpretation_10 = 16;
-
-  // expected-error@+3 {{16 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation_10, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint matrix_interpretation_11 = 23;
-  // expected-error@+3 {{23 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation_11, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint matrix_interpretation_12 = 100;
-
-  // expected-error@+3 {{100 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation_12, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check if matrix Layout is a constant value
-void test_invalid_matrix_layout_constant_value() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  const uint matrix_layout = constants_buffer.Load<uint>(0);
-
-  // expected-error@+4 {{expression is not an integer constant expression}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check invalid matrix layout value
-void test_invalid_matrix_layout_value() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  const uint matrix_layout_0 = 4;
-
-  // expected-error@+4 {{matrix layout 4 is not valid, must be in the range [0, 3]}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout_0, matrix_is_transposed,
-                      matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check if matrix is transposed is a constant value
-void test_invalid_matrix_transposed_constant_value() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = constants_buffer.Load<bool>(0);
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  // expected-error@+4 {{expression is not an integer constant expression}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check if invalid matrix transpose value is used
-void test_invalid_matrix_transpose_value() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =   
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;   
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  const uint matrix_layout_0 = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed_0 = true;
-
-  // expected-error@+4 {{RowMajor and ColumnMajor matrices are not transposable}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout_0, matrix_is_transposed_0,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint matrix_layout_1 = MatrixLayout::MATRIX_LAYOUT_COLUMN_MAJOR;
-  const bool matrix_is_transposed_1 = true;
-
-  // expected-error@+4 {{RowMajor and ColumnMajor matrices are not transposable}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout_1, matrix_is_transposed_1,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-
-// Check invalid matrix stride value for optimal matrix layout
-void test_invalid_matrix_stride_constant_value() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const bool matrix_is_transposed = false;
-
-  const uint matrix_layout_0 = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL;
-  const uint matrix_stride_0 = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  // expected-error@+5 {{for optimal matrix layout, matrix stride must be 0}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout_0, matrix_is_transposed,
-                        matrix_stride_0, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint matrix_layout_1 = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL;
-  const uint matrix_stride_1 = 64;
-  
-  // expected-error@+5 {{for optimal matrix layout, matrix stride must be 0}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                        is_input_unsigned, input_interpretation, matrix_buffer,   
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout_1, matrix_is_transposed,
-                        matrix_stride_1, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check bias interpretation is not a constant value
-void test_invalid_bias_interpretation() {
-  vector<float, 4> output_vector;
-  const uint is_output_unsigned = 0;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const uint matrix_is_transposed = 0;
-  const uint matrix_stride = 0;
-  const uint bias_offset = 0;
-
-  const uint bias_interpretation_0 = constants_buffer.Load<uint>(0);
-
-  // expected-error@+6 {{expression is not an integer constant expression}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                         is_input_unsigned, input_interpretation, matrix_buffer,
-                         matrix_offset, matrix_interpretation, matrix_dimM,
-                         matrix_dimK, matrix_layout, matrix_is_transposed,
-                         matrix_stride, bias_buffer, bias_offset,
-                         bias_interpretation_0);
-}
-
-// Check bias interpretation is not a valid value
-void test_invalid_bias_interpretation_value() {
-  vector<float, 4> output_vector;
-  const uint is_output_unsigned = 0;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4; 
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const uint matrix_is_transposed = 0;
-  const uint matrix_stride = 0;
-  const uint bias_offset = 0;
-
-  const uint bias_interpretation_0 = 0;
-
-  // expected-error@+6 {{0 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                         is_input_unsigned, input_interpretation, matrix_buffer,
-                         matrix_offset, matrix_interpretation, matrix_dimM,
-                         matrix_dimK, matrix_layout, matrix_is_transposed,
-                         matrix_stride, bias_buffer, bias_offset,
-                         bias_interpretation_0);
-
-  const uint bias_interpretation_1 = 1;
-
-  // expected-error@+6 {{1 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                         is_input_unsigned, input_interpretation, matrix_buffer,
-                         matrix_offset, matrix_interpretation, matrix_dimM,
-                         matrix_dimK, matrix_layout, matrix_is_transposed,
-                         matrix_stride, bias_buffer, bias_offset,
-                         bias_interpretation_1);
-
-  const uint bias_interpretation_2 = 6;
-
-  // expected-error@+6 {{6 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                         is_input_unsigned, input_interpretation, matrix_buffer,
-                         matrix_offset, matrix_interpretation, matrix_dimM,
-                         matrix_dimK, matrix_layout, matrix_is_transposed,
-                         matrix_stride, bias_buffer, bias_offset,
-                         bias_interpretation_2);
-
-  const uint bias_interpretation_3 = 7;
-
-  // expected-error@+6 {{7 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                         is_input_unsigned, input_interpretation, matrix_buffer,
-                         matrix_offset, matrix_interpretation, matrix_dimM,
-                         matrix_dimK, matrix_layout, matrix_is_transposed,
-                         matrix_stride, bias_buffer, bias_offset,
-                         bias_interpretation_3);
-
-  const uint bias_interpretation_4 = 10;
-
-  // expected-error@+6 {{10 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                         is_input_unsigned, input_interpretation, matrix_buffer,
-                         matrix_offset, matrix_interpretation, matrix_dimM,
-                         matrix_dimK, matrix_layout, matrix_is_transposed,
-                         matrix_stride, bias_buffer, bias_offset,
-                         bias_interpretation_4);
-
-  const uint bias_interpretation_5 = 11;
-
-  // expected-error@+6 {{11 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                         is_input_unsigned, input_interpretation, matrix_buffer,
-                         matrix_offset, matrix_interpretation, matrix_dimM,
-                         matrix_dimK, matrix_layout, matrix_is_transposed,
-                         matrix_stride, bias_buffer, bias_offset,
-                         bias_interpretation_5);
-
-  const uint bias_interpretation_6 = 12;
-
-  // expected-error@+6 {{12 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                         is_input_unsigned, input_interpretation, matrix_buffer,  
-                         matrix_offset, matrix_interpretation, matrix_dimM,
-                         matrix_dimK, matrix_layout, matrix_is_transposed,
-                         matrix_stride, bias_buffer, bias_offset,
-                         bias_interpretation_6);
-
-  const uint bias_interpretation_7 = 13;
-
-  // expected-error@+6 {{13 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                         is_input_unsigned, input_interpretation, matrix_buffer,
-                         matrix_offset, matrix_interpretation, matrix_dimM,
-                         matrix_dimK, matrix_layout, matrix_is_transposed,
-                         matrix_stride, bias_buffer, bias_offset,
-                         bias_interpretation_7);
-
-  const uint bias_interpretation_8 = 14;
-
-  // expected-error@+6 {{14 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                         is_input_unsigned, input_interpretation, matrix_buffer,
-                         matrix_offset, matrix_interpretation, matrix_dimM,
-                         matrix_dimK, matrix_layout, matrix_is_transposed,
-                         matrix_stride, bias_buffer, bias_offset,
-                         bias_interpretation_8);
-
-  const uint bias_interpretation_9 = 15;
-
-  // expected-error@+6 {{15 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                         is_input_unsigned, input_interpretation, matrix_buffer,
-                         matrix_offset, matrix_interpretation, matrix_dimM,
-                         matrix_dimK, matrix_layout, matrix_is_transposed,
-                         matrix_stride, bias_buffer, bias_offset,
-                         bias_interpretation_9);
-
-  const uint bias_interpretation_10 = 16;  
-  
-  // expected-error@+6 {{16 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                         is_input_unsigned, input_interpretation, matrix_buffer,
-                         matrix_offset, matrix_interpretation, matrix_dimM,
-                         matrix_dimK, matrix_layout, matrix_is_transposed,
-                         matrix_stride, bias_buffer, bias_offset,
-                         bias_interpretation_10);
-
-  const uint bias_interpretation_11 = DataType::DATA_TYPE_SINT8_T4_PACKED;
-
-  // expected-error@+6 {{17 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                         is_input_unsigned, input_interpretation, matrix_buffer,
-                         matrix_offset, matrix_interpretation, matrix_dimM,
-                         matrix_dimK, matrix_layout, matrix_is_transposed,
-                         matrix_stride, bias_buffer, bias_offset,
-                         bias_interpretation_11);
-
-  const uint bias_interpretation_12 = DataType::DATA_TYPE_UINT8_T4_PACKED;
-
-  // expected-error@+6 {{18 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                         is_input_unsigned, input_interpretation, matrix_buffer,
-                         matrix_offset, matrix_interpretation, matrix_dimM,
-                         matrix_dimK, matrix_layout, matrix_is_transposed,
-                         matrix_stride, bias_buffer, bias_offset,
-                         bias_interpretation_12);
-
-  const uint bias_interpretation_13 = 23;
-
-  // expected-error@+6 {{23 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                         is_input_unsigned, input_interpretation, matrix_buffer,
-                         matrix_offset, matrix_interpretation, matrix_dimM,
-                         matrix_dimK, matrix_layout, matrix_is_transposed,
-                         matrix_stride, bias_buffer, bias_offset,
-                         bias_interpretation_13);
-
-  const uint bias_interpretation_14 = 100;
-
-  // expected-error@+6 {{100 is an invalid memory interpretation value}}
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-                         is_input_unsigned, input_interpretation, matrix_buffer,
-                         matrix_offset, matrix_interpretation, matrix_dimM,
-                         matrix_dimK, matrix_layout, matrix_is_transposed,
-                         matrix_stride, bias_buffer, bias_offset,
-                         bias_interpretation_14);
-  }     
diff --git a/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/mul_add_valid.hlsl b/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/mul_add_valid.hlsl
deleted file mode 100644
index df1ce122ae..0000000000
--- a/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/mul_add_valid.hlsl
+++ /dev/null
@@ -1,245 +0,0 @@
-// REQUIRES: dxil-1-10
-// RUN: %dxc -I %hlsl_headers -T lib_6_10 %s
-
-#include <dx/coopvec.h>
-
-using namespace dx::coopvec;
-
-ByteAddressBuffer input_vector_buffer;
-ByteAddressBuffer matrix_buffer;
-ByteAddressBuffer bias_buffer;
-RWByteAddressBuffer output_vector_buffer;
-ByteAddressBuffer constants_buffer;
-
-// Check valid input vector packed types
-void test_valid_input_vector_packed_types() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
- const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED;
- vector<uint32_t, 1> input_vector_0 =
-     input_vector_buffer.Load<vector<uint32_t, 4> >(0);
- const uint is_input_unsigned_0 = 1;
-
- // expected-no-diagnostics@+1
- __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, 
-                       is_input_unsigned_0, input_interpretation_0, matrix_buffer,
-                       matrix_offset, matrix_interpretation, matrix_dimM,
-                       matrix_dimK, matrix_layout, matrix_is_transposed,
-                       matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
- const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED;
- vector<uint32_t, 1> input_vector_1 =
-     input_vector_buffer.Load<vector<uint32_t, 1> >(0);
- const uint is_input_unsigned_1 = 1;
-
- // expected-no-diagnostics@+1  
- __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1,
-                       is_input_unsigned_1, input_interpretation_1, matrix_buffer,
-                       matrix_offset, matrix_interpretation, matrix_dimM,
-                       matrix_dimK, matrix_layout, matrix_is_transposed,
-                       matrix_stride, bias_buffer, bias_offset, bias_interpretation);                  
-
-}
-
-// IsInputUnsigned must be true for packed input vector type
-void test_valid_is_input_unsigned_packed_input_vector_type() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;  
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED;  
-  vector<uint, 1> input_vector_0 = 
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint is_input_unsigned_0 = 1;
-
-  // expected-no-diagnostics@+2
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0,
-                        is_input_unsigned_0, input_interpretation_0, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,  
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED;
-  vector<uint, 1> input_vector_1 =
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint is_input_unsigned_1 = 1;
-  
-  // expected-no-diagnostics@+2
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1,
-                        is_input_unsigned_1, input_interpretation_1, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check packed input vector dimension
-void test_valid_packed_input_vector_dimension() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_UINT8_T4_PACKED;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_UINT8;
-  const uint matrix_dimM = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 0;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_UINT32;
-
-  vector<uint, 1> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint matrix_dimK_0 = 4;
-
-  // expected-no-diagnostics@+1
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0,
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK_0, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  vector<uint, 2> input_vector_1 =
-      input_vector_buffer.Load<vector<uint, 2> >(0);
-  const uint matrix_dimK_1 = 7;
-
-  // expected-no-diagnostics@+1
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1,
-                        is_input_unsigned, input_interpretation, matrix_buffer, 
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK_1, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check if Matrix M dimension is less than Max
-void test_valid_matrix_M_dimension_less_than_Max() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 1;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = matrix_dimK * 4;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  vector<uint, 4> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 4> >(0);
-  const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM_0 = 4;
-
-  // expected-no-diagnostics@+1
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0,
-                        is_input_unsigned, input_interpretation_0, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM_0,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  vector<uint, 1> input_vector_1 =
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED;
-  const uint matrix_dimM_1 = 4;
-
-  // expected-no-diagnostics@+1
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1,
-                        is_input_unsigned, input_interpretation_1, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM_1,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-// Check if Matrix K dimension is less than Max in unpacked input vector case
-void test_valid_matrix_K_dimension_less_than_Max_unpacked_input_vector() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 1;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  vector<uint, 4> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 4> >(0);
-  const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimK_0 = 4;
-
-  // expected-no-diagnostics@+1
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0,
-                        is_input_unsigned, input_interpretation_0, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK_0, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-  vector<uint, 4> input_vector_1 =
-      input_vector_buffer.Load<vector<uint, 4> >(0);
-  const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8;
-  const uint matrix_dimK_1 = 4;
-  // expected-no-diagnostics@+1
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, 
-                        is_input_unsigned, input_interpretation_1, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK_1, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-
-}
-
-// Check if Matrix M dimension is less than Max in packed input vector case
-void test_valid_matrix_M_dimension_less_than_Max_packed_input_vector() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 1;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  vector<uint, 1024> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 1024> >(0);
-  const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED;
-  const uint matrix_dimK_0 = 4096;
-
-  // expected-no-diagnostics@+1
-  __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0,
-                        is_input_unsigned, input_interpretation_0, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK_0, matrix_layout, matrix_is_transposed,
-                        matrix_stride, bias_buffer, bias_offset, bias_interpretation);
-}
-
-
-
diff --git a/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/mul_invalid.hlsl b/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/mul_invalid.hlsl
deleted file mode 100644
index 612d47d5a1..0000000000
--- a/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/mul_invalid.hlsl
+++ /dev/null
@@ -1,1156 +0,0 @@
-// RUN: %dxc -I %hlsl_headers -T lib_6_10 -enable-16bit-types %s -verify
-
-#include <dx/coopvec.h>
-
-using namespace dx::coopvec;
-
-ByteAddressBuffer input_vector_buffer;
-ByteAddressBuffer matrix_buffer;
-RWByteAddressBuffer output_vector_buffer;
-ByteAddressBuffer constants_buffer;
-
-// Output vector, isUnsigned mismatch
-void test_invalid_output_vector_type() {
-
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  vector<uint, 4> output_vector_0;
-  const uint is_output_unsigned_0 = 0;
-
-  // expected-error@+1 {{IsOuputUnsigned must be true for vector of unsigned integer type}}
-  __builtin_MatVecMul(output_vector_0, is_output_unsigned_0, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  vector<int32_t, 4> output_vector_1;
-  const uint is_output_unsigned_1 = 1;
-
-  // expected-error@+1 {{IsOuputUnsigned must be false for vector of signed integer type}}
-  __builtin_MatVecMul(output_vector_1, is_output_unsigned_1, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  vector<float, 4> output_vector_2;
-  const uint is_output_unsigned_2 = 1;
-
-  // expected-error@+1 {{IsOuputUnsigned must be false for vector of floating point type}}
-  __builtin_MatVecMul(output_vector_2, is_output_unsigned_2, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// IsOutputUnsigned is not a constant parameter
-void test_invalid_is_output_unsigned_non_const() {
-
-  vector<uint, 4> output_vector_0;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  const uint is_output_unsigned_0 = constants_buffer.Load<uint>(0);
-
-  // expected-error@+1 {{expression is not an integer constant expression}}
-  __builtin_MatVecMul(output_vector_0, is_output_unsigned_0, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Input vector is incorrect type - 64 bit types
-void test_invalid_input_vector_type() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-    vector<int64_t, 4> input_vector_0 =
-      input_vector_buffer.Load<vector<int64_t, 4> >(0);
-    const uint is_input_unsigned_0 = 0;
-
-// expected-error@+2 {{no matching function for call to '__builtin_MatVecMul'}}
-// expected-note@+1 {{candidate function not viable: no known conversion from 'vector<int64_t, 4>' to 'vector<float, 4>' for 3rd argument}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0,
-                      is_input_unsigned_0, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-    vector<uint64_t, 4> input_vector_1 =
-      input_vector_buffer.Load<vector<uint64_t, 4> >(0);
-    const uint is_input_unsigned_1 = 1;
-
-// expected-error@+2 {{no matching function for call to '__builtin_MatVecMul'}}
-// expected-note@+1 {{candidate function not viable: no known conversion from 'vector<uint64_t, 4>' to 'vector<float, 4>' for 3rd argument}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1,
-                      is_input_unsigned_1, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-    vector<float64_t, 4> input_vector_2 =
-      input_vector_buffer.Load<vector<float64_t, 4> >(0);
-    const uint is_input_unsigned_2 = 0;
-
-// expected-error@+2 {{no matching function for call to '__builtin_MatVecMul'}}
-// expected-note@+1 {{candidate function not viable: no known conversion from 'vector<float64_t, 4>' to 'vector<float, 4>' for 3rd argument}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_2,
-                      is_input_unsigned_2, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Input vector is incorrect type for packed InputInterpretation
-void test_invalid_input_vector_type_packed_input_interpretation() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  const uint input_interpretation_0 = DataType::DATA_TYPE_SINT8_T4_PACKED;
-  vector<int16_t, 2> input_vector_0 =
-      input_vector_buffer.Load<vector<int16_t, 2> >(0);
-  const uint is_input_unsigned_0 = 1;
-
-  // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0,
-                      is_input_unsigned_0, input_interpretation_0, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED;
-  vector<uint16_t, 2> input_vector_1 =
-      input_vector_buffer.Load<vector<uint16_t, 2> >(0);
-  const uint is_input_unsigned_1 = 0;
-
-  // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} 
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1,
-                      is_input_unsigned_1, input_interpretation_1, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_2 = DataType::DATA_TYPE_UINT8_T4_PACKED;
-  vector<int32_t, 1> input_vector_2 =
-      input_vector_buffer.Load<vector<int32_t, 1> >(0);
-  const uint is_input_unsigned_2 = 1;
-  
-  // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_2,
-                      is_input_unsigned_2, input_interpretation_2, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_3 = DataType::DATA_TYPE_SINT8_T4_PACKED;
-  vector<int32_t, 1> input_vector_3 =
-      input_vector_buffer.Load<vector<int32_t, 1> >(0);
-  const uint is_input_unsigned_3 = 0;
-
-  // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_3,
-                      is_input_unsigned_3, input_interpretation_3, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_4 = DataType::DATA_TYPE_SINT8_T4_PACKED;
-  vector<float, 1> input_vector_4 =
-      input_vector_buffer.Load<vector<float, 1> >(0);
-  const uint is_input_unsigned_4 = 0;
-
-  // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_4, 
-                      is_input_unsigned_4, input_interpretation_4, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// IsInputUnsigned must be true for packed input vector type
-void test_invalid_is_input_unsigned_packed_input_vector_type() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;  
-  const uint matrix_stride = 64;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32;
-
-  const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED;  
-  vector<uint, 1> input_vector_0 = 
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint is_input_unsigned_0 = 0;
-
-  // expected-error@+2 {{IsInputUnsigned must be true for packed input interpretations in linalg mul/muladd operations}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0,
-                      is_input_unsigned_0, input_interpretation_0, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,  
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED;
-  vector<uint, 1> input_vector_1 =
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint is_input_unsigned_1 = 0;
-  
-  // expected-error@+2 {{IsInputUnsigned must be true for packed input interpretations in linalg mul/muladd operations}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1,
-                      is_input_unsigned_1, input_interpretation_1, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Check packed input vector dimension
-void test_invalid_packed_input_vector_dimension() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_UINT8_T4_PACKED;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_UINT8;
-  const uint matrix_dimM = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 0;
-  const uint bias_offset = 0;
-  const uint bias_interpretation = DataType::DATA_TYPE_UINT32;
-
-  vector<uint, 4> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 4> >(0);
-  const uint matrix_dimK_0 = 4;
-
-  // expected-error@+1 {{packed input vector length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK_0, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  vector<uint, 1> input_vector_1 =
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint matrix_dimK_1 = 7;
-
-  // expected-error@+1 {{packed input vector length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1,
-                      is_input_unsigned, input_interpretation, matrix_buffer, 
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK_1, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  vector<uint, 3> input_vector_2 =
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint matrix_dimK_2 = 7;
-
-  // expected-error@+1 {{packed input vector length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_2,
-                      is_input_unsigned, input_interpretation, matrix_buffer, 
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK_2, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-}
-
-// Input vector type/isInputUnsigned mismatch
-void test_invalid_input_vector_type_mismatch() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  vector<uint, 4> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 4> >(0);    
-  const uint is_input_unsigned_0 = 0;
-
-  // expected-error@+2 {{IsInputUnsigned must be true for vector of unsigned integer type}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0,
-                      is_input_unsigned_0, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  vector<int32_t, 4> input_vector_1 =
-      input_vector_buffer.Load<vector<int32_t, 4> >(0);
-  const uint is_input_unsigned_1 = 1;
-
-  // expected-error@+2 {{IsInputUnsigned must be false for vector of signed integer type}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1,
-                      is_input_unsigned_1, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  vector<float16_t, 4> input_vector_2 =
-      input_vector_buffer.Load<vector<float16_t, 4> >(0);
-  const uint is_input_unsigned_2 = 1;
-
-  // expected-error@+2 {{IsInputUnsigned must be false for vector of floating point type}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_2,
-                      is_input_unsigned_2, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-//  Check is Matrix M dimension is a constant parameter
-void test_invalid_matrix_M_dimension() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64; 
-
-  const uint matrix_dimM = constants_buffer.Load<uint>(0);   
-  
-  // expected-error@+3 {{expression is not an integer constant expression}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-//  Check is Matrix K dimension is a constant parameter
-void test_invalid_matrix_K_dimension() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0; 
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  const uint matrix_dimK = constants_buffer.Load<uint>(0);
-  
-  // expected-error@+4 {{expression is not an integer constant expression}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Check is Matrix M dimension is non-zero
-void test_invalid_matrix_M_dimension_non_zero() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  const uint matrix_dimM = 0;
-  // expected-error@+3 {{matrix dimension must be greater than 0}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Check is Matrix K dimension is non-zero
-void test_invalid_matrix_K_dimension_non_zero() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  const uint matrix_dimK = 0;
-  // expected-error@+4 {{matrix dimension must be greater than 0}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Check if Matrix M dimension is less than Max
-void test_invalid_matrix_M_dimension_less_than_Max() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 1;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = matrix_dimK * 4;
-
-  vector<uint, 4> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 4> >(0);
-  const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM_0 = 1025;
-
-  // expected-error@+3 {{matrix dimension M must be less than 1024, in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0,
-                      is_input_unsigned, input_interpretation_0, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM_0,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  vector<uint, 1> input_vector_1 =
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED;
-  const uint matrix_dimM_1 = 4097;
-
-  // expected-error@+3 {{matrix dimension M must be less than 1024, in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1,
-                      is_input_unsigned, input_interpretation_1, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM_1,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Check if Matrix K dimension is less than Max in unpacked input vector case
-void test_invalid_matrix_K_dimension_less_than_Max_unpacked_input_vector() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 1;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  vector<uint, 4> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 4> >(0);
-  const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimK_0 = 1025;
-
-  // expected-error@+4 {{matrix dimension K when using unpacked input vectors must be less than 1024, in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0,
-                      is_input_unsigned, input_interpretation_0, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK_0, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  vector<uint, 4> input_vector_1 =
-      input_vector_buffer.Load<vector<uint, 4> >(0);
-  const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8;
-  const uint matrix_dimK_1 = 4096;
-  // expected-error@+4 {{matrix dimension K when using unpacked input vectors must be less than 1024, in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, 
-                      is_input_unsigned, input_interpretation_1, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK_1, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-}
-
-// Check if Matrix M dimension is less than Max in packed input vector case
-void test_invalid_matrix_M_dimension_less_than_Max_packed_input_vector() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 1;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 1024;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 4096;
-
-  vector<uint, 1024> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 1024> >(0);
-  const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED;
-  const uint matrix_dimK_0 = 4097;
-
-  // expected-error@+4 {{matrix dimension K when using packed input vectors must be less than 4096, in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0,
-                      is_input_unsigned, input_interpretation_0, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK_0, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-//Check if InputInterpretation is a constant parameter
-void test_invalid_input_interpretation_non_const() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  const uint input_interpretation = constants_buffer.Load<uint>(0);
-
-  // expected-error@+2 {{expression is not an integer constant expression}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Check if InputInterpretation is a valid value
-void test_invalid_input_interpretation_value() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);   
-  const uint is_input_unsigned = 0;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  const uint input_interpretation_0 = 0;
-
-  // expected-error@+2 {{0 is an invalid register interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation_0, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_1 = 1;
-
-  // expected-error@+2 {{1 is an invalid register interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation_1, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_2 = 6;
-
-  // expected-error@+2 {{6 is an invalid register interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation_2, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_3 = 7;
-
-  // expected-error@+2 {{7 is an invalid register interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation_3, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);   
-
-  const uint input_interpretation_4 = 10;
-
-  // expected-error@+2 {{10 is an invalid register interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation_4, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,    
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_5 = 11;       
-
-  // expected-error@+2 {{11 is an invalid register interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation_5, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_6 = 12;
-
-  // expected-error@+2 {{12 is an invalid register interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation_6, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_7 = 13;
-
-  // expected-error@+2 {{13 is an invalid register interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation_7, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_8 = 14;
-
-  // expected-error@+2 {{14 is an invalid register interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation_8, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_9 = 15;
-
-  // expected-error@+2 {{15 is an invalid register interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation_9, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_10 = 16;
-
-  // expected-error@+2 {{16 is an invalid register interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation_10, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_11 = 23;
-
-  // expected-error@+2 {{23 is an invalid register interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation_11, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_12 = 100;
-
-  // expected-error@+2 {{100 is an invalid register interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation_12, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-// Check if Input and Output vector dimensions are valid -non packed
-void test_invalid_input_output_vector_dimensions_non_packed_square_matrix() {
-
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 32;
-  const uint matrix_dimK = 32;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  vector<uint, 32> output_vector_0;
-  vector<float, 30> input_vector_0 =   
-      input_vector_buffer.Load<vector<float, 30> >(0);
-
-  // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMul(output_vector_0, is_output_unsigned, input_vector_0,  
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  vector<uint, 30> output_vector_1;
-  vector<float, 32> input_vector_1 =   
-      input_vector_buffer.Load<vector<float, 32> >(0);
-
-  // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMul(output_vector_1, is_output_unsigned, input_vector_1,    
-                      is_input_unsigned, input_interpretation, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Check if Input and Output vector dimensions are valid -non packed
-void test_invalid_input_output_vector_dimensions_non_packed_rectangle_matrix() {
-
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 16;
-  const uint matrix_dimK = 32;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  // Use dimension of Matrix K to trigger error
-  vector<uint, 32> output_vector_0;
-  vector<float, 32> input_vector_0 =   
-      input_vector_buffer.Load<vector<float, 32> >(0);
-
-  // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMul(output_vector_0, is_output_unsigned, input_vector_0,  
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
- 
- // Check off by 1 errors
-  vector<uint, 17> output_vector_1;
-  vector<float, 16> input_vector_1 =   
-      input_vector_buffer.Load<vector<float, 16> >(0);
-
-  // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMul(output_vector_1, is_output_unsigned, input_vector_1,    
-                      is_input_unsigned, input_interpretation, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
- // Check off by 1 errors
- vector<uint, 15> output_vector_2;
- vector<float, 16> input_vector_2 =   
-     input_vector_buffer.Load<vector<float, 16> >(0);
-
- // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}}         
- __builtin_MatVecMul(output_vector_2, is_output_unsigned, input_vector_2,    
-                      is_input_unsigned, input_interpretation, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  // Use dimension of Matrix M to trigger error 
-  vector<uint, 16> output_vector_3;
-  vector<float, 16> input_vector_3 =   
-      input_vector_buffer.Load<vector<float, 16> >(0);
-
-  // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}}
-  __builtin_MatVecMul(output_vector_3, is_output_unsigned, input_vector_3,  
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  // Check off by 1 errors
-  vector<uint, 16> output_vector_4;
-  vector<float, 31> input_vector_4 =   
-      input_vector_buffer.Load<vector<float, 31> >(0);
-
-  // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}}    
-  __builtin_MatVecMul(output_vector_4, is_output_unsigned, input_vector_4,  
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  // Check off by 1 errors
-  vector<uint, 16> output_vector_5;
-  vector<float, 33> input_vector_5 =   
-      input_vector_buffer.Load<vector<float, 33> >(0);
-
-  // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}}    
-  __builtin_MatVecMul(output_vector_5, is_output_unsigned, input_vector_5,  
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-    // Swap dimensions to trigger error
-    vector<uint, 32> output_vector_6;
-    vector<float, 16> input_vector_6 =   
-        input_vector_buffer.Load<vector<float, 16> >(0);
-
-    // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}}    
-    __builtin_MatVecMul(output_vector_6, is_output_unsigned, input_vector_6,  
-                        is_input_unsigned, input_interpretation, matrix_buffer,
-                        matrix_offset, matrix_interpretation, matrix_dimM,
-                        matrix_dimK, matrix_layout, matrix_is_transposed,
-                        matrix_stride);
-}
-
-// Check if matrtrix  interpretation is a constant value
-void test_invalid_matrix_interpretation_constant_value() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  const uint matrix_interpretation_0 = constants_buffer.Load<uint>(0);
-
-  // expected-error@+3 {{expression is not an integer constant expression}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation_0, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Check for invalid matrix interpretation value
-void test_invalid_matrix_interpretation_value() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  const uint matrix_interpretation_0 = 0;
-
-  // expected-error@+3 {{0 is an invalid memory interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation_0, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint matrix_interpretation_1 = 1;
-
-  // expected-error@+3 {{1 is an invalid memory interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation_1, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint matrix_interpretation_2 = 6;
-
-  // expected-error@+3 {{6 is an invalid memory interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation_2, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint matrix_interpretation_3 = 7;
-
-  // expected-error@+3 {{7 is an invalid memory interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,   
-                      matrix_offset, matrix_interpretation_3, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint matrix_interpretation_4 = 10;
-
-  // expected-error@+3 {{10 is an invalid memory interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation_4, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint matrix_interpretation_5 = 11;
-
-  // expected-error@+3 {{11 is an invalid memory interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation_5, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint matrix_interpretation_6 = 12;
-
-  // expected-error@+3 {{12 is an invalid memory interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation_6, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint matrix_interpretation_7 = 13;
-
-  // expected-error@+3 {{13 is an invalid memory interpretation value}} 
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation_7, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);       
-
-  const uint matrix_interpretation_8 = 14;
-
-  // expected-error@+3 {{14 is an invalid memory interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation_8, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint matrix_interpretation_9 = 15;
-
-  // expected-error@+3 {{15 is an invalid memory interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation_9, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint matrix_interpretation_10 = 16;
-
-  // expected-error@+3 {{16 is an invalid memory interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation_10, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint matrix_interpretation_11 = 23;
-  // expected-error@+3 {{23 is an invalid memory interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation_11, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint matrix_interpretation_12 = 100;
-
-  // expected-error@+3 {{100 is an invalid memory interpretation value}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation_12, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Check if matrix Layout is a constant value
-void test_invalid_matrix_layout_constant_value() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);   
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  const uint matrix_layout = constants_buffer.Load<uint>(0);
-
-  // expected-error@+4 {{expression is not an integer constant expression}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Check invalid matrix layout value
-void test_invalid_matrix_layout_value() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  const uint matrix_layout_0 = 4;
-
-  // expected-error@+4 {{matrix layout 4 is not valid, must be in the range [0, 3]}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout_0, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Check if matrix is transposed is a constant value
-void test_invalid_matrix_transposed_constant_value() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = constants_buffer.Load<bool>(0);
-  const uint matrix_stride = 64;
-
-  // expected-error@+4 {{expression is not an integer constant expression}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Check if invalid matrix transpose value is used
-void test_invalid_matrix_transpose_value() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =   
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;   
-  const uint matrix_stride = 64;
-
-  const uint matrix_layout_0 = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed_0 = true;
-
-  // expected-error@+4 {{RowMajor and ColumnMajor matrices are not transposable}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout_0, matrix_is_transposed_0,
-                      matrix_stride);
-
-  const uint matrix_layout_1 = MatrixLayout::MATRIX_LAYOUT_COLUMN_MAJOR;
-  const bool matrix_is_transposed_1 = true;
-
-  // expected-error@+4 {{RowMajor and ColumnMajor matrices are not transposable}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout_1, matrix_is_transposed_1,
-                      matrix_stride);
-}
-
-
-// Check invalid matrix stride value for optimal matrix layout
-void test_invalid_matrix_stride_constant_value() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const bool matrix_is_transposed = false;
-
-  const uint matrix_layout_0 = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL;
-  const uint matrix_stride_0 = 64;
-
-  // expected-error@+5 {{for optimal matrix layout, matrix stride must be 0}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout_0, matrix_is_transposed,
-                      matrix_stride_0);
-
-  const uint matrix_layout_1 = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL;
-  const uint matrix_stride_1 = 64;
-
-  // expected-error@+5 {{for optimal matrix layout, matrix stride must be 0}}
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,   
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout_1, matrix_is_transposed,
-                      matrix_stride_1);
-}
diff --git a/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/mul_valid.hlsl b/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/mul_valid.hlsl
deleted file mode 100644
index b62d4e3be1..0000000000
--- a/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/mul_valid.hlsl
+++ /dev/null
@@ -1,344 +0,0 @@
-// RUN: %dxc -I %hlsl_headers -T lib_6_10 -enable-16bit-types %s -verify
-
-#include <dx/coopvec.h>
-
-using namespace dx::coopvec;
-
-ByteAddressBuffer input_vector_buffer;
-ByteAddressBuffer matrix_buffer; 
-RWByteAddressBuffer output_vector_buffer;
-ByteAddressBuffer const_buffer;
-
-// Output vector, isUnsigned mismatch
-void test_valid_output_vector_type() {
-
-    vector<float, 4> input_vector = input_vector_buffer.Load<vector<float, 4> >(0);
-    const uint is_input_unsigned = 0;
-    const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-    const uint matrix_offset = 0;
-    const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-    const uint matrix_dimM = 4;
-    const uint matrix_dimK = 4;
-    const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-    const bool matrix_is_transposed = false;
-    const uint matrix_stride = 64;
-
-    vector<uint, 4> output_vector_0;
-    const uint is_output_unsigned_0 = 1;
-
-    // expected-no-diagnostics@+1
-    __builtin_MatVecMul(output_vector_0, is_output_unsigned_0, input_vector,
-        is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
-        matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
-        matrix_is_transposed, matrix_stride);
-
-    vector<int32_t, 4> output_vector_1;
-    const uint is_output_unsigned_1 = 0;
-
-    // expected-no-diagnostics@+1
-    __builtin_MatVecMul(output_vector_1, is_output_unsigned_1, input_vector,
-        is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
-        matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
-        matrix_is_transposed, matrix_stride);
-
-    vector<float, 4> output_vector_2;
-    const uint is_output_unsigned_2 = 0;
-
-    // expected-no-diagnostics@+1
-    __builtin_MatVecMul(output_vector_2, is_output_unsigned_2, input_vector,
-        is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
-        matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
-        matrix_is_transposed, matrix_stride);
-}
-
-void test_valid_is_output_unsigned_non_const() {
-
-  vector<uint, 4> output_vector_0;
-  vector<float, 4> input_vector =
-      input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint is_input_unsigned = 0;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  const uint is_output_unsigned_0 = 1;
-
-  // expected-no-diagnostics@+1
-  __builtin_MatVecMul(output_vector_0, is_output_unsigned_0, input_vector,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Input vector is incorrect type
-void test_valid_input_vector_type() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-    vector<int32_t, 4> input_vector_0 =
-      input_vector_buffer.Load<vector<int32_t, 4> >(0);
-    const uint is_input_unsigned_0 = 0;
-
- // expected-no-diagnostics@+1
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0,
-                      is_input_unsigned_0, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-    vector<uint32_t, 4> input_vector_1 =
-      input_vector_buffer.Load<vector<uint32_t, 4> >(0);
-    const uint is_input_unsigned_1 = 1;
-
- // expected-no-diagnostics@+1 
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1,
-                      is_input_unsigned_1, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-    vector<float16_t, 4> input_vector_2 =
-      input_vector_buffer.Load<vector<float16_t, 4> >(0);
-    const uint is_input_unsigned_2 = 0;
-
- // expected-no-diagnostics@+1
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_2,
-                      is_input_unsigned_2, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Check valid input vector packed types
-void test_valid_input_vector_packed_types() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
- const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED;
- vector<uint32_t, 1> input_vector_0 =
-     input_vector_buffer.Load<vector<uint32_t, 1> >(0);
- const uint is_input_unsigned_0 = 1;
-
- // expected-no-diagnostics@+1
- __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, 
-                     is_input_unsigned_0, input_interpretation_0, matrix_buffer,
-                     matrix_offset, matrix_interpretation, matrix_dimM,
-                     matrix_dimK, matrix_layout, matrix_is_transposed,
-                     matrix_stride);
-
- const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED;
- vector<uint32_t, 1> input_vector_1 =
-     input_vector_buffer.Load<vector<uint32_t, 1> >(0);
- const uint is_input_unsigned_1 = 1;
-
- // expected-no-diagnostics@+1  
- __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1,
-                     is_input_unsigned_1, input_interpretation_1, matrix_buffer,
-                     matrix_offset, matrix_interpretation, matrix_dimM,
-                     matrix_dimK, matrix_layout, matrix_is_transposed,
-                     matrix_stride);                  
-
-}
-
-// IsInputUnsigned must be true for packed input vector type
-void test_valid_is_input_unsigned_packed_input_vector_type() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;  
-  const uint matrix_stride = 64;
-
-  const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED;  
-  vector<uint, 1> input_vector_0 = 
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint is_input_unsigned_0 = 1;
-
-  // expected-no-diagnostics@+2
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0,
-                      is_input_unsigned_0, input_interpretation_0, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,  
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED;
-  vector<uint, 1> input_vector_1 =
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint is_input_unsigned_1 = 1;
-  
-  // expected-no-diagnostics@+2
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1,
-                      is_input_unsigned_1, input_interpretation_1, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Check packed input vector dimension
-void test_valid_packed_input_vector_dimension() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 1;
-  const uint input_interpretation = DataType::DATA_TYPE_UINT8_T4_PACKED;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_UINT8;
-  const uint matrix_dimM = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 0;
-
-  vector<uint, 1> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint matrix_dimK_0 = 4;
-
-  // expected-no-diagnostics@+1
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0,
-                      is_input_unsigned, input_interpretation, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK_0, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  vector<uint, 2> input_vector_1 =
-      input_vector_buffer.Load<vector<uint, 2> >(0);
-  const uint matrix_dimK_1 = 7;
-
-  // expected-no-diagnostics@+1
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1,
-                      is_input_unsigned, input_interpretation, matrix_buffer, 
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK_1, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Check if Matrix M dimension is less than Max
-void test_valid_matrix_M_dimension_less_than_Max() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 1;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimK = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = matrix_dimK * 4;
-
-  vector<uint, 4> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 4> >(0);
-  const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM_0 = 4;
-
-  // expected-no-diagnostics@+1
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0,
-                      is_input_unsigned, input_interpretation_0, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM_0,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  vector<uint, 1> input_vector_1 =
-      input_vector_buffer.Load<vector<uint, 1> >(0);
-  const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED;
-  const uint matrix_dimM_1 = 4;
-
-  // expected-no-diagnostics@+1
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1,
-                      is_input_unsigned, input_interpretation_1, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM_1,
-                      matrix_dimK, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
-
-// Check if Matrix K dimension is less than Max in unpacked input vector case
-void test_valid_matrix_K_dimension_less_than_Max_unpacked_input_vector() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 1;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  vector<uint, 4> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 4> >(0);
-  const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimK_0 = 4;
-
-  // expected-no-diagnostics@+1
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0,
-                      is_input_unsigned, input_interpretation_0, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK_0, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-  vector<uint, 4> input_vector_1 =
-      input_vector_buffer.Load<vector<uint, 4> >(0);
-  const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8;
-  const uint matrix_dimK_1 = 4;
-
-  // expected-no-diagnostics@+1
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, 
-                      is_input_unsigned, input_interpretation_1, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK_1, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-
-}
-
-// Check if Matrix M dimension is less than Max in packed input vector case
-void test_valid_matrix_M_dimension_less_than_Max_packed_input_vector() {
-
-  vector<uint, 4> output_vector;
-  const uint is_output_unsigned = 1;
-  const uint is_input_unsigned = 1;
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_dimM = 4;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-  const bool matrix_is_transposed = false;
-  const uint matrix_stride = 64;
-
-  vector<uint, 1024> input_vector_0 =
-      input_vector_buffer.Load<vector<uint, 1024> >(0);
-  const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED;
-  const uint matrix_dimK_0 = 4096;
-
-  // expected-no-diagnostics@+1
-  __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0,
-                      is_input_unsigned, input_interpretation_0, matrix_buffer,
-                      matrix_offset, matrix_interpretation, matrix_dimM,
-                      matrix_dimK_0, matrix_layout, matrix_is_transposed,
-                      matrix_stride);
-}
diff --git a/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/outer_product_accumulate_invalid.hlsl b/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/outer_product_accumulate_invalid.hlsl
deleted file mode 100644
index 05aa76d779..0000000000
--- a/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/outer_product_accumulate_invalid.hlsl
+++ /dev/null
@@ -1,256 +0,0 @@
-// RUN: %dxc -I %hlsl_headers -T lib_6_10 -enable-16bit-types %s -verify
-
-#include <dx/coopvec.h>
-
-using namespace dx::coopvec;
-
-ByteAddressBuffer input_vector_buffer;
-RWByteAddressBuffer accumulate_buffer;
-ByteAddressBuffer constants_buffer;
-
-// Check if input vectors aren't the same component type
-void test_invalid_input_vector_component_type() {
-
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL;
-  const uint matrix_stride = 0;
-
-  vector<float, 4> input_vector_0_0 = input_vector_buffer.Load<vector<float, 4> >(0);
-  vector<uint, 4> input_vector_1_0 = input_vector_buffer.Load<vector<uint, 4> >(0);
-
-  // expected-error@+1 {{input vectors of outerproductaccumulate must have the same element type}}
-  __builtin_OuterProductAccumulate(input_vector_0_0, input_vector_1_0,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation, matrix_layout,
-                                  matrix_stride);
-
-  vector<int, 4> input_vector_0_1 = input_vector_buffer.Load<vector<int, 4> >(0);
-  vector<float, 4> input_vector_1_1 = input_vector_buffer.Load<vector<float, 4> >(0);
-
-  // expected-error@+1 {{input vectors of outerproductaccumulate must have the same element type}}
-  __builtin_OuterProductAccumulate(input_vector_0_1, input_vector_1_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation, matrix_layout,
-                                  matrix_stride);
-}
-
-// Check for non constant matrix interpretation
-void test_non_constant_matrix_interpretation() {
-
-  vector<float, 4> input_vector_0 = input_vector_buffer.Load<vector<float, 4> >(0);
-  vector<float, 4> input_vector_1 = input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint matrix_offset = 0;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL;
-  const uint matrix_stride = 0;
-
-  const uint matrix_interpretation = constants_buffer.Load<uint>(0);
-
-  // expected-error@+3 {{expression is not an integer constant expression}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation, matrix_layout,
-                                  matrix_stride);
-}
-
-// Check for matrix interpretation is not a valid value
-void test_invalid_matrix_interpretation() {
-
-  vector<float, 4> input_vector_0 = input_vector_buffer.Load<vector<float, 4> >(0);
-  vector<float, 4> input_vector_1 = input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint matrix_offset = 0;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL;
-  const uint matrix_stride = 0;
-
-  const uint matrix_interpretation = 0;
-
-  // expected-error@+3 {{0 is an invalid memory interpretation value}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation, matrix_layout,
-                                  matrix_stride);
-
-  const uint matrix_interpretation_2 = 1;
-
-  // expected-error@+3 {{1 is an invalid memory interpretation value}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation_2, matrix_layout,
-                                  matrix_stride);
-
-  const uint matrix_interpretation_3 = 6;
-
-  // expected-error@+3 {{6 is an invalid memory interpretation value}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation_3, matrix_layout,
-                                  matrix_stride);
-
-  const uint matrix_interpretation_4 = 7;
-
-  // expected-error@+3 {{7 is an invalid memory interpretation value}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation_4, matrix_layout,
-                                  matrix_stride);
-
-  const uint matrix_interpretation_5 = 10;
-
-  // expected-error@+3 {{10 is an invalid memory interpretation value}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation_5, matrix_layout,
-                                  matrix_stride); 
-
-  const uint matrix_interpretation_6 = 11;
-
-  // expected-error@+3 {{11 is an invalid memory interpretation value}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation_6, matrix_layout,
-                                  matrix_stride);
-
-  const uint matrix_interpretation_7 = 12;
-
-  // expected-error@+3 {{12 is an invalid memory interpretation value}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation_7, matrix_layout,
-                                  matrix_stride);
-
-  const uint matrix_interpretation_8 = 13;
-
-  // expected-error@+3 {{13 is an invalid memory interpretation value}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation_8, matrix_layout,
-                                  matrix_stride);
-
-  const uint matrix_interpretation_9 = 14;
-
-  // expected-error@+3 {{14 is an invalid memory interpretation value}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation_9, matrix_layout,
-                                  matrix_stride);
-
-  const uint matrix_interpretation_10 = 15;
-
-  // expected-error@+3 {{15 is an invalid memory interpretation value}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation_10, matrix_layout,
-                                  matrix_stride);
-
-  const uint matrix_interpretation_11 = 16;
-
-  // expected-error@+3 {{16 is an invalid memory interpretation value}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation_11, matrix_layout,
-                                  matrix_stride); 
-
-  const uint matrix_interpretation_12 = DataType::DATA_TYPE_SINT8_T4_PACKED;
-
-  // expected-error@+3 {{17 is an invalid memory interpretation value}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation_12, matrix_layout,
-                                  matrix_stride);
-
-  const uint matrix_interpretation_13 = DataType::DATA_TYPE_UINT8_T4_PACKED;
-
-  // expected-error@+3 {{18 is an invalid memory interpretation value}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation_13, matrix_layout,
-                                  matrix_stride);
-
-  const uint matrix_interpretation_14 = 23;
-
-  // expected-error@+3 {{23 is an invalid memory interpretation value}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation_14, matrix_layout,
-                                  matrix_stride);
-
-  const uint matrix_interpretation_15 = 100;
-
-  // expected-error@+3 {{100 is an invalid memory interpretation value}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation_15, matrix_layout,
-                                  matrix_stride);                   
-                              
-}
-
-// Check for matrix layout is not a constant parameter
-void test_non_constant_matrix_layout() {
-
-  vector<float, 4> input_vector_0 = input_vector_buffer.Load<vector<float, 4> >(0);
-  vector<float, 4> input_vector_1 = input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_stride = 0;
-
-  const uint matrix_layout = constants_buffer.Load<uint>(0);
-
-  // expected-error@+3 {{expression is not an integer constant expression}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation, matrix_layout,
-                                  matrix_stride);
-}
-
-// Check for matrix layout is not a valid value
-void test_invalid_matrix_layout() {
-
-  vector<float, 4> input_vector_0 = input_vector_buffer.Load<vector<float, 4> >(0);
-  vector<float, 4> input_vector_1 = input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; 
-  const uint matrix_stride = 0;
-
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR;
-
-  // expected-error@+3 {{matrix layout for outerproductaccumulate must be 3}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation, matrix_layout,
-                                  matrix_stride);
-
-  const uint matrix_layout_2 = MatrixLayout::MATRIX_LAYOUT_COLUMN_MAJOR;
-
-  // expected-error@+3 {{matrix layout for outerproductaccumulate must be 3}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation, matrix_layout_2,
-                                  matrix_stride);
-
-  const uint matrix_layout_3 = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL;
-
-  // expected-error@+3 {{matrix layout for outerproductaccumulate must be 3}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation, matrix_layout_3,
-                                  matrix_stride);                               
-                                  
-}
-
-// Check for matrix stride is zero, if constant
-void test_zero_matrix_stride() {
-
-  vector<float, 4> input_vector_0 = input_vector_buffer.Load<vector<float, 4> >(0);
-  vector<float, 4> input_vector_1 = input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL;
-
-  const uint matrix_stride = 16;
-
-  // expected-error@+4 {{for optimal matrix layout, matrix stride must be 0}}
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation, matrix_layout,
-                                  matrix_stride);
-}
diff --git a/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/outer_product_accumulate_valid.hlsl b/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/outer_product_accumulate_valid.hlsl
deleted file mode 100644
index 0af867dfdb..0000000000
--- a/tools/clang/test/SemaHLSL/hlsl/coopvec/builtins/outer_product_accumulate_valid.hlsl
+++ /dev/null
@@ -1,66 +0,0 @@
-// RUN: %dxc -I %hlsl_headers -T lib_6_10 -enable-16bit-types %s -verify
-
-#include <dx/coopvec.h>
-
-using namespace dx::coopvec;
-
-ByteAddressBuffer input_vector_buffer;
-RWByteAddressBuffer accumulate_buffer;
-ByteAddressBuffer constants_buffer;
-
-// Check for input vectors aren't the same component type
-void test_invalid_input_vector_component_type() {
-
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL;
-  const uint matrix_stride = 0;
-
-  vector<float, 4> input_vector_0_0 = input_vector_buffer.Load<vector<float, 4> >(0);
-  vector<float, 16> input_vector_1_0 = input_vector_buffer.Load<vector<float, 16> >(0);
-
-      // expected-no-diagnostics@+1
-  __builtin_OuterProductAccumulate(input_vector_0_0, input_vector_1_0,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation, matrix_layout,
-                                  matrix_stride);
-
-  vector<int, 32> input_vector_0_1 = input_vector_buffer.Load<vector<int, 32> >(0);
-  vector<int ,16> input_vector_1_1 = input_vector_buffer.Load<vector<int, 16> >(0);
-
-     // expected-no-diagnostics@+1
-  __builtin_OuterProductAccumulate(input_vector_0_1, input_vector_1_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation, matrix_layout,
-                                  matrix_stride);
-
-  vector<uint, 4> input_vector_0_2 = input_vector_buffer.Load<vector<uint, 4> >(0);
-  vector<uint, 16> input_vector_1_2 = input_vector_buffer.Load<vector<uint, 16> >(0);
-
-  // expected-no-diagnostics@+1
-  __builtin_OuterProductAccumulate(input_vector_0_2, input_vector_1_2,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation, matrix_layout,
-                                  matrix_stride);
-}
-
-// Check for non constant matrix stride
-void test_non_constant_matrix_stride() {
-
-  vector<float, 4> input_vector_0 = input_vector_buffer.Load<vector<float, 4> >(0);
-  vector<float, 4> input_vector_1 = input_vector_buffer.Load<vector<float, 4> >(0);
-  const uint matrix_offset = 0;
-  const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32;
-  const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL;
-
-  const uint matrix_stride = constants_buffer.Load<uint>(0);
-
-  // expected-no-diagnostics@+4
-  __builtin_OuterProductAccumulate(input_vector_0, input_vector_1,
-                                  accumulate_buffer, matrix_offset,
-                                  matrix_interpretation, matrix_layout,
-                                  matrix_stride);
-}
-
-// Check for matrix stride is not a valid value
-
diff --git a/tools/clang/test/SemaHLSL/hlsl/coopvec/lit.local.cfg b/tools/clang/test/SemaHLSL/hlsl/coopvec/lit.local.cfg
deleted file mode 100644
index e7894132bb..0000000000
--- a/tools/clang/test/SemaHLSL/hlsl/coopvec/lit.local.cfg
+++ /dev/null
@@ -1 +0,0 @@
-config.unsupported = "dxil-1-10" not in config.available_features
diff --git a/tools/clang/test/SemaHLSL/hlsl/coopvec/make-interp-vec-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/coopvec/make-interp-vec-errors.hlsl
deleted file mode 100644
index c8fca761f9..0000000000
--- a/tools/clang/test/SemaHLSL/hlsl/coopvec/make-interp-vec-errors.hlsl
+++ /dev/null
@@ -1,33 +0,0 @@
-// RUN: %dxc -I %hlsl_headers -T lib_6_10 %s -verify
-
-#include <dx/coopvec.h>
-ByteAddressBuffer Buf;
-
-export float4 Test1(vector<float, 4> Input) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_UINT16, 4, 4, MATRIX_LAYOUT_MUL_OPTIMAL, true> Matrix = {
-      Buf, 0, 0};
-
-  // expected-error@+3{{no matching function for call to 'MakeInterpretedVector'}}
-  // expected-note@dx/coopvec.h:113{{candidate template ignored: invalid explicitly-specified argument for template parameter 'DT'}}
-  return Mul<float>(    
-      Matrix, MakeInterpretedVector<2>(Input));
-}
-
-enum DataType {
-  DATA_TYPE_InvalidType = 40
-};
-
-export float4 Test2(vector<float, 4> Input) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_UINT16, 4, 4, MATRIX_LAYOUT_MUL_OPTIMAL, true> Matrix = {
-      Buf, 0, 0};
-
-  // expected-error@+3{{no matching function for call to 'MakeInterpretedVector'}}
-  // expected-note@dx/coopvec.h:113{{candidate template ignored: invalid explicitly-specified argument for template parameter 'DT'}}
-  return Mul<float>(    
-      Matrix, MakeInterpretedVector<DATA_TYPE_InvalidType>(Input));
-}
-
diff --git a/tools/clang/test/SemaHLSL/hlsl/coopvec/mat-vec-mul-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/coopvec/mat-vec-mul-errors.hlsl
deleted file mode 100644
index c583969db0..0000000000
--- a/tools/clang/test/SemaHLSL/hlsl/coopvec/mat-vec-mul-errors.hlsl
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: %dxc -I %hlsl_headers -T lib_6_10 %s -verify
-
-#include <dx/coopvec.h>
-
-ByteAddressBuffer Buf;
-
-vector<float, 128> MixUpVectorAndMatrixArguments(vector<float, 128> Input) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_FLOAT16, 128, 128, MATRIX_LAYOUT_MUL_OPTIMAL> Matrix = {
-      Buf, 0, 0};
-
-  // expected-error@+2{{no matching function for call to 'Mul'}}
-  // expected-note@dx/coopvec.h:127{{candidate template ignored: could not match 'MatrixRefImpl' against 'InterpretedVector'}}
-  return Mul<float>(MakeInterpretedVector<DATA_TYPE_FLOAT16>(Input), Matrix);
-}
diff --git a/tools/clang/test/SemaHLSL/hlsl/coopvec/mat-vec-mul-transpose-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/coopvec/mat-vec-mul-transpose-errors.hlsl
deleted file mode 100644
index bd6e4767c5..0000000000
--- a/tools/clang/test/SemaHLSL/hlsl/coopvec/mat-vec-mul-transpose-errors.hlsl
+++ /dev/null
@@ -1,30 +0,0 @@
-// XFAIL: *
-// RUN: %dxc -I %hlsl_headers -T lib_6_10 -enable-16bit-types %s -verify
-
-#include <dx/coopvec.h>
-
-ByteAddressBuffer Buf;
-
-export float4 Test1(vector<float, 4> Input) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_FLOAT16, 4, 4, MATRIX_LAYOUT_ROW_MAJOR, true> Matrix = {
-      Buf, 0, 0};
-
-  // PREVIEW CHECK TODO:
-  // expected-error@+1{{something about transposing not supported for rowmajor / colmajor layouts}}
-  return Mul<float>(    
-      Matrix, MakeInterpretedVector<DATA_TYPE_FLOAT16>(Input));
-}
-
-export vector<float, 8> Test2(vector<uint8_t4_packed, 6> Input) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_UINT8, 8, 6 * 4, MATRIX_LAYOUT_COLUMN_MAJOR> Matrix = {
-      Buf, 0, 0};
-
-  // PREVIEW CHECK TODO:
-  // expected-error@+1{{something about transposing not supported for rowmajor / colmajor layouts}}
-  return Mul<float>(Matrix,
-                    MakeInterpretedVector<DATA_TYPE_UINT8_T4_PACKED>(Input));
-}
diff --git a/tools/clang/test/SemaHLSL/hlsl/coopvec/mat-vec-muladd-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/coopvec/mat-vec-muladd-errors.hlsl
deleted file mode 100644
index f787aab084..0000000000
--- a/tools/clang/test/SemaHLSL/hlsl/coopvec/mat-vec-muladd-errors.hlsl
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: %dxc -I %hlsl_headers -T lib_6_10 %s -verify
-
-#include <dx/coopvec.h>
-
-ByteAddressBuffer Buf;
-
-vector<float, 128> MixUpVectorAndMatrixArguments(vector<float, 128> Input) {
-  using namespace dx::coopvec;
-
-  MatrixRef<DATA_TYPE_SINT16, 128, 128, MATRIX_LAYOUT_MUL_OPTIMAL> Matrix = {
-      Buf, 0, 0};
-
-  // expected-error@+2{{no matching function for call to 'MulAdd'}}
-  // expected-note@dx/coopvec.h:153{{candidate template ignored: could not match 'MatrixRefImpl' against 'InterpretedVector'}}
-  return MulAdd<float>(MakeInterpretedVector<DATA_TYPE_SINT16>(Input), Matrix, MakeInterpretedVector<DATA_TYPE_SINT16>(Input));
-}
diff --git a/tools/clang/test/SemaHLSL/hlsl/coopvec/outerproductaccumulate-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/coopvec/outerproductaccumulate-errors.hlsl
deleted file mode 100644
index e0a7e4d0de..0000000000
--- a/tools/clang/test/SemaHLSL/hlsl/coopvec/outerproductaccumulate-errors.hlsl
+++ /dev/null
@@ -1,44 +0,0 @@
-// RUN: %dxc -I %hlsl_headers -T lib_6_10 -enable-16bit-types %s -verify
-
-#include <dx/coopvec.h>
-
-RWByteAddressBuffer RWBuf;
-
-// test for inputs of different size
-export void Test4(vector<half, 128> Input1, vector<half, 64> Input2) {
-  using namespace dx::coopvec;
-
-  RWMatrixRef<DATA_TYPE_FLOAT16, 128, 64, MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL, true>
-      matrix = {RWBuf, 0, 0};
-
-  // expected-error@+3{{no matching function for call to 'OuterProductAccumulate'}}
-  // expected-note@dx/coopvec.h:177{{candidate template ignored: could not match 0 against 1}}
-
-  OuterProductAccumulate(Input1, Input2, matrix);  
-}
-
-// now test for an error when element types differ
-export void Test5(vector<int, 128> Input1, vector<uint, 128> Input2) {
-  using namespace dx::coopvec;
-
-  RWMatrixRef<DATA_TYPE_FLOAT16, 128, 128, MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL, true>
-      matrix = {RWBuf, 0, 0};
-
-  // expected-error@+3{{no matching function for call to 'OuterProductAccumulate'}}
-  // expected-note@dx/coopvec.h:177{{candidate template ignored: could not match 0 against 1}}
-
-  OuterProductAccumulate(Input1, Input2, matrix);  
-}
-
-// now test for an error when matrix transpose parameter is true
-export void Test4(vector<half, 64> Input1, vector<half, 64> Input2) {
-  using namespace dx::coopvec;
-
-  RWMatrixRef<DATA_TYPE_FLOAT16, 64, 64, MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL, true>
-      matrix = {RWBuf, 0, 0};
-
-  // expected-error@+3{{no matching function for call to 'OuterProductAccumulate'}}
-  // expected-note@dx/coopvec.h:177{{candidate template ignored: deduced conflicting types for parameter 'ElTy' ('int' vs. 'unsigned int')}}
-
-  OuterProductAccumulate(Input1, Input2, matrix);  
-}
diff --git a/tools/clang/test/SemaHLSL/hlsl/coopvec/unavailable-pre-sm69.hlsl b/tools/clang/test/SemaHLSL/hlsl/coopvec/unavailable-pre-sm69.hlsl
deleted file mode 100644
index 4df0341048..0000000000
--- a/tools/clang/test/SemaHLSL/hlsl/coopvec/unavailable-pre-sm69.hlsl
+++ /dev/null
@@ -1,59 +0,0 @@
-// RUN: %dxc -T lib_6_8 %s -verify
- 
-ByteAddressBuffer matrix_buffer;
-ByteAddressBuffer bias_buffer;
-RWByteAddressBuffer rw_matrix_buffer;
-
-[Shader("compute")]
-[Numthreads(1,1,1)]
-void cs_main()
-{    
-    vector<float, 4> output_vector;
-    static const uint is_output_unsigned = 0;
-    
-    vector<float, 4> input_vector;
-    const uint is_input_unsigned = 0;
-    const uint input_interpretation = 9; /*F32*/
-    
-    const uint matrix_offset = 0;
-    const uint matrix_interpretation = 9; /*F32*/
-    const uint matrix_dimM = 4;
-    const uint matrix_dimK = 4;
-    const uint matrix_layout = 0; /*RowMajor*/
-    const bool matrix_is_transposed = false; 
-    const uint matrix_stride = 64;
-
-    //expected-error@+1{{intrinsic __builtin_MatVecMul potentially used by ''cs_main'' requires shader model 6.10 or greater}}
-    __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, 
-      is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
-      matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
-      matrix_is_transposed, matrix_stride); 
-
-    const uint bias_offset = 0;
-    const uint bias_interpretation = 9; /*F32*/
-
-    //expected-error@+1{{intrinsic __builtin_MatVecMulAdd potentially used by ''cs_main'' requires shader model 6.10 or greater}}
-    __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector,
-      is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset,
-      matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout,
-      matrix_is_transposed, matrix_stride, bias_buffer, bias_offset,
-      bias_interpretation); 
-
-    vector<uint, 4> input_vector1;
-    vector<uint, 4> input_vector2;
-    const uint opa_matrix_offset = 0;
-    const uint opa_matrix_interpretation = 5; /*U32*/
-    const uint opa_matrix_layout = 3; /*OuterProductOptimal*/
-    const uint opa_matrix_stride = 0;
-
-    //expected-error@+1{{intrinsic __builtin_OuterProductAccumulate potentially used by ''cs_main'' requires shader model 6.10 or greater}}
-    __builtin_OuterProductAccumulate(input_vector1, input_vector2,
-      rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation,
-      opa_matrix_layout, opa_matrix_stride);
-
-    const uint va_matrix_offset = 0;
-
-    //expected-error@+1{{intrinsic __builtin_VectorAccumulate potentially used by ''cs_main'' requires shader model 6.10 or greater}}
-    __builtin_VectorAccumulate(input_vector1, rw_matrix_buffer,
-      va_matrix_offset);
-}
diff --git a/tools/clang/test/SemaHLSL/hlsl/coopvec/vectoraccumulate-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/coopvec/vectoraccumulate-errors.hlsl
deleted file mode 100644
index d47ccb2ad2..0000000000
--- a/tools/clang/test/SemaHLSL/hlsl/coopvec/vectoraccumulate-errors.hlsl
+++ /dev/null
@@ -1,16 +0,0 @@
-// XFAIL: *
-// RUN: %dxc -I %hlsl_headers -T lib_6_10 %s | FileCheck %s
-
-#include <dx/coopvec.h>
-
-RWByteAddressBuffer RWBuf;
-
-export void Test5(vector<float, 128> Input) {
-  using namespace dx::coopvec;
-
-  RWBuf.Store<vector<half, 128> >(0, Input);
-
-  // PREVIEW CHECK TODO:
-  // CHECK: Something about an error due to illegal conversions
-  VectorAccumulate(Input, RWBuf, 0);
-}
diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl
index 625d75f3e1..7fb1bae34a 100644
--- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl
+++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrix-builtins-ast.hlsl
@@ -15,14 +15,14 @@ void main() {
 // CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix {{.*}}'
 // CHECK-NEXT: ParmVarDecl {{.*}} source '__builtin_LinAlgMatrix {{.*}}'
 // CHECK-NEXT: ParmVarDecl {{.*}} transpose 'bool'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 405
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 401
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   __builtin_LinAlg_CopyConvertMatrix(mat2, mat1, true);
 
 // CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_FillMatrix 'void (__builtin_LinAlgMatrix & {{.*}}, unsigned int)' extern
 // CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix &&__restrict {{.*}}'
 // CHECK-NEXT: ParmVarDecl {{.*}} value 'unsigned int'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 406
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 402
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   __builtin_LinAlg_FillMatrix(mat1, 15);
 
@@ -30,7 +30,7 @@ void main() {
 // CHECK-NEXT: ParmVarDecl {{.*}} matrixC '__builtin_LinAlgMatrix {{.*}}'
 // CHECK-NEXT: ParmVarDecl {{.*}} matrixLHS '__builtin_LinAlgMatrix {{.*}}'
 // CHECK-NEXT: ParmVarDecl {{.*}} matrixRHS '__builtin_LinAlgMatrix {{.*}}'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 415
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 411
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   __builtin_LinAlg_MatrixAccumulate(mat1, mat2, mat3);
 
@@ -41,7 +41,7 @@ void main() {
 // CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} align 'unsigned int'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 419
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 415
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   __builtin_LinAlg_MatrixAccumulateToDescriptor(mat1, Buf, 1, 2, 3, 4);
 
@@ -51,14 +51,14 @@ void main() {
 // CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 420
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 416
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   __builtin_LinAlg_MatrixAccumulateToMemory(mat1, SharedArr, 0, 0, 0);
 
 // CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixGetCoordinate 'vector<uint, 2> (__builtin_LinAlgMatrix {{.*}}, unsigned int)' extern
 // CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}'
 // CHECK-NEXT: ParmVarDecl {{.*}} threadLocalIndex 'unsigned int'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 407
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 403
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   uint2 coord = __builtin_LinAlg_MatrixGetCoordinate(mat1, 0);
 
@@ -66,7 +66,7 @@ void main() {
 // CHECK-NEXT: ParmVarDecl {{.*}} ret 'unsigned int &&__restrict'
 // CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}'
 // CHECK-NEXT: ParmVarDecl {{.*}} threadLocalIndex 'unsigned int'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 408
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 404
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   uint elem1;
   __builtin_LinAlg_MatrixGetElement(elem1, mat1, 3);
@@ -75,14 +75,14 @@ void main() {
 // CHECK-NEXT: ParmVarDecl {{.*}} ret 'float &&__restrict'
 // CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}'
 // CHECK-NEXT: ParmVarDecl {{.*}} threadLocalIndex 'unsigned int'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 408
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 404
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   float elem2;
   __builtin_LinAlg_MatrixGetElement(elem2, mat1, 4);
 
 // CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixLength 'unsigned int (__builtin_LinAlgMatrix {{.*}})' extern
 // CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 409
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 405
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   __builtin_LinAlg_MatrixLength(mat1);
 
@@ -93,7 +93,7 @@ void main() {
 // CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} align 'unsigned int'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 410
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 406
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   __builtin_LinAlg_MatrixLoadFromDescriptor(mat1, Buf, 0, 0, 0, 4);
 
@@ -103,7 +103,7 @@ void main() {
 // CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 411
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 407
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   __builtin_LinAlg_MatrixLoadFromMemory(mat1, SharedArr, 0, 0, 0);
 
@@ -111,7 +111,7 @@ void main() {
 // CHECK-NEXT: ParmVarDecl {{.*}} matrixC '__builtin_LinAlgMatrix {{.*}}'
 // CHECK-NEXT: ParmVarDecl {{.*}} matrixA '__builtin_LinAlgMatrix {{.*}}'
 // CHECK-NEXT: ParmVarDecl {{.*}} matrixB '__builtin_LinAlgMatrix {{.*}}'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 416
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 412
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   __builtin_LinAlg_MatrixMatrixMultiply(mat1, mat2, mat3);
 
@@ -120,7 +120,7 @@ void main() {
 // CHECK-NEXT: ParmVarDecl {{.*}} matrixA '__builtin_LinAlgMatrix {{.*}}'
 // CHECK-NEXT: ParmVarDecl {{.*}} matrixB '__builtin_LinAlgMatrix {{.*}}'
 // CHECK-NEXT: ParmVarDecl {{.*}} matrixC '__builtin_LinAlgMatrix {{.*}}'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 417
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 413
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   __builtin_LinAlg_MatrixMatrixMultiplyAccumulate(mat1, mat2, mat3, mat1);
 
@@ -128,14 +128,14 @@ void main() {
 // CHECK-NEXT: ParmVarDecl {{.*}} ret '__builtin_LinAlgMatrix {{.*}}'
 // CHECK-NEXT: ParmVarDecl {{.*}} vecA 'vector<int, 4>':'vector<int, 4>'
 // CHECK-NEXT: ParmVarDecl {{.*}} vecB 'vector<int, 4>':'vector<int, 4>'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 421
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 417
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   int4 vecA = {1,2,3,4};
   int4 vecB = {1,2,3,4};
   __builtin_LinAlg_MatrixOuterProduct(mat1, vecA, vecB);
 
 // CHECK: FunctionDecl {{.*}} implicit used __builtin_LinAlg_MatrixQueryAccumulatorLayout 'unsigned int ()' extern
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 418
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 414
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   uint layout = __builtin_LinAlg_MatrixQueryAccumulatorLayout();
 
@@ -144,7 +144,7 @@ void main() {
 // CHECK-NEXT: ParmVarDecl {{.*}} matrix '__builtin_LinAlgMatrix {{.*}}'
 // CHECK-NEXT: ParmVarDecl {{.*}} threadLocalIndex 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} value 'unsigned int'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 412
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 408
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   __builtin_LinAlg_MatrixSetElement(mat2, mat1, 1, 1);
 
@@ -155,7 +155,7 @@ void main() {
 // CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} align 'unsigned int'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 413
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 409
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   __builtin_LinAlg_MatrixStoreToDescriptor(mat1, Buf, 1, 2, 3, 4);
 
@@ -165,7 +165,7 @@ void main() {
 // CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 414
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 410
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
 
   __builtin_LinAlg_MatrixStoreToMemory(mat1, SharedArr, 0, 0, 0);
@@ -175,7 +175,7 @@ void main() {
 // CHECK-NEXT: ParmVarDecl {{.*}} mat '__builtin_LinAlgMatrix {{.*}}'
 // CHECK-NEXT: ParmVarDecl {{.*}} input 'vector<float, 4>':'vector<float, 4>'
 // CHECK-NEXT: ParmVarDecl {{.*}} input_interp 'unsigned int'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 422
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 418
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   float4 vec = {1,2,3,4};
   float4 result;
@@ -188,7 +188,7 @@ void main() {
 // CHECK-NEXT: ParmVarDecl {{.*}} input_interp 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} bias 'vector<float, 4>':'vector<float, 4>'
 // CHECK-NEXT: ParmVarDecl {{.*}} bias_interp 'unsigned int'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 423
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 419
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
   float4 input = {1,2,3,4};
   float4 bias = {5,6,7,8};
diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl
index d300796b67..4cae1086bf 100644
--- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl
+++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixaccumulatetomemory/ast.hlsl
@@ -7,7 +7,7 @@
 // CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 420
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 416
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
 
 groupshared float SharedArr[64];
diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl
index 3ac0de3880..d297e75b11 100644
--- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl
+++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixloadfrommemory/ast.hlsl
@@ -7,7 +7,7 @@
 // CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 411
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 407
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
 
 groupshared float SharedArr[64];
diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl
index c726d119eb..08b7658b10 100644
--- a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl
+++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/matrixstoretomemory/ast.hlsl
@@ -7,7 +7,7 @@
 // CHECK-NEXT: ParmVarDecl {{.*}} offset 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} stride 'unsigned int'
 // CHECK-NEXT: ParmVarDecl {{.*}} layout 'unsigned int'
-// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 414
+// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 410
 // CHECK-NEXT: AvailabilityAttr {{.*}} Implicit  6.10 0 0 ""
 
 groupshared float SharedArr[64];
diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt
index 70efad7803..47a5641944 100644
--- a/utils/hct/gen_intrin_main.txt
+++ b/utils/hct/gen_intrin_main.txt
@@ -387,14 +387,6 @@ void [[]] Barrier(in NodeRecordOrUAV o, in uint SemanticFlags);
 
 uint [[]] GetRemainingRecursionLevels();
 
-void [[min_sm=6.10]] __builtin_MatVecMul(out LinAlg<c> OutputVector, in bool OutputIsUnsigned, in LinAlg<c2> InputVector, in bool InputIsUnsigned, in uint InputInterpretation, in ByteAddressBuffer MatrixBuffer, in uint MatrixOffset, in uint MatrixInterpretation, in uint M, in uint K, in uint MatrixLayout, in bool MatrixIsTransposed, in uint MatrixStride);
-
-void [[min_sm=6.10]] __builtin_MatVecMulAdd(out LinAlg<c> OutputVector, in bool OutputIsUnsigned, in LinAlg<c2> InputVector, in bool InputIsUnsigned, in uint InputInterpretation, in ByteAddressBuffer MatrixBuffer, in uint MatrixOffset, in uint MatrixInterpretation, in uint M, in uint K, in uint MatrixLayout, in bool MatrixIsTransposed, in uint MatrixStride, in ByteAddressBuffer BiasVector, in uint BiasOffset, in uint BiasInterpretation);
-
-void [[min_sm=6.10]] __builtin_OuterProductAccumulate(in LinAlg<c> InputVector1, in LinAlg<c2> InputVector2, in RWByteAddressBuffer MatrixBuffer, in uint MatrixOffset, in uint MatrixInterpretation, in uint MatrixLayout, in uint MatrixStride);
-
-void [[min_sm=6.10]] __builtin_VectorAccumulate(in LinAlg<c> InputVector, in RWByteAddressBuffer MatrixBuffer, in uint MatrixOffset);
-
 // LinAlg intrinsics
 
 void [[min_sm=6.10]] __builtin_LinAlg_FillMatrix(out LinAlgMatrix ret, in numeric value);
diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py
index b55601d11b..ff03097639 100644
--- a/utils/hct/hctdb.py
+++ b/utils/hct/hctdb.py
@@ -1087,11 +1087,6 @@ def populate_categories_and_models(self):
                 "library",
                 "raygeneration",
             )
-        for i in (
-            "MatVecMul,MatVecMulAdd,OuterProductAccumulate,VectorAccumulate"
-        ).split(","):
-            self.name_idx[i].category = "Linear Algebra Operations"
-            self.name_idx[i].shader_model = 6, 10
         # End of core DXIL ops
         self.populate_categories_and_models_ExperimentalOps()
 
@@ -6075,94 +6070,7 @@ def UFI(name, **mappings):
             counters=("tex_store",),
         )
 
-        add_dxil_op(
-            "MatVecMul",
-            "MatVecMul",
-            "Multiplies a MxK dimension matrix and a K sized input vector",
-            "<hfwi,<hfwi",
-            "ro",
-            [
-                db_dxil_param(0, "$x0", "outputVector", "output vector"),
-                db_dxil_param(2, "$x1", "inputVector", "input vector"),
-                db_dxil_param(3, "i1", "isInputUnsigned", "is input unsigned"),
-                db_dxil_param(4, "i32", "inputInterpretation", "input interpretation"),
-                db_dxil_param(5, "res", "matrixBuffer", "matrix resource"),
-                db_dxil_param(6, "i32", "matrixOffset", "matrix offset"),
-                db_dxil_param(7, "i32", "matrixIntepretation", "matrix intepretation"),
-                db_dxil_param(8, "i32", "matrixM", "matrix M dimension"),
-                db_dxil_param(9, "i32", "matrixK", "matrix K dimension"),
-                db_dxil_param(10, "i32", "matrixLayout", "matrix layout"),
-                db_dxil_param(11, "i1", "matrixTranspose", "matrix transpose"),
-                db_dxil_param(12, "i32", "matrixStride", "matrix stride"),
-                db_dxil_param(13, "i1", "isOutputUnsigned", "is output unsigned"),
-            ],
-        )
-
-        add_dxil_op(
-            "MatVecMulAdd",
-            "MatVecMulAdd",
-            "multiplies a MxK dimension matrix and a K sized input vector and adds an M-sized bias vector",
-            "<hfwi,<hfwi",
-            "ro",
-            [
-                db_dxil_param(0, "$x0", "outputVector", "output vector"),
-                db_dxil_param(2, "$x1", "inputVector", "input vector"),
-                db_dxil_param(3, "i1", "isInputUnsigned", "is input unsigned"),
-                db_dxil_param(4, "i32", "inputInterpretation", "input interpretation"),
-                db_dxil_param(5, "res", "matrixBuffer", "matrix resource"),
-                db_dxil_param(6, "i32", "matrixOffset", "matrix offset"),
-                db_dxil_param(7, "i32", "matrixIntepretation", "matrix intepretation"),
-                db_dxil_param(8, "i32", "matrixM", "matrix M dimension"),
-                db_dxil_param(9, "i32", "matrixK", "matrix K dimension"),
-                db_dxil_param(10, "i32", "matrixLayout", "matrix layout"),
-                db_dxil_param(11, "i1", "matrixTranspose", "matrix transpose"),
-                db_dxil_param(12, "i32", "matrixStride", "matrix stride"),
-                db_dxil_param(13, "res", "biasBuffer", "bias vector resource"),
-                db_dxil_param(14, "i32", "biasOffset", "bias vector offset"),
-                db_dxil_param(
-                    15, "i32", "biasIntepretation", "bias vector intepretation"
-                ),
-                db_dxil_param(16, "i1", "isOutputUnsigned", "is output unsigned"),
-            ],
-        )
-
-        add_dxil_op(
-            "OuterProductAccumulate",
-            "OuterProductAccumulate",
-            "Computes the outer product between column vectors and an MxN matrix is accumulated component-wise atomically (with device scope) in memory",
-            "<hfwi,<hfwi",
-            "",
-            [
-                db_dxil_param(0, "v", "", ""),
-                db_dxil_param(2, "$x0", "inputVector1", "input vector 1"),
-                db_dxil_param(3, "$x1", "inputVector2", "input vector 2"),
-                db_dxil_param(4, "res", "matrixBuffer", "matrix resource"),
-                db_dxil_param(5, "i32", "matrixOffset", "matrix offset"),
-                db_dxil_param(
-                    6,
-                    "i32",
-                    "matrixIntepretation",
-                    "matrix intepretation",
-                    is_const=True,
-                ),
-                db_dxil_param(7, "i32", "matrixLayout", "matrix layout", is_const=True),
-                db_dxil_param(8, "i32", "matrixStride", "matrix stride"),
-            ],
-        )
-
-        add_dxil_op(
-            "VectorAccumulate",
-            "VectorAccumulate",
-            "Accumulates the components of a vector component-wise atomically (with device scope) to the corresponding elements of an array in memory",
-            "<hfwi",
-            "",
-            [
-                db_dxil_param(0, "v", "", ""),
-                db_dxil_param(2, "$o", "inputVector", "input vector 1"),
-                db_dxil_param(3, "res", "arrayBuffer", "output array resource"),
-                db_dxil_param(4, "i32", "arrayOffset", "output array offset"),
-            ],
-        )
+        reserve_dxil_op_range("ReservedD", 4)
 
         # Long Vector Reduction
         add_dxil_op(
@@ -6664,7 +6572,7 @@ def populate_ExperimentalOps(self):
             ],
         )
 
-        op_table.reserve_dxil_op_range("ReservedD", 3, 1)
+        op_table.reserve_dxil_op_range("ReservedE", 3, 1)
 
         # Debugging intrinsics
         add_dxil_op(
@@ -8689,67 +8597,6 @@ def build_valrules(self):
             "reordercoherent requires SM 6.9 or later.",
         )
 
-        # Linalg ops
-        self.add_valrule_msg(
-            "Instr.MatVecOpIsUnsignedFlagsAreConst",
-            "In Linalg Mul/MulAdd functions, IsUnsigned flag is a constant.",
-            "%0 is not a constant value",
-        )
-
-        self.add_valrule_msg(
-            "Instr.LinalgInterpretationParamAreConst",
-            "In Linalg operations, Interpretation value is a constant.",
-            "%0 is not a constant value",
-        )
-
-        self.add_valrule_msg(
-            "Instr.LinalgInvalidRegisterInterpValue",
-            "From Register Interpretation value must be valid.",
-            "'%0' is not a valid %1 interpretation value",
-        )
-
-        self.add_valrule_msg(
-            "Instr.LinalgInvalidMemoryInterpValue",
-            "In Memory Interpolation value must be valid.",
-            "'%0' is not a valid %1 interpretation value",
-        )
-
-        self.add_valrule_msg(
-            "Instr.LinalgMatrixShapeParamsAreConst",
-            "Matrix Layout, Dimensions and isTranspose are constants",
-            "'%0' is not a constant value",
-        )
-
-        self.add_valrule_msg(
-            "Instr.LinalgInvalidMatrixLayoutValueForMatVecOps",
-            "Matrix Layout for Linalg Mul/MulAdd operation must be valid.",
-            "matrix layout value '%0' is not valid. Must be between [%1 - %2]",
-        )
-
-        self.add_valrule_msg(
-            "Instr.LinalgMatrixStrideZeroForOptimalLayouts",
-            "For optimal layouts, matrix stride must be zero.",
-            "matrix stride must be a constant zero for optimal layouts",
-        )
-
-        self.add_valrule_msg(
-            "Instr.LinalgMatrixLayoutNotTransposable",
-            "Row Major and Column Major matrix layouts are not transposable.",
-            "%0 matrix layout is not transposable",
-        )
-
-        self.add_valrule_msg(
-            "Instr.LinalgNotAnUnsignedType",
-            "Unsigned flag set for a float signed type",
-            "IsUnsigned flag set to true for a float type '%0' vector",
-        )
-
-        self.add_valrule_msg(
-            "Instr.LinalgInvalidMatrixLayoutValueForOuterProductAccumulate",
-            "Matrix Layout for Linalg Mul/MulAdd operation must be valid.",
-            "matrix layout value '%0' is not valid for outerproductaccumulate, must be '%1'",
-        )
-
         # Some legacy rules:
         # - space is only supported for shader targets 5.1 and higher
         # - multiple rules regarding derivatives, which isn't a supported feature for DXIL
diff --git a/utils/hct/hlsl_intrinsic_opcodes.json b/utils/hct/hlsl_intrinsic_opcodes.json
index fdd78ab034..4935a371b4 100644
--- a/utils/hct/hlsl_intrinsic_opcodes.json
+++ b/utils/hct/hlsl_intrinsic_opcodes.json
@@ -1,6 +1,6 @@
 {
   "IntrinsicOpCodes": {
-    "Num_Intrinsics": 426,
+    "Num_Intrinsics": 422,
     "IOP_AcceptHitAndEndSearch": 0,
     "IOP_AddUint64": 1,
     "IOP_AllMemoryBarrier": 2,
@@ -391,41 +391,37 @@
     "MOP_DxHitObject_MakeMiss": 387,
     "MOP_DxHitObject_SetShaderTableIndex": 388,
     "MOP_DxHitObject_TraceRay": 389,
-    "IOP___builtin_MatVecMul": 390,
-    "IOP___builtin_MatVecMulAdd": 391,
-    "IOP___builtin_OuterProductAccumulate": 392,
-    "IOP___builtin_VectorAccumulate": 393,
-    "IOP_isnormal": 394,
-    "IOP_GetGroupWaveCount": 395,
-    "IOP_GetGroupWaveIndex": 396,
-    "IOP_ClusterID": 397,
-    "MOP_CandidateClusterID": 398,
-    "MOP_CommittedClusterID": 399,
-    "MOP_DxHitObject_GetClusterID": 400,
-    "IOP_TriangleObjectPositions": 401,
-    "MOP_CandidateTriangleObjectPositions": 402,
-    "MOP_CommittedTriangleObjectPositions": 403,
-    "MOP_DxHitObject_TriangleObjectPositions": 404,
-    "IOP___builtin_LinAlg_CopyConvertMatrix": 405,
-    "IOP___builtin_LinAlg_FillMatrix": 406,
-    "IOP___builtin_LinAlg_MatrixGetCoordinate": 407,
-    "IOP___builtin_LinAlg_MatrixGetElement": 408,
-    "IOP___builtin_LinAlg_MatrixLength": 409,
-    "IOP___builtin_LinAlg_MatrixLoadFromDescriptor": 410,
-    "IOP___builtin_LinAlg_MatrixLoadFromMemory": 411,
-    "IOP___builtin_LinAlg_MatrixSetElement": 412,
-    "IOP___builtin_LinAlg_MatrixStoreToDescriptor": 413,
-    "IOP___builtin_LinAlg_MatrixStoreToMemory": 414,
-    "IOP___builtin_LinAlg_MatrixAccumulate": 415,
-    "IOP___builtin_LinAlg_MatrixMatrixMultiply": 416,
-    "IOP___builtin_LinAlg_MatrixMatrixMultiplyAccumulate": 417,
-    "IOP___builtin_LinAlg_MatrixQueryAccumulatorLayout": 418,
-    "IOP___builtin_LinAlg_MatrixAccumulateToDescriptor": 419,
-    "IOP___builtin_LinAlg_MatrixAccumulateToMemory": 420,
-    "IOP___builtin_LinAlg_MatrixOuterProduct": 421,
-    "IOP___builtin_LinAlg_MatrixVectorMultiply": 422,
-    "IOP___builtin_LinAlg_MatrixVectorMultiplyAdd": 423,
-    "IOP_DebugBreak": 424,
-    "IOP_DxIsDebuggerPresent": 425
+    "IOP_isnormal": 390,
+    "IOP_GetGroupWaveCount": 391,
+    "IOP_GetGroupWaveIndex": 392,
+    "IOP_ClusterID": 393,
+    "MOP_CandidateClusterID": 394,
+    "MOP_CommittedClusterID": 395,
+    "MOP_DxHitObject_GetClusterID": 396,
+    "IOP_TriangleObjectPositions": 397,
+    "MOP_CandidateTriangleObjectPositions": 398,
+    "MOP_CommittedTriangleObjectPositions": 399,
+    "MOP_DxHitObject_TriangleObjectPositions": 400,
+    "IOP___builtin_LinAlg_CopyConvertMatrix": 401,
+    "IOP___builtin_LinAlg_FillMatrix": 402,
+    "IOP___builtin_LinAlg_MatrixGetCoordinate": 403,
+    "IOP___builtin_LinAlg_MatrixGetElement": 404,
+    "IOP___builtin_LinAlg_MatrixLength": 405,
+    "IOP___builtin_LinAlg_MatrixLoadFromDescriptor": 406,
+    "IOP___builtin_LinAlg_MatrixLoadFromMemory": 407,
+    "IOP___builtin_LinAlg_MatrixSetElement": 408,
+    "IOP___builtin_LinAlg_MatrixStoreToDescriptor": 409,
+    "IOP___builtin_LinAlg_MatrixStoreToMemory": 410,
+    "IOP___builtin_LinAlg_MatrixAccumulate": 411,
+    "IOP___builtin_LinAlg_MatrixMatrixMultiply": 412,
+    "IOP___builtin_LinAlg_MatrixMatrixMultiplyAccumulate": 413,
+    "IOP___builtin_LinAlg_MatrixQueryAccumulatorLayout": 414,
+    "IOP___builtin_LinAlg_MatrixAccumulateToDescriptor": 415,
+    "IOP___builtin_LinAlg_MatrixAccumulateToMemory": 416,
+    "IOP___builtin_LinAlg_MatrixOuterProduct": 417,
+    "IOP___builtin_LinAlg_MatrixVectorMultiply": 418,
+    "IOP___builtin_LinAlg_MatrixVectorMultiplyAdd": 419,
+    "IOP_DebugBreak": 420,
+    "IOP_DxIsDebuggerPresent": 421
   }
 }