From ceb4c01cbfa3076cdd2d09732bf295b4ff2a7886 Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <alexsepkowski@gmail.com>
Date: Tue, 31 Mar 2026 13:19:52 -0700
Subject: [PATCH 1/2] Fix rawBufferVectorLoad/Store to widen min precision
 types to 32-bit (#8274)

## Summary

Fixes `RawBufferVectorLoad`/`Store` to use 32-bit element types
(`i32`/`f32`) for min precision types (`min16int`, `min16uint`,
`min16float`) instead of 16-bit (`i16`/`f16`). This matches how
pre-SM6.9 `RawBufferLoad` handles min precision.

Resolves #8273

## Root Cause

`TranslateBufLoad` in `HLOperationLower.cpp` creates the vector type
directly from the min precision element type (`i16`/`f16`) without
widening to `i32`/`f32`. This causes WARP (and potentially other
drivers) to load/store 2 bytes per element instead of 4, mismatching the
buffer layout.

## Fix

Apply the same widening pattern used for bool types:
- **Load**: Load as `v_i32`/`v_f32`, then trunc/fptrunc back to
`i16`/`half`
- **Store**: `sext`/`fpext` to `i32`/`f32`, then store as
`v_i32`/`v_f32`

## Testing

Added FileCheck test verifying all 3 min precision types produce
`i32`/`f32` vector load/store ops.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Co-authored-by: Tex Riddell <texr@microsoft.com>
---
 lib/HLSL/DxilGenerationPass.cpp               | 75 +++++++++++--------
 lib/HLSL/HLOperationLower.cpp                 | 51 ++++++++++++-
 .../dxil/debug/min16/min16float_vec.hlsl      | 16 ++--
 .../dxil/debug/min16/min16int_vec.hlsl        | 16 ++--
 .../dxil/debug/min16/min16uint_vec.hlsl       | 16 ++--
 .../min_precision_raw_load_store.hlsl         | 62 +++++++++++++++
 6 files changed, 180 insertions(+), 56 deletions(-)
 create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_raw_load_store.hlsl
diff --git a/lib/HLSL/DxilGenerationPass.cpp b/lib/HLSL/DxilGenerationPass.cpp
index c3a6ad7dfc..3930cc3f2f 100644
--- a/lib/HLSL/DxilGenerationPass.cpp
+++ b/lib/HLSL/DxilGenerationPass.cpp
@@ -993,11 +993,10 @@ void ReplaceMinPrecisionRawBufferStoreByType(
         Args.emplace_back(NewV);
       }
     } else if (FromTy->isIntegerTy()) {
-      // This case only applies to typed buffer since Store operation of byte
-      // address buffer for min precision is handled by implicit conversion on
-      // intrinsic call. Since we are extending integer, we have to know if we
-      // should sign ext or zero ext. We can do this by iterating checking the
-      // size of the element at struct type and comp type at type annotation
+      // Since we are extending integer, we have to know if we should sign ext
+      // or zero ext. For StructuredBuffers we get signedness from the struct
+      // type annotation. For ByteAddressBuffer (raw buffers) there is no struct
+      // annotation, so we fall back to sext as a conservative default.
       CallInst *handleCI = dyn_cast<CallInst>(
           CI->getArgOperand(DxilInst_RawBufferStore::arg_uav));
       DXASSERT(handleCI,
@@ -1007,34 +1006,50 @@ void ReplaceMinPrecisionRawBufferStoreByType(
                "otherwise fail to handle for buffer store lost its retTy");
       StructType *STy = dyn_cast<StructType>(resTyIt->second);
 
-      STy = cast<StructType>(STy->getElementType(0));
-      DxilStructAnnotation *SAnnot = typeSys.GetStructAnnotation(STy);
-      ConstantInt *offsetInt = dyn_cast<ConstantInt>(
-          CI->getArgOperand(DxilInst_RawBufferStore::arg_elementOffset));
-      unsigned offset = offsetInt->getSExtValue();
-      unsigned currentOffset = 0;
-      for (DxilStructTypeIterator iter = begin(STy, SAnnot),
-                                  ItEnd = end(STy, SAnnot);
-           iter != ItEnd; ++iter) {
-        std::pair<Type *, DxilFieldAnnotation *> pair = *iter;
-        currentOffset += DL.getTypeAllocSize(pair.first);
-        if (currentOffset > offset) {
-          if (pair.second->GetCompType().IsUIntTy()) {
-            for (unsigned i = 4; i < 8; ++i) {
-              Value *NewV = CIBuilder.CreateZExt(CI->getArgOperand(i), ToTy);
-              Args.emplace_back(NewV);
+      StructType *InnerSTy =
+          STy ? dyn_cast<StructType>(STy->getElementType(0)) : nullptr;
+      DxilStructAnnotation *SAnnot =
+          InnerSTy ? typeSys.GetStructAnnotation(InnerSTy) : nullptr;
+
+      if (SAnnot) {
+        // StructuredBuffer path: use struct annotation to determine signedness.
+        ConstantInt *offsetInt = dyn_cast<ConstantInt>(
+            CI->getArgOperand(DxilInst_RawBufferStore::arg_elementOffset));
+        unsigned offset = offsetInt->getSExtValue();
+        unsigned currentOffset = 0;
+        for (DxilStructTypeIterator iter = begin(InnerSTy, SAnnot),
+                                    ItEnd = end(InnerSTy, SAnnot);
+             iter != ItEnd; ++iter) {
+          std::pair<Type *, DxilFieldAnnotation *> pair = *iter;
+          currentOffset += DL.getTypeAllocSize(pair.first);
+          if (currentOffset > offset) {
+            if (pair.second->GetCompType().IsUIntTy()) {
+              for (unsigned i = 4; i < 8; ++i) {
+                Value *NewV = CIBuilder.CreateZExt(CI->getArgOperand(i), ToTy);
+                Args.emplace_back(NewV);
+              }
+              break;
+            } else if (pair.second->GetCompType().IsIntTy()) {
+              for (unsigned i = 4; i < 8; ++i) {
+                Value *NewV = CIBuilder.CreateSExt(CI->getArgOperand(i), ToTy);
+                Args.emplace_back(NewV);
+              }
+              break;
+            } else {
+              DXASSERT(false, "Invalid comp type");
             }
-            break;
-          } else if (pair.second->GetCompType().IsIntTy()) {
-            for (unsigned i = 4; i < 8; ++i) {
-              Value *NewV = CIBuilder.CreateSExt(CI->getArgOperand(i), ToTy);
-              Args.emplace_back(NewV);
-            }
-            break;
-          } else {
-            DXASSERT(false, "Invalid comp type");
           }
         }
+      } else {
+        // ByteAddressBuffer path: no struct annotation available, so
+        // signedness is unknown. Default to sext.
+        for (unsigned i = 4; i < 8; ++i) {
+          Value *Arg = CI->getArgOperand(i);
+          if (isa<UndefValue>(Arg))
+            Args.emplace_back(UndefValue::get(ToTy));
+          else
+            Args.emplace_back(CIBuilder.CreateSExt(Arg, ToTy));
+        }
       }
     }
 
diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp
index 69ce941178..ac3994c2ac 100644
--- a/lib/HLSL/HLOperationLower.cpp
+++ b/lib/HLSL/HLOperationLower.cpp
@@ -4308,6 +4308,23 @@ static SmallVector<Value *, 10> GetBufLoadArgs(ResLoadHelper helper,
   return Args;
 }
 
+static bool isMinPrecisionType(Type *EltTy, const DataLayout &DL) {
+  return !EltTy->isIntegerTy(1) &&
+         DL.getTypeAllocSizeInBits(EltTy) > EltTy->getPrimitiveSizeInBits();
+}
+
+static Type *widenMinPrecisionType(Type *Ty, LLVMContext &Ctx,
+                                   const DataLayout &DL) {
+  Type *EltTy = Ty->getScalarType();
+  if (!isMinPrecisionType(EltTy, DL))
+    return Ty;
+  Type *WideTy = EltTy->isFloatingPointTy() ? Type::getFloatTy(Ctx)
+                                            : Type::getInt32Ty(Ctx);
+  if (Ty->isVectorTy())
+    return VectorType::get(WideTy, Ty->getVectorNumElements());
+  return WideTy;
+}
+
 // Emits as many calls as needed to load the full vector
 // Performs any needed extractions and conversions of the results.
 Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
@@ -4321,10 +4338,13 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
     NumComponents = Ty->getVectorNumElements();
 
   const bool isTyped = DXIL::IsTyped(RK);
-  Type *EltTy = Ty->getScalarType();
+  Type *OrigEltTy = Ty->getScalarType();
+  Type *WidenedTy = widenMinPrecisionType(Ty, Builder.getContext(), DL);
+  Type *EltTy = WidenedTy->getScalarType();
+  const bool isMinPrec = (WidenedTy != Ty);
   const bool is64 = (EltTy->isIntegerTy(64) || EltTy->isDoubleTy());
   const bool isBool = EltTy->isIntegerTy(1);
-  // Values will be loaded in memory representations.
+  // DXIL buffer loads require i32; narrow types are reconverted after load.
   if (isBool || (is64 && isTyped))
     EltTy = Builder.getInt32Ty();
 
@@ -4440,6 +4460,14 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK,
     retValNew = Builder.CreateICmpNE(
         retValNew, Constant::getNullValue(retValNew->getType()));
 
+  // DXIL loads min precision as 32-bit; narrow back to original IR type.
+  if (isMinPrec) {
+    if (OrigEltTy->isIntegerTy())
+      retValNew = Builder.CreateTrunc(retValNew, Ty);
+    else
+      retValNew = Builder.CreateFPTrunc(retValNew, Ty);
+  }
+
   helper.retVal->replaceAllUsesWith(retValNew);
   helper.retVal = retValNew;
 
@@ -4560,6 +4588,25 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val,
     val = Builder.CreateZExt(val, Ty);
   }
 
+  // Min precision alloc size is 32-bit; widen to match store intrinsic.
+  // Scalar RawBufferStore widening is handled by TranslateMinPrecisionRawBuffer
+  // in DxilGenerationPass, which has signedness info from struct annotations.
+  if (opcode == OP::OpCode::RawBufferVectorStore) {
+    const DataLayout &DL =
+        OP->GetModule()->GetHLModule().GetModule()->getDataLayout();
+    Type *WideTy = widenMinPrecisionType(Ty, Builder.getContext(), DL);
+    if (WideTy != Ty) {
+      if (EltTy->isFloatingPointTy())
+        val = Builder.CreateFPExt(val, WideTy);
+      else
+        // TODO(#8314): Signedness info is lost by this point; SExt is wrong
+        // for min16uint. Front-end should widen during Clang CodeGen instead.
+        val = Builder.CreateSExt(val, WideTy);
+      EltTy = WideTy->getScalarType();
+      Ty = WideTy;
+    }
+  }
+
   // If RawBuffer store of 64-bit value, don't set alignment to 8,
   // since buffer alignment isn't known to be anything over 4.
   unsigned alignValue = OP->GetAllocSizeForType(EltTy);
diff --git a/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16float_vec.hlsl b/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16float_vec.hlsl
index 60fff4a6df..5dae57d261 100644
--- a/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16float_vec.hlsl
+++ b/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16float_vec.hlsl
@@ -16,20 +16,20 @@ void main()
 {
     Foo foo = buf[0];
     // foo.m_B.x
-    // CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
-    // CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
+    // CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
+    // CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
 
     // foo.m_B.y
-    // CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
-    // CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
+    // CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
+    // CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
 
     // foo.m_B.z
-    // CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
-    // CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
+    // CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
+    // CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
 
     // foo.m_A.x
-    // CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
-    // CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
+    // CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
+    // CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
 
     min16float value1 = foo.m_B.x;
     min16float value2 = foo.m_B.y;
diff --git a/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16int_vec.hlsl b/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16int_vec.hlsl
index a16a006b76..b7af2cf87d 100644
--- a/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16int_vec.hlsl
+++ b/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16int_vec.hlsl
@@ -16,20 +16,20 @@ void main()
 {
     Foo foo = buf[0];
     // foo.m_B.x
-    // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
-    // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
+    // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
+    // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
 
     // foo.m_B.y
-    // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
-    // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
+    // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
+    // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
 
     // foo.m_B.z
-    // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
-    // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
+    // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
+    // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
 
     // foo.m_A.x
-    // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
-    // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
+    // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
+    // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
 
     min16int value1 = foo.m_B.x;
     min16int value2 = foo.m_B.y;
diff --git a/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16uint_vec.hlsl b/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16uint_vec.hlsl
index e09a944a44..488c0385f9 100644
--- a/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16uint_vec.hlsl
+++ b/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16uint_vec.hlsl
@@ -16,20 +16,20 @@ void main()
 {
     Foo foo = buf[0];
     // foo.m_B.x
-    // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
-    // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
+    // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16)
+    // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16)
 
     // foo.m_B.y
-    // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
-    // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
+    // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16)
+    // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16)
 
     // foo.m_B.z
-    // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
-    // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
+    // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16)
+    // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16)
 
     // foo.m_A.x
-    // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
-    // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
+    // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
+    // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16)
 
     min16int value1 = foo.m_B.x;
     min16int value2 = foo.m_B.y;
diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_raw_load_store.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_raw_load_store.hlsl
new file mode 100644
index 0000000000..38485f7de1
--- /dev/null
+++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_raw_load_store.hlsl
@@ -0,0 +1,62 @@
+// RUN: %dxc -E main -T cs_6_9 %s | FileCheck %s
+
+// Regression test for min precision rawBufferLoad/Store.
+// Min precision types should use i32/f32 operations (not i16/f16)
+// to match how pre-SM6.9 RawBufferLoad handles min precision.
+
+RWByteAddressBuffer g_buf : register(u0);
+
+[numthreads(1,1,1)]
+void main() {
+  // === Vector loads/stores (RawBufferVectorLoad/Store) ===
+
+  // min16int: should load as v3i32, not v3i16
+  // CHECK: call %dx.types.ResRet.v3i32 @dx.op.rawBufferVectorLoad.v3i32
+  min16int3 vi = g_buf.Load< min16int3 >(0);
+  // CHECK: call void @dx.op.rawBufferVectorStore.v3i32
+  g_buf.Store< min16int3 >(12, vi);
+
+  // min16uint: should load as v3i32, not v3i16
+  // CHECK: call %dx.types.ResRet.v3i32 @dx.op.rawBufferVectorLoad.v3i32
+  min16uint3 vu = g_buf.Load< min16uint3 >(24);
+  // CHECK: call void @dx.op.rawBufferVectorStore.v3i32
+  g_buf.Store< min16uint3 >(36, vu);
+
+  // min16float: should load as v3f32, not v3f16
+  // CHECK: call %dx.types.ResRet.v3f32 @dx.op.rawBufferVectorLoad.v3f32
+  // CHECK: fptrunc <3 x float> {{.*}} to <3 x half>
+  min16float3 vf = g_buf.Load< min16float3 >(48);
+  // CHECK: fpext <3 x half> {{.*}} to <3 x float>
+  // CHECK: call void @dx.op.rawBufferVectorStore.v3f32
+  g_buf.Store< min16float3 >(60, vf);
+
+  // === Scalar loads/stores (RawBufferLoad/Store) ===
+
+  // min16int scalar: should use i32 rawBufferStore
+  // CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+  min16int si = g_buf.Load< min16int >(72);
+  // CHECK: call void @dx.op.rawBufferStore.i32
+  g_buf.Store< min16int >(76, si);
+
+  // min16uint scalar: should use i32 rawBufferStore
+  // CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
+  min16uint su = g_buf.Load< min16uint >(80);
+  // CHECK: call void @dx.op.rawBufferStore.i32
+  g_buf.Store< min16uint >(84, su);
+
+  // min16float scalar: should use f32 rawBufferStore
+  // CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32
+  min16float sf = g_buf.Load< min16float >(88);
+  // CHECK: call void @dx.op.rawBufferStore.f32
+  g_buf.Store< min16float >(92, sf);
+
+  // Verify i16/f16 ops are NOT used.
+  // CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}i16
+  // CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}i16
+  // CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}f16
+  // CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}f16
+  // CHECK-NOT: rawBufferLoad.i16
+  // CHECK-NOT: rawBufferStore.i16
+  // CHECK-NOT: rawBufferLoad.f16
+  // CHECK-NOT: rawBufferStore.f16
+}

From 9e8fae916168fcaa65d35aa887ab3ebe7794d9fb Mon Sep 17 00:00:00 2001
From: Alex Sepkowski <alexsepkowski@gmail.com>
Date: Wed, 1 Apr 2026 12:09:58 -0700
Subject: [PATCH 2/2] Revert out-of-scope DxilGenerationPass changes from #8274
 (#8321)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

Reverts the `DxilGenerationPass` ByteAddressBuffer scalar store changes
and removes scalar store tests that were included in PR #8274. These
changes were out of scope for the vector load/store fix and, on further
discussion, were concluded to be incomplete.

Follow-up issue for the proper fix: #8322

## What changed

- **DxilGenerationPass.cpp**: Reverted the ByteAddressBuffer fallback
path added in `ReplaceMinPrecisionRawBufferStoreByType`. The `sext`
fallback was wrong for `min16uint` (loses signedness), and the fix
belongs in CodeGen where signedness info is still available.
- **min_precision_raw_load_store.hlsl**: Removed scalar load/store
tests. Scalar `ByteAddressBuffer::Store<min16int>()` hits a pre-existing
crash in `TranslateMinPrecisionRawBuffer` (`cast<StructType>` on
ByteAddressBuffer's `i32` inner element). Test now covers vector
loads/stores only, which is the scope of the original fix.

## Context

The original PR #8274 correctly fixed `RawBufferVectorLoad/Store` to
widen min precision types to 32-bit. However, it also added a
ByteAddressBuffer scalar store fix in `DxilGenerationPass` that:
1. Crept outside the scope of the vector load/store fix
2. Was incomplete — the `sext` fallback is wrong for unsigned types
(`min16uint`)
3. Should instead be handled during Clang CodeGen, where signedness
information is available

Scalar ByteAddressBuffer template store widening for min precision types
is a separate pre-existing issue that needs a proper fix in CodeGen.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 lib/HLSL/DxilGenerationPass.cpp               | 75 ++++++++-----------
 .../min_precision_raw_load_store.hlsl         | 26 +------
 2 files changed, 31 insertions(+), 70 deletions(-)

diff --git a/lib/HLSL/DxilGenerationPass.cpp b/lib/HLSL/DxilGenerationPass.cpp
index 3930cc3f2f..c3a6ad7dfc 100644
--- a/lib/HLSL/DxilGenerationPass.cpp
+++ b/lib/HLSL/DxilGenerationPass.cpp
@@ -993,10 +993,11 @@ void ReplaceMinPrecisionRawBufferStoreByType(
         Args.emplace_back(NewV);
       }
     } else if (FromTy->isIntegerTy()) {
-      // Since we are extending integer, we have to know if we should sign ext
-      // or zero ext. For StructuredBuffers we get signedness from the struct
-      // type annotation. For ByteAddressBuffer (raw buffers) there is no struct
-      // annotation, so we fall back to sext as a conservative default.
+      // This case only applies to typed buffer since Store operation of byte
+      // address buffer for min precision is handled by implicit conversion on
+      // intrinsic call. Since we are extending integer, we have to know if we
+      // should sign ext or zero ext. We can do this by iterating checking the
+      // size of the element at struct type and comp type at type annotation
       CallInst *handleCI = dyn_cast<CallInst>(
           CI->getArgOperand(DxilInst_RawBufferStore::arg_uav));
       DXASSERT(handleCI,
@@ -1006,50 +1007,34 @@ void ReplaceMinPrecisionRawBufferStoreByType(
                "otherwise fail to handle for buffer store lost its retTy");
       StructType *STy = dyn_cast<StructType>(resTyIt->second);
 
-      StructType *InnerSTy =
-          STy ? dyn_cast<StructType>(STy->getElementType(0)) : nullptr;
-      DxilStructAnnotation *SAnnot =
-          InnerSTy ? typeSys.GetStructAnnotation(InnerSTy) : nullptr;
-
-      if (SAnnot) {
-        // StructuredBuffer path: use struct annotation to determine signedness.
-        ConstantInt *offsetInt = dyn_cast<ConstantInt>(
-            CI->getArgOperand(DxilInst_RawBufferStore::arg_elementOffset));
-        unsigned offset = offsetInt->getSExtValue();
-        unsigned currentOffset = 0;
-        for (DxilStructTypeIterator iter = begin(InnerSTy, SAnnot),
-                                    ItEnd = end(InnerSTy, SAnnot);
-             iter != ItEnd; ++iter) {
-          std::pair<Type *, DxilFieldAnnotation *> pair = *iter;
-          currentOffset += DL.getTypeAllocSize(pair.first);
-          if (currentOffset > offset) {
-            if (pair.second->GetCompType().IsUIntTy()) {
-              for (unsigned i = 4; i < 8; ++i) {
-                Value *NewV = CIBuilder.CreateZExt(CI->getArgOperand(i), ToTy);
-                Args.emplace_back(NewV);
-              }
-              break;
-            } else if (pair.second->GetCompType().IsIntTy()) {
-              for (unsigned i = 4; i < 8; ++i) {
-                Value *NewV = CIBuilder.CreateSExt(CI->getArgOperand(i), ToTy);
-                Args.emplace_back(NewV);
-              }
-              break;
-            } else {
-              DXASSERT(false, "Invalid comp type");
+      STy = cast<StructType>(STy->getElementType(0));
+      DxilStructAnnotation *SAnnot = typeSys.GetStructAnnotation(STy);
+      ConstantInt *offsetInt = dyn_cast<ConstantInt>(
+          CI->getArgOperand(DxilInst_RawBufferStore::arg_elementOffset));
+      unsigned offset = offsetInt->getSExtValue();
+      unsigned currentOffset = 0;
+      for (DxilStructTypeIterator iter = begin(STy, SAnnot),
+                                  ItEnd = end(STy, SAnnot);
+           iter != ItEnd; ++iter) {
+        std::pair<Type *, DxilFieldAnnotation *> pair = *iter;
+        currentOffset += DL.getTypeAllocSize(pair.first);
+        if (currentOffset > offset) {
+          if (pair.second->GetCompType().IsUIntTy()) {
+            for (unsigned i = 4; i < 8; ++i) {
+              Value *NewV = CIBuilder.CreateZExt(CI->getArgOperand(i), ToTy);
+              Args.emplace_back(NewV);
             }
+            break;
+          } else if (pair.second->GetCompType().IsIntTy()) {
+            for (unsigned i = 4; i < 8; ++i) {
+              Value *NewV = CIBuilder.CreateSExt(CI->getArgOperand(i), ToTy);
+              Args.emplace_back(NewV);
+            }
+            break;
+          } else {
+            DXASSERT(false, "Invalid comp type");
           }
         }
-      } else {
-        // ByteAddressBuffer path: no struct annotation available, so
-        // signedness is unknown. Default to sext.
-        for (unsigned i = 4; i < 8; ++i) {
-          Value *Arg = CI->getArgOperand(i);
-          if (isa<UndefValue>(Arg))
-            Args.emplace_back(UndefValue::get(ToTy));
-          else
-            Args.emplace_back(CIBuilder.CreateSExt(Arg, ToTy));
-        }
       }
     }
 
diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_raw_load_store.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_raw_load_store.hlsl
index 38485f7de1..1b5ba27baf 100644
--- a/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_raw_load_store.hlsl
+++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_raw_load_store.hlsl
@@ -30,33 +30,9 @@ void main() {
   // CHECK: call void @dx.op.rawBufferVectorStore.v3f32
   g_buf.Store< min16float3 >(60, vf);
 
-  // === Scalar loads/stores (RawBufferLoad/Store) ===
-
-  // min16int scalar: should use i32 rawBufferStore
-  // CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
-  min16int si = g_buf.Load< min16int >(72);
-  // CHECK: call void @dx.op.rawBufferStore.i32
-  g_buf.Store< min16int >(76, si);
-
-  // min16uint scalar: should use i32 rawBufferStore
-  // CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32
-  min16uint su = g_buf.Load< min16uint >(80);
-  // CHECK: call void @dx.op.rawBufferStore.i32
-  g_buf.Store< min16uint >(84, su);
-
-  // min16float scalar: should use f32 rawBufferStore
-  // CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32
-  min16float sf = g_buf.Load< min16float >(88);
-  // CHECK: call void @dx.op.rawBufferStore.f32
-  g_buf.Store< min16float >(92, sf);
-
-  // Verify i16/f16 ops are NOT used.
+  // Verify i16/f16 ops are NOT used for vector loads/stores.
   // CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}i16
   // CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}i16
   // CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}f16
   // CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}f16
-  // CHECK-NOT: rawBufferLoad.i16
-  // CHECK-NOT: rawBufferStore.i16
-  // CHECK-NOT: rawBufferLoad.f16
-  // CHECK-NOT: rawBufferStore.f16
 }