From ceb4c01cbfa3076cdd2d09732bf295b4ff2a7886 Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Tue, 31 Mar 2026 13:19:52 -0700 Subject: [PATCH 1/2] Fix rawBufferVectorLoad/Store to widen min precision types to 32-bit (#8274) ## Summary Fixes `RawBufferVectorLoad`/`Store` to use 32-bit element types (`i32`/`f32`) for min precision types (`min16int`, `min16uint`, `min16float`) instead of 16-bit (`i16`/`f16`). This matches how pre-SM6.9 `RawBufferLoad` handles min precision. Resolves #8273 ## Root Cause `TranslateBufLoad` in `HLOperationLower.cpp` creates the vector type directly from the min precision element type (`i16`/`f16`) without widening to `i32`/`f32`. This causes WARP (and potentially other drivers) to load/store 2 bytes per element instead of 4, mismatching the buffer layout. ## Fix Apply the same widening pattern used for bool types: - **Load**: Load as `v_i32`/`v_f32`, then trunc/fptrunc back to `i16`/`half` - **Store**: `sext`/`fpext` to `i32`/`f32`, then store as `v_i32`/`v_f32` ## Testing Added FileCheck test verifying all 3 min precision types produce `i32`/`f32` vector load/store ops. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Co-authored-by: Tex Riddell --- lib/HLSL/DxilGenerationPass.cpp | 75 +++++++++++-------- lib/HLSL/HLOperationLower.cpp | 51 ++++++++++++- .../dxil/debug/min16/min16float_vec.hlsl | 16 ++-- .../dxil/debug/min16/min16int_vec.hlsl | 16 ++-- .../dxil/debug/min16/min16uint_vec.hlsl | 16 ++-- .../min_precision_raw_load_store.hlsl | 62 +++++++++++++++ 6 files changed, 180 insertions(+), 56 deletions(-) create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_raw_load_store.hlsl diff --git a/lib/HLSL/DxilGenerationPass.cpp b/lib/HLSL/DxilGenerationPass.cpp index c3a6ad7dfc..3930cc3f2f 100644 --- a/lib/HLSL/DxilGenerationPass.cpp +++ b/lib/HLSL/DxilGenerationPass.cpp @@ -993,11 +993,10 @@ void ReplaceMinPrecisionRawBufferStoreByType( Args.emplace_back(NewV); } } else if (FromTy->isIntegerTy()) { - // This case only applies to typed buffer since Store operation of byte - // address buffer for min precision is handled by implicit conversion on - // intrinsic call. Since we are extending integer, we have to know if we - // should sign ext or zero ext. We can do this by iterating checking the - // size of the element at struct type and comp type at type annotation + // Since we are extending integer, we have to know if we should sign ext + // or zero ext. For StructuredBuffers we get signedness from the struct + // type annotation. For ByteAddressBuffer (raw buffers) there is no struct + // annotation, so we fall back to sext as a conservative default. CallInst *handleCI = dyn_cast( CI->getArgOperand(DxilInst_RawBufferStore::arg_uav)); DXASSERT(handleCI, @@ -1007,34 +1006,50 @@ void ReplaceMinPrecisionRawBufferStoreByType( "otherwise fail to handle for buffer store lost its retTy"); StructType *STy = dyn_cast(resTyIt->second); - STy = cast(STy->getElementType(0)); - DxilStructAnnotation *SAnnot = typeSys.GetStructAnnotation(STy); - ConstantInt *offsetInt = dyn_cast( - CI->getArgOperand(DxilInst_RawBufferStore::arg_elementOffset)); - unsigned offset = offsetInt->getSExtValue(); - unsigned currentOffset = 0; - for (DxilStructTypeIterator iter = begin(STy, SAnnot), - ItEnd = end(STy, SAnnot); - iter != ItEnd; ++iter) { - std::pair pair = *iter; - currentOffset += DL.getTypeAllocSize(pair.first); - if (currentOffset > offset) { - if (pair.second->GetCompType().IsUIntTy()) { - for (unsigned i = 4; i < 8; ++i) { - Value *NewV = CIBuilder.CreateZExt(CI->getArgOperand(i), ToTy); - Args.emplace_back(NewV); + StructType *InnerSTy = + STy ? dyn_cast(STy->getElementType(0)) : nullptr; + DxilStructAnnotation *SAnnot = + InnerSTy ? typeSys.GetStructAnnotation(InnerSTy) : nullptr; + + if (SAnnot) { + // StructuredBuffer path: use struct annotation to determine signedness. + ConstantInt *offsetInt = dyn_cast( + CI->getArgOperand(DxilInst_RawBufferStore::arg_elementOffset)); + unsigned offset = offsetInt->getSExtValue(); + unsigned currentOffset = 0; + for (DxilStructTypeIterator iter = begin(InnerSTy, SAnnot), + ItEnd = end(InnerSTy, SAnnot); + iter != ItEnd; ++iter) { + std::pair pair = *iter; + currentOffset += DL.getTypeAllocSize(pair.first); + if (currentOffset > offset) { + if (pair.second->GetCompType().IsUIntTy()) { + for (unsigned i = 4; i < 8; ++i) { + Value *NewV = CIBuilder.CreateZExt(CI->getArgOperand(i), ToTy); + Args.emplace_back(NewV); + } + break; + } else if (pair.second->GetCompType().IsIntTy()) { + for (unsigned i = 4; i < 8; ++i) { + Value *NewV = CIBuilder.CreateSExt(CI->getArgOperand(i), ToTy); + Args.emplace_back(NewV); + } + break; + } else { + DXASSERT(false, "Invalid comp type"); } - break; - } else if (pair.second->GetCompType().IsIntTy()) { - for (unsigned i = 4; i < 8; ++i) { - Value *NewV = CIBuilder.CreateSExt(CI->getArgOperand(i), ToTy); - Args.emplace_back(NewV); - } - break; - } else { - DXASSERT(false, "Invalid comp type"); } } + } else { + // ByteAddressBuffer path: no struct annotation available, so + // signedness is unknown. Default to sext. + for (unsigned i = 4; i < 8; ++i) { + Value *Arg = CI->getArgOperand(i); + if (isa(Arg)) + Args.emplace_back(UndefValue::get(ToTy)); + else + Args.emplace_back(CIBuilder.CreateSExt(Arg, ToTy)); + } } } diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 69ce941178..ac3994c2ac 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -4308,6 +4308,23 @@ static SmallVector GetBufLoadArgs(ResLoadHelper helper, return Args; } +static bool isMinPrecisionType(Type *EltTy, const DataLayout &DL) { + return !EltTy->isIntegerTy(1) && + DL.getTypeAllocSizeInBits(EltTy) > EltTy->getPrimitiveSizeInBits(); +} + +static Type *widenMinPrecisionType(Type *Ty, LLVMContext &Ctx, + const DataLayout &DL) { + Type *EltTy = Ty->getScalarType(); + if (!isMinPrecisionType(EltTy, DL)) + return Ty; + Type *WideTy = EltTy->isFloatingPointTy() ? Type::getFloatTy(Ctx) + : Type::getInt32Ty(Ctx); + if (Ty->isVectorTy()) + return VectorType::get(WideTy, Ty->getVectorNumElements()); + return WideTy; +} + // Emits as many calls as needed to load the full vector // Performs any needed extractions and conversions of the results. Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, @@ -4321,10 +4338,13 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, NumComponents = Ty->getVectorNumElements(); const bool isTyped = DXIL::IsTyped(RK); - Type *EltTy = Ty->getScalarType(); + Type *OrigEltTy = Ty->getScalarType(); + Type *WidenedTy = widenMinPrecisionType(Ty, Builder.getContext(), DL); + Type *EltTy = WidenedTy->getScalarType(); + const bool isMinPrec = (WidenedTy != Ty); const bool is64 = (EltTy->isIntegerTy(64) || EltTy->isDoubleTy()); const bool isBool = EltTy->isIntegerTy(1); - // Values will be loaded in memory representations. + // DXIL buffer loads require i32; narrow types are reconverted after load. if (isBool || (is64 && isTyped)) EltTy = Builder.getInt32Ty(); @@ -4440,6 +4460,14 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, retValNew = Builder.CreateICmpNE( retValNew, Constant::getNullValue(retValNew->getType())); + // DXIL loads min precision as 32-bit; narrow back to original IR type. + if (isMinPrec) { + if (OrigEltTy->isIntegerTy()) + retValNew = Builder.CreateTrunc(retValNew, Ty); + else + retValNew = Builder.CreateFPTrunc(retValNew, Ty); + } + helper.retVal->replaceAllUsesWith(retValNew); helper.retVal = retValNew; @@ -4560,6 +4588,25 @@ void TranslateStore(DxilResource::Kind RK, Value *handle, Value *val, val = Builder.CreateZExt(val, Ty); } + // Min precision alloc size is 32-bit; widen to match store intrinsic. + // Scalar RawBufferStore widening is handled by TranslateMinPrecisionRawBuffer + // in DxilGenerationPass, which has signedness info from struct annotations. + if (opcode == OP::OpCode::RawBufferVectorStore) { + const DataLayout &DL = + OP->GetModule()->GetHLModule().GetModule()->getDataLayout(); + Type *WideTy = widenMinPrecisionType(Ty, Builder.getContext(), DL); + if (WideTy != Ty) { + if (EltTy->isFloatingPointTy()) + val = Builder.CreateFPExt(val, WideTy); + else + // TODO(#8314): Signedness info is lost by this point; SExt is wrong + // for min16uint. Front-end should widen during Clang CodeGen instead. + val = Builder.CreateSExt(val, WideTy); + EltTy = WideTy->getScalarType(); + Ty = WideTy; + } + } + // If RawBuffer store of 64-bit value, don't set alignment to 8, // since buffer alignment isn't known to be anything over 4. unsigned alignValue = OP->GetAllocSizeForType(EltTy); diff --git a/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16float_vec.hlsl b/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16float_vec.hlsl index 60fff4a6df..5dae57d261 100644 --- a/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16float_vec.hlsl +++ b/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16float_vec.hlsl @@ -16,20 +16,20 @@ void main() { Foo foo = buf[0]; // foo.m_B.x - // CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16) - // CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16) + // CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16) + // CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16) // foo.m_B.y - // CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16) - // CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16) + // CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16) + // CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16) // foo.m_B.z - // CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16) - // CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16) + // CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16) + // CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16) // foo.m_A.x - // CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16) - // CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16) + // CHECK-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16) + // CHECK16-DAG: call void @llvm.dbg.value(metadata half %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16) min16float value1 = foo.m_B.x; min16float value2 = foo.m_B.y; diff --git a/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16int_vec.hlsl b/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16int_vec.hlsl index a16a006b76..b7af2cf87d 100644 --- a/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16int_vec.hlsl +++ b/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16int_vec.hlsl @@ -16,20 +16,20 @@ void main() { Foo foo = buf[0]; // foo.m_B.x - // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16) - // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16) + // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16) + // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16) // foo.m_B.y - // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16) - // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16) + // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16) + // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16) // foo.m_B.z - // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16) - // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16) + // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16) + // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16) // foo.m_A.x - // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16) - // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16) + // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16) + // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16) min16int value1 = foo.m_B.x; min16int value2 = foo.m_B.y; diff --git a/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16uint_vec.hlsl b/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16uint_vec.hlsl index e09a944a44..488c0385f9 100644 --- a/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16uint_vec.hlsl +++ b/tools/clang/test/HLSLFileCheck/dxil/debug/min16/min16uint_vec.hlsl @@ -16,20 +16,20 @@ void main() { Foo foo = buf[0]; // foo.m_B.x - // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16) - // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16) + // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 96, 16) + // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 48, 16) // foo.m_B.y - // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16) - // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16) + // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 128, 16) + // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 64, 16) // foo.m_B.z - // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16) - // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16) + // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 160, 16) + // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 80, 16) // foo.m_A.x - // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16) - // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[0-9]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16) + // CHECK-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16) + // CHECK16-DAG: call void @llvm.dbg.value(metadata i16 %{{[^ ,]+}}, i64 0, metadata !{{[0-9]+}}, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}} ; var:"foo" !DIExpression(DW_OP_bit_piece, 0, 16) min16int value1 = foo.m_B.x; min16int value2 = foo.m_B.y; diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_raw_load_store.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_raw_load_store.hlsl new file mode 100644 index 0000000000..38485f7de1 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_raw_load_store.hlsl @@ -0,0 +1,62 @@ +// RUN: %dxc -E main -T cs_6_9 %s | FileCheck %s + +// Regression test for min precision rawBufferLoad/Store. +// Min precision types should use i32/f32 operations (not i16/f16) +// to match how pre-SM6.9 RawBufferLoad handles min precision. + +RWByteAddressBuffer g_buf : register(u0); + +[numthreads(1,1,1)] +void main() { + // === Vector loads/stores (RawBufferVectorLoad/Store) === + + // min16int: should load as v3i32, not v3i16 + // CHECK: call %dx.types.ResRet.v3i32 @dx.op.rawBufferVectorLoad.v3i32 + min16int3 vi = g_buf.Load< min16int3 >(0); + // CHECK: call void @dx.op.rawBufferVectorStore.v3i32 + g_buf.Store< min16int3 >(12, vi); + + // min16uint: should load as v3i32, not v3i16 + // CHECK: call %dx.types.ResRet.v3i32 @dx.op.rawBufferVectorLoad.v3i32 + min16uint3 vu = g_buf.Load< min16uint3 >(24); + // CHECK: call void @dx.op.rawBufferVectorStore.v3i32 + g_buf.Store< min16uint3 >(36, vu); + + // min16float: should load as v3f32, not v3f16 + // CHECK: call %dx.types.ResRet.v3f32 @dx.op.rawBufferVectorLoad.v3f32 + // CHECK: fptrunc <3 x float> {{.*}} to <3 x half> + min16float3 vf = g_buf.Load< min16float3 >(48); + // CHECK: fpext <3 x half> {{.*}} to <3 x float> + // CHECK: call void @dx.op.rawBufferVectorStore.v3f32 + g_buf.Store< min16float3 >(60, vf); + + // === Scalar loads/stores (RawBufferLoad/Store) === + + // min16int scalar: should use i32 rawBufferStore + // CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32 + min16int si = g_buf.Load< min16int >(72); + // CHECK: call void @dx.op.rawBufferStore.i32 + g_buf.Store< min16int >(76, si); + + // min16uint scalar: should use i32 rawBufferStore + // CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32 + min16uint su = g_buf.Load< min16uint >(80); + // CHECK: call void @dx.op.rawBufferStore.i32 + g_buf.Store< min16uint >(84, su); + + // min16float scalar: should use f32 rawBufferStore + // CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32 + min16float sf = g_buf.Load< min16float >(88); + // CHECK: call void @dx.op.rawBufferStore.f32 + g_buf.Store< min16float >(92, sf); + + // Verify i16/f16 ops are NOT used. + // CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}i16 + // CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}i16 + // CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}f16 + // CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}f16 + // CHECK-NOT: rawBufferLoad.i16 + // CHECK-NOT: rawBufferStore.i16 + // CHECK-NOT: rawBufferLoad.f16 + // CHECK-NOT: rawBufferStore.f16 +} From 9e8fae916168fcaa65d35aa887ab3ebe7794d9fb Mon Sep 17 00:00:00 2001 From: Alex Sepkowski Date: Wed, 1 Apr 2026 12:09:58 -0700 Subject: [PATCH 2/2] Revert out-of-scope DxilGenerationPass changes from #8274 (#8321) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Reverts the `DxilGenerationPass` ByteAddressBuffer scalar store changes and removes scalar store tests that were included in PR #8274. These changes were out of scope for the vector load/store fix and, on further discussion, were concluded to be incomplete. Follow-up issue for the proper fix: #8322 ## What changed - **DxilGenerationPass.cpp**: Reverted the ByteAddressBuffer fallback path added in `ReplaceMinPrecisionRawBufferStoreByType`. The `sext` fallback was wrong for `min16uint` (loses signedness), and the fix belongs in CodeGen where signedness info is still available. - **min_precision_raw_load_store.hlsl**: Removed scalar load/store tests. Scalar `ByteAddressBuffer::Store()` hits a pre-existing crash in `TranslateMinPrecisionRawBuffer` (`cast` on ByteAddressBuffer's `i32` inner element). Test now covers vector loads/stores only, which is the scope of the original fix. ## Context The original PR #8274 correctly fixed `RawBufferVectorLoad/Store` to widen min precision types to 32-bit. However, it also added a ByteAddressBuffer scalar store fix in `DxilGenerationPass` that: 1. Crept outside the scope of the vector load/store fix 2. Was incomplete — the `sext` fallback is wrong for unsigned types (`min16uint`) 3. Should instead be handled during Clang CodeGen, where signedness information is available Scalar ByteAddressBuffer template store widening for min precision types is a separate pre-existing issue that needs a proper fix in CodeGen. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- lib/HLSL/DxilGenerationPass.cpp | 75 ++++++++----------- .../min_precision_raw_load_store.hlsl | 26 +------ 2 files changed, 31 insertions(+), 70 deletions(-) diff --git a/lib/HLSL/DxilGenerationPass.cpp b/lib/HLSL/DxilGenerationPass.cpp index 3930cc3f2f..c3a6ad7dfc 100644 --- a/lib/HLSL/DxilGenerationPass.cpp +++ b/lib/HLSL/DxilGenerationPass.cpp @@ -993,10 +993,11 @@ void ReplaceMinPrecisionRawBufferStoreByType( Args.emplace_back(NewV); } } else if (FromTy->isIntegerTy()) { - // Since we are extending integer, we have to know if we should sign ext - // or zero ext. For StructuredBuffers we get signedness from the struct - // type annotation. For ByteAddressBuffer (raw buffers) there is no struct - // annotation, so we fall back to sext as a conservative default. + // This case only applies to typed buffer since Store operation of byte + // address buffer for min precision is handled by implicit conversion on + // intrinsic call. Since we are extending integer, we have to know if we + // should sign ext or zero ext. We can do this by iterating checking the + // size of the element at struct type and comp type at type annotation CallInst *handleCI = dyn_cast( CI->getArgOperand(DxilInst_RawBufferStore::arg_uav)); DXASSERT(handleCI, @@ -1006,50 +1007,34 @@ void ReplaceMinPrecisionRawBufferStoreByType( "otherwise fail to handle for buffer store lost its retTy"); StructType *STy = dyn_cast(resTyIt->second); - StructType *InnerSTy = - STy ? dyn_cast(STy->getElementType(0)) : nullptr; - DxilStructAnnotation *SAnnot = - InnerSTy ? typeSys.GetStructAnnotation(InnerSTy) : nullptr; - - if (SAnnot) { - // StructuredBuffer path: use struct annotation to determine signedness. - ConstantInt *offsetInt = dyn_cast( - CI->getArgOperand(DxilInst_RawBufferStore::arg_elementOffset)); - unsigned offset = offsetInt->getSExtValue(); - unsigned currentOffset = 0; - for (DxilStructTypeIterator iter = begin(InnerSTy, SAnnot), - ItEnd = end(InnerSTy, SAnnot); - iter != ItEnd; ++iter) { - std::pair pair = *iter; - currentOffset += DL.getTypeAllocSize(pair.first); - if (currentOffset > offset) { - if (pair.second->GetCompType().IsUIntTy()) { - for (unsigned i = 4; i < 8; ++i) { - Value *NewV = CIBuilder.CreateZExt(CI->getArgOperand(i), ToTy); - Args.emplace_back(NewV); - } - break; - } else if (pair.second->GetCompType().IsIntTy()) { - for (unsigned i = 4; i < 8; ++i) { - Value *NewV = CIBuilder.CreateSExt(CI->getArgOperand(i), ToTy); - Args.emplace_back(NewV); - } - break; - } else { - DXASSERT(false, "Invalid comp type"); + STy = cast(STy->getElementType(0)); + DxilStructAnnotation *SAnnot = typeSys.GetStructAnnotation(STy); + ConstantInt *offsetInt = dyn_cast( + CI->getArgOperand(DxilInst_RawBufferStore::arg_elementOffset)); + unsigned offset = offsetInt->getSExtValue(); + unsigned currentOffset = 0; + for (DxilStructTypeIterator iter = begin(STy, SAnnot), + ItEnd = end(STy, SAnnot); + iter != ItEnd; ++iter) { + std::pair pair = *iter; + currentOffset += DL.getTypeAllocSize(pair.first); + if (currentOffset > offset) { + if (pair.second->GetCompType().IsUIntTy()) { + for (unsigned i = 4; i < 8; ++i) { + Value *NewV = CIBuilder.CreateZExt(CI->getArgOperand(i), ToTy); + Args.emplace_back(NewV); } + break; + } else if (pair.second->GetCompType().IsIntTy()) { + for (unsigned i = 4; i < 8; ++i) { + Value *NewV = CIBuilder.CreateSExt(CI->getArgOperand(i), ToTy); + Args.emplace_back(NewV); + } + break; + } else { + DXASSERT(false, "Invalid comp type"); } } - } else { - // ByteAddressBuffer path: no struct annotation available, so - // signedness is unknown. Default to sext. - for (unsigned i = 4; i < 8; ++i) { - Value *Arg = CI->getArgOperand(i); - if (isa(Arg)) - Args.emplace_back(UndefValue::get(ToTy)); - else - Args.emplace_back(CIBuilder.CreateSExt(Arg, ToTy)); - } } } diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_raw_load_store.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_raw_load_store.hlsl index 38485f7de1..1b5ba27baf 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_raw_load_store.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/min_precision_raw_load_store.hlsl @@ -30,33 +30,9 @@ void main() { // CHECK: call void @dx.op.rawBufferVectorStore.v3f32 g_buf.Store< min16float3 >(60, vf); - // === Scalar loads/stores (RawBufferLoad/Store) === - - // min16int scalar: should use i32 rawBufferStore - // CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32 - min16int si = g_buf.Load< min16int >(72); - // CHECK: call void @dx.op.rawBufferStore.i32 - g_buf.Store< min16int >(76, si); - - // min16uint scalar: should use i32 rawBufferStore - // CHECK: call %dx.types.ResRet.i32 @dx.op.rawBufferLoad.i32 - min16uint su = g_buf.Load< min16uint >(80); - // CHECK: call void @dx.op.rawBufferStore.i32 - g_buf.Store< min16uint >(84, su); - - // min16float scalar: should use f32 rawBufferStore - // CHECK: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32 - min16float sf = g_buf.Load< min16float >(88); - // CHECK: call void @dx.op.rawBufferStore.f32 - g_buf.Store< min16float >(92, sf); - - // Verify i16/f16 ops are NOT used. + // Verify i16/f16 ops are NOT used for vector loads/stores. // CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}i16 // CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}i16 // CHECK-NOT: rawBufferVectorLoad.v{{[0-9]+}}f16 // CHECK-NOT: rawBufferVectorStore.v{{[0-9]+}}f16 - // CHECK-NOT: rawBufferLoad.i16 - // CHECK-NOT: rawBufferStore.i16 - // CHECK-NOT: rawBufferLoad.f16 - // CHECK-NOT: rawBufferStore.f16 }