diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h
index 4126d861ac..f77bab929f 100644
--- a/tools/clang/unittests/HLSLExec/LongVectorTestData.h
+++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h
@@ -237,6 +237,237 @@ struct HLSLHalf_t {
   DirectX::PackedVector::HALF Val = 0;
 };

+// Min precision wrapper types. Without -enable-16bit-types, min precision types
+// are 32-bit in DXIL storage. These thin wrappers provide distinct C++ types
+// that map to different HLSL type strings via DATA_TYPE.
+//
+// HLSLMin16Float_t stands in for min16float and stores its value as a float.
+struct HLSLMin16Float_t {
+  constexpr HLSLMin16Float_t() : Val(0.0f) {}
+  constexpr HLSLMin16Float_t(float F) : Val(F) {}
+  constexpr HLSLMin16Float_t(double D) : Val(static_cast<float>(D)) {}
+  // Integer sources must be converted explicitly.
+  explicit constexpr HLSLMin16Float_t(int I) : Val(static_cast<float>(I)) {}
+  explicit constexpr HLSLMin16Float_t(uint32_t U)
+      : Val(static_cast<float>(U)) {}
+
+  constexpr operator float() const { return Val; }
+
+  bool operator==(const HLSLMin16Float_t &O) const { return Val == O.Val; }
+  bool operator!=(const HLSLMin16Float_t &O) const { return Val != O.Val; }
+  bool operator<(const HLSLMin16Float_t &O) const { return Val < O.Val; }
+  bool operator>(const HLSLMin16Float_t &O) const { return Val > O.Val; }
+  bool operator<=(const HLSLMin16Float_t &O) const { return Val <= O.Val; }
+  bool operator>=(const HLSLMin16Float_t &O) const { return Val >= O.Val; }
+
+  HLSLMin16Float_t operator+(const HLSLMin16Float_t &O) const {
+    return HLSLMin16Float_t(Val + O.Val);
+  }
+  HLSLMin16Float_t operator-(const HLSLMin16Float_t &O) const {
+    return HLSLMin16Float_t(Val - O.Val);
+  }
+  HLSLMin16Float_t operator*(const HLSLMin16Float_t &O) const {
+    return HLSLMin16Float_t(Val * O.Val);
+  }
+  HLSLMin16Float_t operator/(const HLSLMin16Float_t &O) const {
+    return HLSLMin16Float_t(Val / O.Val);
+  }
+  HLSLMin16Float_t operator%(const HLSLMin16Float_t &O) const {
+    return HLSLMin16Float_t(std::fmod(Val, O.Val));
+  }
+
+  friend std::wostream &operator<<(std::wostream &Os,
+                                   const HLSLMin16Float_t &Obj) {
+    Os << Obj.Val;
+    return Os;
+  }
+  friend std::ostream &operator<<(std::ostream &Os,
+                                  const HLSLMin16Float_t &Obj) {
+    Os << Obj.Val;
+    return Os;
+  }
+
+  float Val;
+};
+// HLSLMin16Int_t stands in for min16int and stores its value as an int32_t.
+struct HLSLMin16Int_t {
+  constexpr HLSLMin16Int_t() : Val(0) {}
+  constexpr HLSLMin16Int_t(int32_t I) : Val(I) {}
+  constexpr HLSLMin16Int_t(int64_t I) : Val(static_cast<int32_t>(I)) {}
+  constexpr HLSLMin16Int_t(uint32_t U) : Val(static_cast<int32_t>(U)) {}
+  constexpr HLSLMin16Int_t(uint64_t U) : Val(static_cast<int32_t>(U)) {}
+  constexpr HLSLMin16Int_t(float F) : Val(static_cast<int32_t>(F)) {}
+  constexpr HLSLMin16Int_t(double D) : Val(static_cast<int32_t>(D)) {}
+
+  constexpr operator int32_t() const { return Val; }
+
+  bool operator==(const HLSLMin16Int_t &O) const { return Val == O.Val; }
+  bool operator!=(const HLSLMin16Int_t &O) const { return Val != O.Val; }
+  bool operator<(const HLSLMin16Int_t &O) const { return Val < O.Val; }
+  bool operator>(const HLSLMin16Int_t &O) const { return Val > O.Val; }
+  bool operator<=(const HLSLMin16Int_t &O) const { return Val <= O.Val; }
+  bool operator>=(const HLSLMin16Int_t &O) const { return Val >= O.Val; }
+
+  HLSLMin16Int_t operator+(const HLSLMin16Int_t &O) const {
+    return HLSLMin16Int_t(Val + O.Val);
+  }
+  HLSLMin16Int_t operator-(const HLSLMin16Int_t &O) const {
+    return HLSLMin16Int_t(Val - O.Val);
+  }
+  HLSLMin16Int_t operator*(const HLSLMin16Int_t &O) const {
+    return HLSLMin16Int_t(Val * O.Val);
+  }
+  HLSLMin16Int_t operator/(const HLSLMin16Int_t &O) const {
+    return HLSLMin16Int_t(Val / O.Val);
+  }
+  HLSLMin16Int_t operator%(const HLSLMin16Int_t &O) const {
+    return HLSLMin16Int_t(Val % O.Val);
+  }
+  HLSLMin16Int_t operator&(const HLSLMin16Int_t &O) const {
+    return HLSLMin16Int_t(Val & O.Val);
+  }
+  HLSLMin16Int_t operator|(const HLSLMin16Int_t &O) const {
+    return HLSLMin16Int_t(Val | O.Val);
+  }
+  HLSLMin16Int_t operator^(const HLSLMin16Int_t &O) const {
+    return HLSLMin16Int_t(Val ^ O.Val);
+  }
+  // Shift the bit pattern as unsigned: left-shifting a negative signed value
+  // is undefined behavior before C++20. O.Val must be in [0, 31].
+  HLSLMin16Int_t operator<<(const HLSLMin16Int_t &O) const {
+    return HLSLMin16Int_t(
+        static_cast<int32_t>(static_cast<uint32_t>(Val) << O.Val));
+  }
+  HLSLMin16Int_t operator>>(const HLSLMin16Int_t &O) const {
+    return HLSLMin16Int_t(Val >> O.Val);
+  }
+  HLSLMin16Int_t operator~() const { return HLSLMin16Int_t(~Val); }
+  HLSLMin16Int_t &operator<<=(const HLSLMin16Int_t &O) {
+    // Same unsigned-domain shift as operator<<.
+    Val = static_cast<int32_t>(static_cast<uint32_t>(Val) << O.Val);
+    return *this;
+  }
+  HLSLMin16Int_t &operator>>=(const HLSLMin16Int_t &O) {
+    Val >>= O.Val;
+    return *this;
+  }
+  HLSLMin16Int_t &operator|=(const HLSLMin16Int_t &O) {
+    Val |= O.Val;
+    return *this;
+  }
+  HLSLMin16Int_t &operator&=(const HLSLMin16Int_t &O) {
+    Val &= O.Val;
+    return *this;
+  }
+  HLSLMin16Int_t &operator^=(const HLSLMin16Int_t &O) {
+    Val ^= O.Val;
+    return *this;
+  }
+  // Logical operators wrap the 0/1 result in HLSLMin16Int_t.
+  HLSLMin16Int_t operator&&(const HLSLMin16Int_t &O) const {
+    return HLSLMin16Int_t(Val && O.Val);
+  }
+  HLSLMin16Int_t operator||(const HLSLMin16Int_t &O) const {
+    return HLSLMin16Int_t(Val || O.Val);
+  }
+  friend std::wostream &operator<<(std::wostream &Os,
+                                   const HLSLMin16Int_t &Obj) {
+    Os << Obj.Val;
+    return Os;
+  }
+  friend std::ostream &operator<<(std::ostream &Os, const HLSLMin16Int_t &Obj) {
+    Os << Obj.Val;
+    return Os;
+  }
+
+  int32_t Val;
+};
+// HLSLMin16Uint_t stands in for min16uint and stores its value as a uint32_t.
+struct HLSLMin16Uint_t {
+  constexpr HLSLMin16Uint_t() : Val(0) {}
+  constexpr HLSLMin16Uint_t(uint32_t U) : Val(U) {}
+  constexpr HLSLMin16Uint_t(uint64_t U) : Val(static_cast<uint32_t>(U)) {}
+  constexpr HLSLMin16Uint_t(int32_t I) : Val(static_cast<uint32_t>(I)) {}
+  constexpr HLSLMin16Uint_t(float F) : Val(static_cast<uint32_t>(F)) {}
+  constexpr HLSLMin16Uint_t(double D) : Val(static_cast<uint32_t>(D)) {}
+
+  constexpr operator uint32_t() const { return Val; }
+
+  bool operator==(const HLSLMin16Uint_t &O) const { return Val == O.Val; }
+  bool operator!=(const HLSLMin16Uint_t &O) const { return Val != O.Val; }
+  bool operator<(const HLSLMin16Uint_t &O) const { return Val < O.Val; }
+  bool operator>(const HLSLMin16Uint_t &O) const { return Val > O.Val; }
+  bool operator<=(const HLSLMin16Uint_t &O) const { return Val <= O.Val; }
+  bool operator>=(const HLSLMin16Uint_t &O) const { return Val >= O.Val; }
+
+  HLSLMin16Uint_t operator+(const HLSLMin16Uint_t &O) const {
+    return HLSLMin16Uint_t(Val + O.Val);
+  }
+  HLSLMin16Uint_t operator-(const HLSLMin16Uint_t &O) const {
+    return HLSLMin16Uint_t(Val - O.Val);
+  }
+  HLSLMin16Uint_t operator*(const HLSLMin16Uint_t &O) const {
+    return HLSLMin16Uint_t(Val * O.Val);
+  }
+  HLSLMin16Uint_t operator/(const HLSLMin16Uint_t &O) const {
+    return HLSLMin16Uint_t(Val / O.Val);
+  }
+  HLSLMin16Uint_t operator%(const HLSLMin16Uint_t &O) const {
+    return HLSLMin16Uint_t(Val % O.Val);
+  }
+  HLSLMin16Uint_t operator&(const HLSLMin16Uint_t &O) const {
+    return HLSLMin16Uint_t(Val & O.Val);
+  }
+  HLSLMin16Uint_t operator|(const HLSLMin16Uint_t &O) const {
+    return HLSLMin16Uint_t(Val | O.Val);
+  }
+  HLSLMin16Uint_t operator^(const HLSLMin16Uint_t &O) const {
+    return HLSLMin16Uint_t(Val ^ O.Val);
+  }
+  HLSLMin16Uint_t operator<<(const HLSLMin16Uint_t &O) const {
+    return HLSLMin16Uint_t(Val << O.Val);
+  }
+  HLSLMin16Uint_t operator>>(const HLSLMin16Uint_t &O) const {
+    return HLSLMin16Uint_t(Val >> O.Val);
+  }
+  HLSLMin16Uint_t operator~() const { return HLSLMin16Uint_t(~Val); }
+  HLSLMin16Uint_t &operator<<=(const HLSLMin16Uint_t &O) {
+    Val <<= O.Val;
+    return *this;
+  }
+  HLSLMin16Uint_t &operator>>=(const HLSLMin16Uint_t &O) {
+    Val >>= O.Val;
+    return *this;
+  }
+  HLSLMin16Uint_t &operator|=(const HLSLMin16Uint_t &O) {
+    Val |= O.Val;
+    return *this;
+  }
+  HLSLMin16Uint_t &operator&=(const HLSLMin16Uint_t &O) {
+    Val &= O.Val;
+    return *this;
+  }
+  HLSLMin16Uint_t &operator^=(const HLSLMin16Uint_t &O) {
+    Val ^= O.Val;
+    return *this;
+  }
+
+  bool operator&&(const HLSLMin16Uint_t &O) const { return Val && O.Val; }
+  bool operator||(const HLSLMin16Uint_t &O) const { return Val || O.Val; }
+  friend std::wostream &operator<<(std::wostream &Os,
+                                   const HLSLMin16Uint_t &Obj) {
+    Os << Obj.Val;
+    return Os;
+  }
+  friend std::ostream &operator<<(std::ostream &Os,
+                                  const HLSLMin16Uint_t &Obj) {
+    Os << Obj.Val;
+    return Os;
+  }
+
+  uint32_t Val;
+};
+
 enum class InputSet {
 #define INPUT_SET(SYMBOL) SYMBOL,
 #include "LongVectorOps.def"
@@ -450,6 +681,57 @@ INPUT_SET(InputSet::SelectCond, 0.0, 1.0);
 INPUT_SET(InputSet::AllOnes, 1.0);
 END_INPUT_SETS()

+// Min precision input sets. All values are exactly representable in float16
+// to avoid precision mismatch between CPU-side expected values and GPU-side
+// min precision computation. No FP specials (INF/NaN/denorm) as min precision
+// types do not support them.
+BEGIN_INPUT_SETS(HLSLMin16Float_t)
+INPUT_SET(InputSet::Default1, -1.0f, -1.0f, 1.0f, -0.03125f, 1.0f, -0.03125f,
+          1.0f, -0.03125f, 1.0f, -0.03125f);
+INPUT_SET(InputSet::Default2, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f,
+          -1.0f, 1.0f, -1.0f);
+INPUT_SET(InputSet::Default3, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f,
+          1.0f, -1.0f, 1.0f);
+INPUT_SET(InputSet::Zero, 0.0f);
+INPUT_SET(InputSet::RangeHalfPi, -1.0625f, 0.046875f, -1.046875f, 0.3125f,
+          1.4375f, -0.875f, 1.375f, -0.71875f, -0.8125f, 0.5625f);
+INPUT_SET(InputSet::RangeOne, 0.328125f, 0.71875f, -0.953125f, 0.671875f,
+          -0.03125f, 0.5f, 0.84375f, -0.671875f, -0.6875f, -0.90625f);
+INPUT_SET(InputSet::Positive, 1.0f, 1.0f, 342.0f, 0.03125f, 5504.0f, 0.03125f,
+          1.0f, 0.03125f, 331.25f, 3250.0f);
+INPUT_SET(InputSet::SelectCond, 0.0f, 1.0f);
+INPUT_SET(InputSet::AllOnes, 1.0f);
+END_INPUT_SETS()
+
+// Values constrained to int16 range. Kept small to avoid overflow ambiguity.
+// Shift amounts limited so results fit in int16 (-32768..32767).
+BEGIN_INPUT_SETS(HLSLMin16Int_t)
+INPUT_SET(InputSet::Default1, -6, 1, 7, 3, 8, 4, -3, 8, 8, -2);
+INPUT_SET(InputSet::Default2, 5, -6, -3, -2, 9, 3, 1, -3, -7, 2);
+INPUT_SET(InputSet::Default3, -5, 6, 3, 2, -9, -3, -1, 3, 7, -2);
+INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 12, 11, 11, 14);
+INPUT_SET(InputSet::Zero, 0);
+INPUT_SET(InputSet::NoZero, 1);
+INPUT_SET(InputSet::SelectCond, 0, 1);
+INPUT_SET(InputSet::AllOnes, 1);
+INPUT_SET(InputSet::WaveMultiPrefixBitwise, 0x0, 0x1, 0x3, 0x4, 0x10, 0x12, 0xF,
+          -1);
+END_INPUT_SETS()
+
+// Values constrained so results stay below 0x8000 (bit 15 clear). WARP may
+// compute min precision at 16-bit and sign-extend bit 15 on 32-bit store.
+BEGIN_INPUT_SETS(HLSLMin16Uint_t)
+INPUT_SET(InputSet::Default1, 3, 7, 3, 5, 5, 10, 4, 8, 9, 10);
+INPUT_SET(InputSet::Default2, 2, 6, 3, 4, 5, 9, 3, 8, 9, 10);
+INPUT_SET(InputSet::Default3, 4, 5, 4, 5, 3, 7, 3, 1, 11, 9);
+INPUT_SET(InputSet::Zero, 0);
+INPUT_SET(InputSet::BitShiftRhs, 1, 6, 3, 0, 9, 3, 8, 8, 8, 8);
+INPUT_SET(InputSet::SelectCond, 0, 1);
+INPUT_SET(InputSet::AllOnes, 1);
+INPUT_SET(InputSet::WaveMultiPrefixBitwise, 0x0, 0x1, 0x3, 0x4, 0x10, 0x12, 0xF,
+          0x7FFF);
+END_INPUT_SETS()
+
 #undef BEGIN_INPUT_SETS
 #undef INPUT_SET
 #undef END_INPUT_SETS
diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp
index 927efd0d49..4a9a4ae8a7 100644
--- a/tools/clang/unittests/HLSLExec/LongVectors.cpp
+++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp
@@ -59,6 +59,9 @@ DATA_TYPE(uint16_t, "uint16_t", 2)
 DATA_TYPE(uint32_t, "uint32_t", 4)
 DATA_TYPE(uint64_t, "uint64_t", 8)
 DATA_TYPE(HLSLHalf_t, "half", 2)
+DATA_TYPE(HLSLMin16Float_t, "min16float", 4)
+DATA_TYPE(HLSLMin16Int_t, "min16int", 4)
+DATA_TYPE(HLSLMin16Uint_t, "min16uint", 4)
 DATA_TYPE(float, "float", 4)
 DATA_TYPE(double, "double", 8)

@@ -66,7 +69,13 @@ DATA_TYPE(double, "double", 8)

 template <typename T> constexpr bool isFloatingPointType() {
   return std::is_same_v<T, float> || std::is_same_v<T, double> ||
-         std::is_same_v<T, HLSLHalf_t>;
+         std::is_same_v<T, HLSLHalf_t> || std::is_same_v<T, HLSLMin16Float_t>;
+}
+
+template <typename T> constexpr bool isMinPrecisionType() {
+  return std::is_same_v<T, HLSLMin16Float_t> ||
+         std::is_same_v<T, HLSLMin16Int_t> ||
+         std::is_same_v<T, HLSLMin16Uint_t>;
 }

 //
@@ -218,6 +227,34 @@ bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, double Tolerance,
   }
 }

+// Min precision float comparison: convert to half and compare in fp16 space.
+// This reuses the same tolerance values as HLSLHalf_t. Min precision is at
+// least 16-bit, so fp16 tolerances are an upper bound for all cases.
+bool doValuesMatch(HLSLMin16Float_t A, HLSLMin16Float_t B, double Tolerance,
+                   ValidationType ValidationType) {
+  auto HalfA = DirectX::PackedVector::XMConvertFloatToHalf(A.Val);
+  auto HalfB = DirectX::PackedVector::XMConvertFloatToHalf(B.Val);
+  switch (ValidationType) {
+  case ValidationType::Epsilon:
+    return CompareHalfEpsilon(HalfA, HalfB, static_cast<float>(Tolerance));
+  case ValidationType::Ulp:
+    return CompareHalfULP(HalfA, HalfB, static_cast<float>(Tolerance));
+  default:
+    hlsl_test::LogErrorFmt(
+        L"Invalid ValidationType. Expecting Epsilon or ULP.");
+    return false;
+  }
+}
+
+bool doValuesMatch(HLSLMin16Int_t A, HLSLMin16Int_t B, double, ValidationType) {
+  return A == B;
+}
+
+bool doValuesMatch(HLSLMin16Uint_t A, HLSLMin16Uint_t B, double,
+                   ValidationType) {
+  return A == B;
+}
+
 bool doValuesMatch(float A, float B, double Tolerance,
                    ValidationType ValidationType) {
   switch (ValidationType) {
@@ -917,6 +954,24 @@ template <> struct TrigonometricValidation {
   ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f);
 };

+// Min precision trig tolerances: same as half precision since min precision
+// is at least 16-bit and our doValuesMatch compares in half-precision space.
+template struct TrigonometricValidation {
+  ValidationConfig ValidationConfig = ValidationConfig::Epsilon(0.003f);
+};
+
+template <> struct TrigonometricValidation {
+  ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f);
+};
+
+template <> struct TrigonometricValidation {
+  ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f);
+};
+
+template <> struct TrigonometricValidation {
+  ValidationConfig ValidationConfig = ValidationConfig::Ulp(2.0f);
+};
+
 #define TRIG_OP(OP, IMPL) \
   template struct Op : TrigonometricValidation { \
     T operator()(T A) { return IMPL; } \
@@ -1073,7 +1128,7 @@ template <> struct ExpectedBuilder {
 //

 template T UnaryMathAbs(T A) {
-  if constexpr (std::is_unsigned_v)
+  if constexpr (std::is_unsigned_v || std::is_same_v)
     return A;
   else
     return static_cast(std::abs(A));
@@ -1285,7 +1340,12 @@ static double computeAbsoluteEpsilon(double A, double ULPTolerance) {

   if constexpr (std::is_same_v)
     ULP = HLSLHalf_t::GetULP(A);
-  else
+  else if constexpr (std::is_same_v) {
+    // Min precision floats may be computed at float16 on the GPU, so use
+    // half-precision ULP for tolerance. Reuse HLSLHalf_t::GetULP which
+    // computes ULP by incrementing the float16 bit representation.
+    ULP = HLSLHalf_t::GetULP(HLSLHalf_t(static_cast(A)));
+  } else
     ULP =
         std::nextafter(static_cast(A), std::numeric_limits::infinity()) -
         static_cast(A);
@@ -2724,6 +2784,314 @@ class DxilConf_SM69_Vectorized_Core : public TestClassCommon {
   HLK_WAVEOP_TEST(WaveMultiPrefixSum, float);
   HLK_WAVEOP_TEST(WaveMultiPrefixProduct, float);
   HLK_WAVEOP_TEST(WaveMatch, float);
+
+  // ---- HLSLMin16Float_t (mirrors applicable HLSLHalf_t ops) ----
+
+  // TernaryMath
+  HLK_TEST(Mad, HLSLMin16Float_t);
+
+  // BinaryMath
+  HLK_TEST(Add, HLSLMin16Float_t);
+  HLK_TEST(Subtract, HLSLMin16Float_t);
+  HLK_TEST(Multiply, HLSLMin16Float_t);
+  HLK_TEST(Divide, HLSLMin16Float_t);
+  HLK_TEST(Modulus, HLSLMin16Float_t);
+  HLK_TEST(Min, HLSLMin16Float_t);
+  HLK_TEST(Max, HLSLMin16Float_t);
+  HLK_TEST(Ldexp, HLSLMin16Float_t);
+
+  // Saturate
+  HLK_TEST(Saturate, HLSLMin16Float_t);
+
+  // Unary
+  HLK_TEST(Initialize, HLSLMin16Float_t);
+  HLK_TEST(ArrayOperator_StaticAccess, HLSLMin16Float_t);
+  HLK_TEST(ArrayOperator_DynamicAccess, HLSLMin16Float_t);
+  HLK_TEST(ShuffleVector, HLSLMin16Float_t);
+
+  // Cast
+  HLK_TEST(CastToBool, HLSLMin16Float_t);
+  HLK_TEST(CastToInt32, HLSLMin16Float_t);
+  HLK_TEST(CastToInt64, HLSLMin16Float_t);
+  HLK_TEST(CastToUint32_FromFP, HLSLMin16Float_t);
+  HLK_TEST(CastToUint64_FromFP, HLSLMin16Float_t);
+  HLK_TEST(CastToFloat32, HLSLMin16Float_t);
+  // Note: CastToInt16, CastToUint16_FromFP, CastToFloat16 excluded -
+  // 16-bit output types require -enable-16bit-types which changes min
+  // precision semantics (min16float becomes half), breaking the test
+  // infrastructure's 32-bit buffer I/O assumptions.
+
+  // Trigonometric
+  HLK_TEST(Acos, HLSLMin16Float_t);
+  HLK_TEST(Asin, HLSLMin16Float_t);
+  HLK_TEST(Atan, HLSLMin16Float_t);
+  HLK_TEST(Cos, HLSLMin16Float_t);
+  HLK_TEST(Cosh, HLSLMin16Float_t);
+  HLK_TEST(Sin, HLSLMin16Float_t);
+  HLK_TEST(Sinh, HLSLMin16Float_t);
+  HLK_TEST(Tan, HLSLMin16Float_t);
+  HLK_TEST(Tanh, HLSLMin16Float_t);
+
+  // UnaryMath
+  HLK_TEST(Abs, HLSLMin16Float_t);
+  HLK_TEST(Ceil, HLSLMin16Float_t);
+  HLK_TEST(Exp, HLSLMin16Float_t);
+  HLK_TEST(Floor, HLSLMin16Float_t);
+  HLK_TEST(Frac, HLSLMin16Float_t);
+  HLK_TEST(Log, HLSLMin16Float_t);
+  HLK_TEST(Rcp, HLSLMin16Float_t);
+  HLK_TEST(Round, HLSLMin16Float_t);
+  HLK_TEST(Rsqrt, HLSLMin16Float_t);
+  HLK_TEST(Sign, HLSLMin16Float_t);
+  HLK_TEST(Sqrt, HLSLMin16Float_t);
+  HLK_TEST(Trunc, HLSLMin16Float_t);
+  HLK_TEST(Exp2, HLSLMin16Float_t);
+  HLK_TEST(Log10, HLSLMin16Float_t);
+  HLK_TEST(Log2, HLSLMin16Float_t);
+
+  // BinaryComparison
+  HLK_TEST(LessThan, HLSLMin16Float_t);
+  HLK_TEST(LessEqual, HLSLMin16Float_t);
+  HLK_TEST(GreaterThan, HLSLMin16Float_t);
+  HLK_TEST(GreaterEqual, HLSLMin16Float_t);
+  HLK_TEST(Equal, HLSLMin16Float_t);
+  HLK_TEST(NotEqual, HLSLMin16Float_t);
+
+  // Select
+  HLK_TEST(Select, HLSLMin16Float_t);
+
+  // Dot
+  HLK_TEST(Dot, HLSLMin16Float_t);
+
+  // LoadAndStore
+  HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Float_t);
+  HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Float_t);
+  HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Float_t);
+  HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Float_t);
+  HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Float_t);
+  HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Float_t);
+  HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Float_t);
+  HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Float_t);
+  HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Float_t);
+  HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Float_t);
+  HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Float_t);
+  HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Float_t);
+
+  // Derivative
+  HLK_TEST(DerivativeDdx, HLSLMin16Float_t);
+  HLK_TEST(DerivativeDdy, HLSLMin16Float_t);
+  HLK_TEST(DerivativeDdxFine, HLSLMin16Float_t);
+  HLK_TEST(DerivativeDdyFine, HLSLMin16Float_t);
+
+  // Quad
+  HLK_TEST(QuadReadLaneAt, HLSLMin16Float_t);
+  HLK_TEST(QuadReadAcrossX, HLSLMin16Float_t);
+  HLK_TEST(QuadReadAcrossY, HLSLMin16Float_t);
+  HLK_TEST(QuadReadAcrossDiagonal, HLSLMin16Float_t);
+
+  // Wave
+  HLK_WAVEOP_TEST(WaveActiveSum, HLSLMin16Float_t);
+  HLK_WAVEOP_TEST(WaveActiveMin, HLSLMin16Float_t);
+  HLK_WAVEOP_TEST(WaveActiveMax, HLSLMin16Float_t);
+  HLK_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Float_t);
+  HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Float_t);
+  HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Float_t);
+  HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Float_t);
+  HLK_WAVEOP_TEST(WavePrefixSum, HLSLMin16Float_t);
+  HLK_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Float_t);
+  HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Float_t);
+  HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Float_t);
+  HLK_WAVEOP_TEST(WaveMatch, HLSLMin16Float_t);
+
+  // ---- HLSLMin16Int_t (mirrors applicable int16_t ops) ----
+
+  // TernaryMath
+  HLK_TEST(Mad, HLSLMin16Int_t);
+
+  // BinaryMath
+  // Note: Divide and Modulus excluded - HLSL does not support signed integer
+  // division on minimum-precision types.
+  HLK_TEST(Add, HLSLMin16Int_t);
+  HLK_TEST(Subtract, HLSLMin16Int_t);
+  HLK_TEST(Multiply, HLSLMin16Int_t);
+  HLK_TEST(Min, HLSLMin16Int_t);
+  HLK_TEST(Max, HLSLMin16Int_t);
+
+  // Bitwise
+  HLK_TEST(And, HLSLMin16Int_t);
+  HLK_TEST(Or, HLSLMin16Int_t);
+  HLK_TEST(Xor, HLSLMin16Int_t);
+  HLK_TEST(LeftShift, HLSLMin16Int_t);
+  HLK_TEST(RightShift, HLSLMin16Int_t);
+  // Note: ReverseBits, CountBits, FirstBitHigh, FirstBitLow excluded -
+  // DXC promotes min precision to i32 before these intrinsics, so they
+  // don't operate at min precision.
+
+  // UnaryMath
+  HLK_TEST(Abs, HLSLMin16Int_t);
+  HLK_TEST(Sign, HLSLMin16Int_t);
+
+  // Unary
+  HLK_TEST(Initialize, HLSLMin16Int_t);
+  HLK_TEST(ArrayOperator_StaticAccess, HLSLMin16Int_t);
+  HLK_TEST(ArrayOperator_DynamicAccess, HLSLMin16Int_t);
+  HLK_TEST(ShuffleVector, HLSLMin16Int_t);
+
+  // Cast
+  HLK_TEST(CastToBool, HLSLMin16Int_t);
+  HLK_TEST(CastToInt32, HLSLMin16Int_t);
+  HLK_TEST(CastToInt64, HLSLMin16Int_t);
+  HLK_TEST(CastToUint32, HLSLMin16Int_t);
+  HLK_TEST(CastToUint64, HLSLMin16Int_t);
+  HLK_TEST(CastToFloat32, HLSLMin16Int_t);
+  // Note: CastToUint16, CastToFloat16 excluded; see HLSLMin16Float_t Cast note.
+
+  // BinaryComparison
+  HLK_TEST(LessThan, HLSLMin16Int_t);
+  HLK_TEST(LessEqual, HLSLMin16Int_t);
+  HLK_TEST(GreaterThan, HLSLMin16Int_t);
+  HLK_TEST(GreaterEqual, HLSLMin16Int_t);
+  HLK_TEST(Equal, HLSLMin16Int_t);
+  HLK_TEST(NotEqual, HLSLMin16Int_t);
+
+  // Select
+  HLK_TEST(Select, HLSLMin16Int_t);
+
+  // Reduction
+  HLK_TEST(Any_Mixed, HLSLMin16Int_t);
+  HLK_TEST(Any_Zero, HLSLMin16Int_t);
+  HLK_TEST(Any_NoZero, HLSLMin16Int_t);
+  HLK_TEST(All_Mixed, HLSLMin16Int_t);
+  HLK_TEST(All_Zero, HLSLMin16Int_t);
+  HLK_TEST(All_NoZero, HLSLMin16Int_t);
+
+  // LoadAndStore
+  HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Int_t);
+  HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Int_t);
+  HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Int_t);
+  HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Int_t);
+  HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Int_t);
+  HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Int_t);
+  HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Int_t);
+  HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Int_t);
+  HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Int_t);
+  HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Int_t);
+  HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Int_t);
+  HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Int_t);
+
+  // Quad
+  HLK_TEST(QuadReadLaneAt, HLSLMin16Int_t);
+  HLK_TEST(QuadReadAcrossX, HLSLMin16Int_t);
+  HLK_TEST(QuadReadAcrossY, HLSLMin16Int_t);
+  HLK_TEST(QuadReadAcrossDiagonal, HLSLMin16Int_t);
+
+  // Wave
+  HLK_WAVEOP_TEST(WaveActiveSum, HLSLMin16Int_t);
+  HLK_WAVEOP_TEST(WaveActiveMin, HLSLMin16Int_t);
+  HLK_WAVEOP_TEST(WaveActiveMax, HLSLMin16Int_t);
+  HLK_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Int_t);
+  HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Int_t);
+  HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Int_t);
+  HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Int_t);
+  HLK_WAVEOP_TEST(WavePrefixSum, HLSLMin16Int_t);
+  HLK_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Int_t);
+  HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Int_t);
+  HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Int_t);
+  HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, HLSLMin16Int_t);
+  HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, HLSLMin16Int_t);
+  HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, HLSLMin16Int_t);
+  HLK_WAVEOP_TEST(WaveMatch, HLSLMin16Int_t);
+
+  // ---- HLSLMin16Uint_t (mirrors applicable uint16_t ops) ----
+
+  // TernaryMath
+  HLK_TEST(Mad, HLSLMin16Uint_t);
+
+  // BinaryMath
+  HLK_TEST(Add, HLSLMin16Uint_t);
+  HLK_TEST(Subtract, HLSLMin16Uint_t);
+  HLK_TEST(Multiply, HLSLMin16Uint_t);
+  HLK_TEST(Divide, HLSLMin16Uint_t);
+  HLK_TEST(Modulus, HLSLMin16Uint_t);
+  HLK_TEST(Min, HLSLMin16Uint_t);
+  HLK_TEST(Max, HLSLMin16Uint_t);
+
+  // Bitwise
+  HLK_TEST(And, HLSLMin16Uint_t);
+  HLK_TEST(Or, HLSLMin16Uint_t);
+  HLK_TEST(Xor, HLSLMin16Uint_t);
+  HLK_TEST(LeftShift, HLSLMin16Uint_t);
+  HLK_TEST(RightShift, HLSLMin16Uint_t);
+  // Note: ReverseBits, CountBits, FirstBitHigh, FirstBitLow excluded -
+  // DXC promotes min precision to i32 before these intrinsics, so they
+  // don't operate at min precision.
+
+  // UnaryMath
+  HLK_TEST(Abs, HLSLMin16Uint_t);
+  HLK_TEST(Sign, HLSLMin16Uint_t);
+
+  // Unary
+  HLK_TEST(Initialize, HLSLMin16Uint_t);
+  HLK_TEST(ArrayOperator_StaticAccess, HLSLMin16Uint_t);
+  HLK_TEST(ArrayOperator_DynamicAccess, HLSLMin16Uint_t);
+  HLK_TEST(ShuffleVector, HLSLMin16Uint_t);
+
+  // Cast
+  HLK_TEST(CastToBool, HLSLMin16Uint_t);
+  HLK_TEST(CastToInt32, HLSLMin16Uint_t);
+  HLK_TEST(CastToInt64, HLSLMin16Uint_t);
+  HLK_TEST(CastToUint32, HLSLMin16Uint_t);
+  HLK_TEST(CastToUint64, HLSLMin16Uint_t);
+  HLK_TEST(CastToFloat32, HLSLMin16Uint_t);
+  // Note: CastToInt16, CastToFloat16 excluded; see HLSLMin16Float_t Cast note.
+
+  // BinaryComparison
+  HLK_TEST(LessThan, HLSLMin16Uint_t);
+  HLK_TEST(LessEqual, HLSLMin16Uint_t);
+  HLK_TEST(GreaterThan, HLSLMin16Uint_t);
+  HLK_TEST(GreaterEqual, HLSLMin16Uint_t);
+  HLK_TEST(Equal, HLSLMin16Uint_t);
+  HLK_TEST(NotEqual, HLSLMin16Uint_t);
+
+  // Select
+  HLK_TEST(Select, HLSLMin16Uint_t);
+
+  // LoadAndStore
+  HLK_TEST(LoadAndStore_RDH_BAB_SRV, HLSLMin16Uint_t);
+  HLK_TEST(LoadAndStore_RDH_BAB_UAV, HLSLMin16Uint_t);
+  HLK_TEST(LoadAndStore_DT_BAB_SRV, HLSLMin16Uint_t);
+  HLK_TEST(LoadAndStore_DT_BAB_UAV, HLSLMin16Uint_t);
+  HLK_TEST(LoadAndStore_RD_BAB_SRV, HLSLMin16Uint_t);
+  HLK_TEST(LoadAndStore_RD_BAB_UAV, HLSLMin16Uint_t);
+  HLK_TEST(LoadAndStore_RDH_SB_SRV, HLSLMin16Uint_t);
+  HLK_TEST(LoadAndStore_RDH_SB_UAV, HLSLMin16Uint_t);
+  HLK_TEST(LoadAndStore_DT_SB_SRV, HLSLMin16Uint_t);
+  HLK_TEST(LoadAndStore_DT_SB_UAV, HLSLMin16Uint_t);
+  HLK_TEST(LoadAndStore_RD_SB_SRV, HLSLMin16Uint_t);
+  HLK_TEST(LoadAndStore_RD_SB_UAV, HLSLMin16Uint_t);
+
+  // Quad
+  HLK_TEST(QuadReadLaneAt, HLSLMin16Uint_t);
+  HLK_TEST(QuadReadAcrossX, HLSLMin16Uint_t);
+  HLK_TEST(QuadReadAcrossY, HLSLMin16Uint_t);
+  HLK_TEST(QuadReadAcrossDiagonal, HLSLMin16Uint_t);
+
+  // Wave
+  HLK_WAVEOP_TEST(WaveActiveSum, HLSLMin16Uint_t);
+  HLK_WAVEOP_TEST(WaveActiveMin, HLSLMin16Uint_t);
+  HLK_WAVEOP_TEST(WaveActiveMax, HLSLMin16Uint_t);
+  HLK_WAVEOP_TEST(WaveActiveProduct, HLSLMin16Uint_t);
+  HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLMin16Uint_t);
+  HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLMin16Uint_t);
+  HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLMin16Uint_t);
+  HLK_WAVEOP_TEST(WavePrefixSum, HLSLMin16Uint_t);
+  HLK_WAVEOP_TEST(WavePrefixProduct, HLSLMin16Uint_t);
+  HLK_WAVEOP_TEST(WaveMultiPrefixSum, HLSLMin16Uint_t);
+  HLK_WAVEOP_TEST(WaveMultiPrefixProduct, HLSLMin16Uint_t);
+  HLK_WAVEOP_TEST(WaveMultiPrefixBitAnd, HLSLMin16Uint_t);
+  HLK_WAVEOP_TEST(WaveMultiPrefixBitOr, HLSLMin16Uint_t);
+  HLK_WAVEOP_TEST(WaveMultiPrefixBitXor, HLSLMin16Uint_t);
+  HLK_WAVEOP_TEST(WaveMatch, HLSLMin16Uint_t);
 };

 #define HLK_TEST_DOUBLE(Op, DataType) \
diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml
index 53a1332b7c..1f16b5b6ea 100644
--- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml
+++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml
@@ -4156,6 +4156,13 @@ void MSMain(uint GID : SV_GroupIndex,
         float MakeDifferent(float A) { return A + 1.0f; }
         double MakeDifferent(double A) { return A + 1.0; }

+        // NOTE(review): with -enable-16bit-types these min16 types presumably
+        // alias half/int16_t/uint16_t; confirm these overloads cannot collide
+        // with the 16-bit MakeDifferent overloads below.
+        min16float MakeDifferent(min16float A) { return A + (min16float)1.0f; }
+        min16int MakeDifferent(min16int A) { return A ^ 1; }
+        min16uint MakeDifferent(min16uint A) { return A ^ 1; }
+
         #if __HLSL_ENABLE_16_BIT
         uint16_t MakeDifferent(uint16_t A) { return A ^ 1; }
         int16_t MakeDifferent(int16_t A) { return A ^ 1; }