diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h
index c14b1d06987288..fd64be3df1b59f 100644
--- a/src/coreclr/inc/clrconfigvalues.h
+++ b/src/coreclr/inc/clrconfigvalues.h
@@ -678,6 +678,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntri
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX, W("EnableAVX"), 1, "Allows AVX and dependent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX2, W("EnableAVX2"), 1, "Allows AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE and dependent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512, W("EnableAVX512"), 1, "Allows AVX512 F+BW+CD+DQ+VL and depdendent hardware intrinsics to be disabled")
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512BMM, W("EnableAVX512BMM"), 1, "Allows AVX512BMM and dependent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512v2, W("EnableAVX512v2"), 1, "Allows AVX512 IFMA+VBMI and depdendent hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512v3, W("EnableAVX512v3"), 1, "Allows AVX512 BITALG+VBMI2+VNNI+VPOPCNTDQ and depdendent hardware intrinsics to be disabled")
diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h
index eff234962cef65..0c2548a16a73f9 100644
--- a/src/coreclr/inc/corinfoinstructionset.h
+++ b/src/coreclr/inc/corinfoinstructionset.h
@@ -97,6 +97,8 @@ enum CORINFO_InstructionSet
InstructionSet_SHA_X64=43,
InstructionSet_WAITPKG_X64=44,
InstructionSet_X86Serialize_X64=45,
+ InstructionSet_AVX512BMM=46,
+ InstructionSet_AVX512BMM_X64=47,
#endif // TARGET_AMD64
#ifdef TARGET_X86
InstructionSet_X86Base=1,
@@ -144,6 +146,8 @@ enum CORINFO_InstructionSet
InstructionSet_SHA_X64=43,
InstructionSet_WAITPKG_X64=44,
InstructionSet_X86Serialize_X64=45,
+ InstructionSet_AVX512BMM=46,
+ InstructionSet_AVX512BMM_X64=47,
#endif // TARGET_X86
};
@@ -295,6 +299,8 @@ struct CORINFO_InstructionSetFlags
AddInstructionSet(InstructionSet_WAITPKG_X64);
if (HasInstructionSet(InstructionSet_X86Serialize))
AddInstructionSet(InstructionSet_X86Serialize_X64);
+ if (HasInstructionSet(InstructionSet_AVX512BMM))
+            AddInstructionSet(InstructionSet_AVX512BMM_X64);
#endif // TARGET_AMD64
#ifdef TARGET_X86
#endif // TARGET_X86
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 80fd72b3ced9af..ff65e2d883a25f 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -128,6 +128,7 @@ bool emitter::Is3OpRmwInstruction(instruction ins)
{
return ((ins >= FIRST_FMA_INSTRUCTION) && (ins <= LAST_FMA_INSTRUCTION)) ||
(IsAVXVNNIFamilyInstruction(ins)) ||
+ ((ins >= FIRST_AVX512BMM_INSTRUCTION) && (ins <= LAST_AVX512BMM_INSTRUCTION)) ||
((ins >= FIRST_AVXIFMA_INSTRUCTION) && (ins <= LAST_AVXIFMA_INSTRUCTION));
}
}
@@ -3104,8 +3105,9 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co
// 0x0000RM11.
leadingBytes = (code >> 16) & 0xFF;
assert(leadingBytes == 0x0F ||
- (m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) && leadingBytes >= 0x00 &&
- leadingBytes <= 0x07) ||
+ ((m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) ||
+ m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512BMM)) &&
+ leadingBytes >= 0x00 && leadingBytes <= 0x07) ||
(IsApxExtendedEvexInstruction(ins) && leadingBytes == 0));
code &= 0xFFFF;
}
@@ -3164,10 +3166,16 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co
break;
}
+ case 0x06:
+ {
+ assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512BMM));
+ evexPrefix |= (0x6 << 16);
+ break;
+ }
+
case 0x01:
case 0x02:
case 0x03:
- case 0x06:
case 0x07:
default:
{
@@ -21377,6 +21385,15 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
break;
}
+ case INS_vbmacor16x16x16:
+ case INS_vbmacxor16x16x16:
+ case INS_vbitrev:
+ {
+ result.insLatency = PERFSCORE_LATENCY_1C;
+ result.insThroughput = PERFSCORE_THROUGHPUT_1C;
+ break;
+ }
+
default:
{
assert((unsigned)ins < ArrLen(insThroughputInfos));
diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp
index d34171ac299e6a..baa8d3eeba0ece 100644
--- a/src/coreclr/jit/hwintrinsic.cpp
+++ b/src/coreclr/jit/hwintrinsic.cpp
@@ -987,6 +987,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
{ NI_Illegal, NI_Illegal }, // SHA_X64
{ NI_Illegal, NI_Illegal }, // WAITPKG_X64
{ NI_Illegal, NI_Illegal }, // X86Serialize_X64
+ { FIRST_NI_AVX512BMM, LAST_NI_AVX512BMM }, // AVX512BMM
#elif defined (TARGET_ARM64)
{ FIRST_NI_ArmBase, LAST_NI_ArmBase }, // ArmBase
{ FIRST_NI_AdvSimd, LAST_NI_AdvSimd }, // AdvSimd
diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
index 019553921e7431..93f7b08bbf53a9 100644
--- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
@@ -911,6 +911,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}
+ case NI_AVX512BMM_BitMultiplyMatrix16x16WithOrReduction:
+ case NI_AVX512BMM_BitMultiplyMatrix16x16WithXorReduction:
+ {
+ genHWIntrinsic_R_R_R_RM(ins, simdSize, targetReg, op1Reg, op2Reg, op3, instOptions);
+ break;
+ }
+
default:
{
unreached();
diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h
index 584a1994539d64..0dbc14ddd92916 100644
--- a/src/coreclr/jit/hwintrinsiclistxarch.h
+++ b/src/coreclr/jit/hwintrinsiclistxarch.h
@@ -1092,6 +1092,12 @@ HARDWARE_INTRINSIC(AVX10v2, MultipleSumAbsoluteDifferences,
HARDWARE_INTRINSIC(AVX10v2, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
#define LAST_NI_AVX10v2 NI_AVX10v2_StoreScalar
+#define FIRST_NI_AVX512BMM NI_AVX512BMM_BitMultiplyMatrix16x16WithOrReduction
+HARDWARE_INTRINSIC(AVX512BMM, BitMultiplyMatrix16x16WithOrReduction, -1, 3, {INS_invalid, INS_invalid, INS_vbmacor16x16x16, INS_vbmacor16x16x16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512BMM, BitMultiplyMatrix16x16WithXorReduction, -1, 3, {INS_invalid, INS_invalid, INS_vbmacxor16x16x16, INS_vbmacxor16x16x16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+HARDWARE_INTRINSIC(AVX512BMM, ReverseBits, -1, 1, {INS_invalid, INS_vbitrev, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg)
+#define LAST_NI_AVX512BMM NI_AVX512BMM_ReverseBits
+
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp
index 93c75e263682db..ec2d51c27a179b 100644
--- a/src/coreclr/jit/hwintrinsicxarch.cpp
+++ b/src/coreclr/jit/hwintrinsicxarch.cpp
@@ -215,6 +215,10 @@ CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className)
{
return InstructionSet_AVX512;
}
+ else if (strcmp(className + 7, "mm") == 0)
+ {
+ return InstructionSet_AVX512BMM;
+ }
}
else if ((strcmp(className + 6, "CD") == 0) || (strcmp(className + 6, "DQ") == 0))
{
diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h
index 6234502cced6bd..efae88e02a76eb 100644
--- a/src/coreclr/jit/instrsxarch.h
+++ b/src/coreclr/jit/instrsxarch.h
@@ -1097,6 +1097,12 @@ INST3(vucomish, "vucomish", IUM_RD, BAD_CODE, BAD_
INST3(vp2intersectd, "vp2intersectd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0x68), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Compute Intersection Between DWORDS to a Pair of Mask Registers
INST3(vp2intersectq, "vp2intersectq", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0x68), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Compute Intersection Between QWORDS to a Pair of Mask Registers
+#define FIRST_AVX512BMM_INSTRUCTION INS_vbmacor16x16x16
+INST3(vbmacor16x16x16, "vbmacor16x16x16", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x06, 0x80), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // 16x16x16 bit-matrix multiply-accumulate with OR reduction
+INST3(vbmacxor16x16x16, "vbmacxor16x16x16", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x06, 0x80), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // 16x16x16 bit-matrix multiply-accumulate with XOR reduction
+INST3(vbitrev, "vbitrev", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x06, 0x81), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_8Bit | KMask_Base16 | REX_W0 | Encoding_EVEX) // Reverse the bit order within each packed byte element
+#define LAST_AVX512BMM_INSTRUCTION INS_vbitrev
+
// Instructions for AVX10v2
INST3(vcomxsd, "vcomxsd", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2f), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Compare double precision floating point values and set flags
INST3(vcomxss, "vcomxss", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2f), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Compare single precision floating point values and set flags
@@ -1131,6 +1137,7 @@ INST3(vucomxsd, "vucomxsd", IUM_RD, BAD_CODE, BAD_
INST3(vucomxss, "vucomxss", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2E), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of single precision floating point values and set flags
#define LAST_AVX512_INSTRUCTION INS_vucomxss
+
// id nm um mr mi rm lat tp tt flags
#define FIRST_APX_INSTRUCTION INS_ccmpo
#define FIRST_CCMP_INSTRUCTION INS_ccmpo
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index 0ce78c2c314cad..f7d1c57b85d9f1 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -400,6 +400,7 @@ RELEASE_CONFIG_INTEGER(EnableHWIntrinsic, "EnableHWIntrinsic",
RELEASE_CONFIG_INTEGER(EnableAVX, "EnableAVX", 1) // Allows AVX and dependent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX2, "EnableAVX2", 1) // Allows AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE and dependent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX512, "EnableAVX512", 1) // Allows AVX512 F+BW+CD+DQ+VL and depdendent hardware intrinsics to be disabled
+RELEASE_CONFIG_INTEGER(EnableAVX512BMM, "EnableAVX512BMM", 1) // Allows AVX512BMM and dependent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX512v2, "EnableAVX512v2", 1) // Allows AVX512 IFMA+VBMI and depdendent hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableAVX512v3, "EnableAVX512v3", 1) // Allows AVX512 BITALG+VBMI2+VNNI+VPOPCNTDQ and depdendent hardware intrinsics to be disabled
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index eb3edc5933402f..065e567fedb415 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -10281,7 +10281,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
default:
{
assert((intrinsicId == NI_X86Base_DivRem) || (intrinsicId == NI_X86Base_X64_DivRem) ||
- (intrinsicId >= FIRST_NI_AVXVNNI && intrinsicId <= LAST_NI_AVXVNNIINT_V512));
+ (intrinsicId >= FIRST_NI_AVXVNNI && intrinsicId <= LAST_NI_AVXVNNIINT_V512) ||
+ (intrinsicId >= FIRST_NI_AVX512BMM && intrinsicId <= LAST_NI_AVX512BMM));
TryMakeSrcContainedOrRegOptional(node, op3);
break;
}
diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp
index a5aed03e99b042..e52fa2c6bdefec 100644
--- a/src/coreclr/vm/codeman.cpp
+++ b/src/coreclr/vm/codeman.cpp
@@ -1297,6 +1297,11 @@ void EEJitManager::SetCpuInfo()
CPUCompileFlags.Set(InstructionSet_AVX512v3);
}
+ if (((cpuFeatures & XArchIntrinsicConstants_AVX512Bmm) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BMM))
+ {
+ CPUCompileFlags.Set(InstructionSet_AVX512BMM);
+ }
+
if (((cpuFeatures & XArchIntrinsicConstants_Avx10v1) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX10v1))
{
CPUCompileFlags.Set(InstructionSet_AVX10v1);
diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
index c744f85d3e05ce..654f8dbe6c46e0 100644
--- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
+++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
@@ -2729,6 +2729,7 @@
+
@@ -2765,6 +2766,7 @@
+
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512Bmm.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512Bmm.PlatformNotSupported.cs
new file mode 100644
index 00000000000000..5e331d0ab6ce56
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512Bmm.PlatformNotSupported.cs
@@ -0,0 +1,71 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+
+namespace System.Runtime.Intrinsics.X86
+{
+    [CLSCompliant(false)]
+    public abstract class Avx512Bmm : Avx512F
+    {
+        internal Avx512Bmm() { }
+
+        /// Gets a value that indicates whether the APIs in this class are supported.
+        /// if the APIs are supported; otherwise, .
+        /// A value of indicates that the APIs will throw .
+        public static new bool IsSupported { [Intrinsic] get { return false; } }
+
+        ///
+        /// __m128i _mm_bitrev_epi8 (__m128i values)
+        /// VBITREV xmm1{k1}{z}, xmm2/m128
+        ///
+        public static Vector128 ReverseBits(Vector128 values) { throw new PlatformNotSupportedException(); }
+
+        ///
+        /// __m256i _mm256_bitrev_epi8 (__m256i values)
+        /// VBITREV ymm1{k1}{z}, ymm2/m256
+        ///
+        public static Vector256 ReverseBits(Vector256 values) { throw new PlatformNotSupportedException(); }
+
+        ///
+        /// __m512i _mm512_bitrev_epi8 (__m512i values)
+        /// VBITREV zmm1{k1}{z}, zmm2/m512
+        ///
+        public static Vector512 ReverseBits(Vector512 values) { throw new PlatformNotSupportedException(); }
+
+        ///
+        /// __m256i _mm256_bmacor16x16x16 (__m256i left, __m256i right, __m256i addend)
+        /// VBMACOR16x16x16 ymm1, ymm2, ymm3/m256
+        ///
+        public static Vector256 BitMultiplyMatrix16x16WithOrReduction(Vector256 left, Vector256 right, Vector256 addend) { throw new PlatformNotSupportedException(); }
+
+        ///
+        /// __m512i _mm512_bmacor16x16x16 (__m512i left, __m512i right, __m512i addend)
+        /// VBMACOR16x16x16 zmm1, zmm2, zmm3/m512
+        ///
+        public static Vector512 BitMultiplyMatrix16x16WithOrReduction(Vector512 left, Vector512 right, Vector512 addend) { throw new PlatformNotSupportedException(); }
+
+        ///
+        /// __m256i _mm256_bmacxor16x16x16 (__m256i left, __m256i right, __m256i addend)
+        /// VBMACXOR16x16x16 ymm1, ymm2, ymm3/m256
+        ///
+        public static Vector256 BitMultiplyMatrix16x16WithXorReduction(Vector256 left, Vector256 right, Vector256 addend) { throw new PlatformNotSupportedException(); }
+
+        ///
+        /// __m512i _mm512_bmacxor16x16x16 (__m512i left, __m512i right, __m512i addend)
+        /// VBMACXOR16x16x16 zmm1, zmm2, zmm3/m512
+        ///
+        public static Vector512 BitMultiplyMatrix16x16WithXorReduction(Vector512 left, Vector512 right, Vector512 addend) { throw new PlatformNotSupportedException(); }
+
+        public new abstract class X64 : Avx512F.X64
+        {
+            internal X64() { }
+
+            /// Gets a value that indicates whether the APIs in this class are supported.
+            /// if the APIs are supported; otherwise, .
+            /// A value of indicates that the APIs will throw .
+            public static new bool IsSupported { [Intrinsic] get { return false; } }
+        }
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512Bmm.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512Bmm.cs
new file mode 100644
index 00000000000000..ef299da60811df
--- /dev/null
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512Bmm.cs
@@ -0,0 +1,73 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
+
+namespace System.Runtime.Intrinsics.X86
+{
+ [Intrinsic]
+ [CLSCompliant(false)]
+ public abstract class Avx512Bmm : Avx512F
+ {
+ internal Avx512Bmm() { }
+
+ /// Gets a value that indicates whether the APIs in this class are supported.
+ /// if the APIs are supported; otherwise, .
+ /// A value of indicates that the APIs will throw .
+ public static new bool IsSupported { get => IsSupported; }
+
+ ///
+ /// __m128i _mm_bitrev_epi8 (__m128i values)
+ /// VBITREV xmm1{k1}{z}, xmm2/m128
+ ///
+ public static Vector128 ReverseBits(Vector128 values) => ReverseBits(values);
+
+ ///
+ /// __m256i _mm256_bitrev_epi8 (__m256i values)
+ /// VBITREV ymm1{k1}{z}, ymm2/m256
+ ///
+ public static Vector256 ReverseBits(Vector256 values) => ReverseBits(values);
+
+ ///
+ /// __m512i _mm512_bitrev_epi8 (__m512i values)
+ /// VBITREV zmm1{k1}{z}, zmm2/m512
+ ///
+ public static Vector512 ReverseBits(Vector512 values) => ReverseBits(values);
+
+ ///
+ /// __m256i _mm256_bmacor16x16x16 (__m256i left, __m256i right, __m256i addend)
+ /// VBMACOR16x16x16 ymm1, ymm2, ymm3/m256
+ ///
+ public static Vector256 BitMultiplyMatrix16x16WithOrReduction(Vector256 left, Vector256 right, Vector256 addend) => BitMultiplyMatrix16x16WithOrReduction(left, right, addend);
+
+ ///
+ /// __m512i _mm512_bmacor16x16x16 (__m512i left, __m512i right, __m512i addend)
+        /// VBMACOR16x16x16 zmm1, zmm2, zmm3/m512
+ ///
+ public static Vector512 BitMultiplyMatrix16x16WithOrReduction(Vector512 left, Vector512 right, Vector512 addend) => BitMultiplyMatrix16x16WithOrReduction(left, right, addend);
+
+ ///
+ /// __m256i _mm256_bmacxor16x16x16 (__m256i left, __m256i right, __m256i addend)
+ /// VBMACXOR16x16x16 ymm1, ymm2, ymm3/m256
+ ///
+ public static Vector256 BitMultiplyMatrix16x16WithXorReduction(Vector256 left, Vector256 right, Vector256 addend) => BitMultiplyMatrix16x16WithXorReduction(left, right, addend);
+
+ ///
+ /// __m512i _mm512_bmacxor16x16x16 (__m512i left, __m512i right, __m512i addend)
+        /// VBMACXOR16x16x16 zmm1, zmm2, zmm3/m512
+ ///
+ public static Vector512 BitMultiplyMatrix16x16WithXorReduction(Vector512 left, Vector512 right, Vector512 addend) => BitMultiplyMatrix16x16WithXorReduction(left, right, addend);
+
+ [Intrinsic]
+ public new abstract class X64 : Avx512F.X64
+ {
+ internal X64() { }
+
+ /// Gets a value that indicates whether the APIs in this class are supported.
+ /// if the APIs are supported; otherwise, .
+ /// A value of indicates that the APIs will throw .
+ public static new bool IsSupported { get => IsSupported; }
+ }
+ }
+}
diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
index a1f06da80d0c81..ce3788b2323036 100644
--- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
+++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
@@ -8580,6 +8580,25 @@ internal V512() { }
}
}
+ [System.CLSCompliantAttribute(false)]
+ public abstract partial class Avx512Bmm : System.Runtime.Intrinsics.X86.Avx512F
+ {
+ internal Avx512Bmm() { }
+ public static new bool IsSupported { get { throw null; } }
+ public static System.Runtime.Intrinsics.Vector128 ReverseBits(System.Runtime.Intrinsics.Vector128 values) { throw null; }
+ public static System.Runtime.Intrinsics.Vector256 ReverseBits(System.Runtime.Intrinsics.Vector256 values) { throw null; }
+ public static System.Runtime.Intrinsics.Vector512 ReverseBits(System.Runtime.Intrinsics.Vector512 values) { throw null; }
+ public static System.Runtime.Intrinsics.Vector256 BitMultiplyMatrix16x16WithOrReduction(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right, System.Runtime.Intrinsics.Vector256 addend) { throw null; }
+ public static System.Runtime.Intrinsics.Vector512 BitMultiplyMatrix16x16WithOrReduction(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right, System.Runtime.Intrinsics.Vector512 addend) { throw null; }
+ public static System.Runtime.Intrinsics.Vector256 BitMultiplyMatrix16x16WithXorReduction(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right, System.Runtime.Intrinsics.Vector256 addend) { throw null; }
+ public static System.Runtime.Intrinsics.Vector512 BitMultiplyMatrix16x16WithXorReduction(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right, System.Runtime.Intrinsics.Vector512 addend) { throw null; }
+ public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Avx512F.X64
+ {
+ internal X64() { }
+ public static new bool IsSupported { get { throw null; } }
+ }
+ }
+
[System.CLSCompliantAttribute(false)]
public abstract partial class Avx512BW : System.Runtime.Intrinsics.X86.Avx512F
{
diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c
index 5b12f4fc76babf..416e607d83dc0d 100644
--- a/src/native/minipal/cpufeatures.c
+++ b/src/native/minipal/cpufeatures.c
@@ -446,6 +446,12 @@ int minipal_getcpufeatures(void)
}
}
}
+        // AVX512BMM is reported in CPUID.80000021H:EAX[23]; verify the maximum
+        // supported extended leaf first, since querying an out-of-range extended
+        // leaf returns undefined data on some processors.
+        __cpuidex(cpuidInfo, 0x80000000, 0x0);
+        if (((unsigned int)cpuidInfo[CPUID_EAX] >= 0x80000021u) && (__cpuidex(cpuidInfo, 0x80000021, 0x0), (cpuidInfo[CPUID_EAX] & (1 << 23)) != 0))
+            result |= XArchIntrinsicConstants_AVX512Bmm;
}
#endif // HOST_X86 || HOST_AMD64
diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h
index eebdcae905b0f5..66ac726259b7a7 100644
--- a/src/native/minipal/cpufeatures.h
+++ b/src/native/minipal/cpufeatures.h
@@ -30,6 +30,7 @@
#define XArchIntrinsicConstants_Vaes (1 << 15)
#define XArchIntrinsicConstants_WaitPkg (1 << 16)
#define XArchIntrinsicConstants_X86Serialize (1 << 17)
+#define XArchIntrinsicConstants_AVX512Bmm (1 << 18)
#endif // HOST_X86 || HOST_AMD64
#if defined(HOST_ARM64)
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_handwritten_r.csproj
new file mode 100644
index 00000000000000..2a11e21688bd2d
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_handwritten_r.csproj
@@ -0,0 +1,14 @@
+
+
+ X86_Avx512Bmm_handwritten_r
+ true
+
+
+ Embedded
+
+
+
+
+
+
+
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_handwritten_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_handwritten_ro.csproj
new file mode 100644
index 00000000000000..5fc89423c0c5e6
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_handwritten_ro.csproj
@@ -0,0 +1,14 @@
+
+
+ X86_Avx512Bmm_handwritten_ro
+ true
+
+
+ Embedded
+ True
+
+
+
+
+
+
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_r.csproj
new file mode 100644
index 00000000000000..518b8773b33296
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_r.csproj
@@ -0,0 +1,14 @@
+
+
+ X86_Avx512Bmm_r
+ true
+
+
+ Embedded
+
+
+
+
+
+
+
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_ro.csproj
new file mode 100644
index 00000000000000..02c9a4d59b417b
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_ro.csproj
@@ -0,0 +1,14 @@
+
+
+ X86_Avx512Bmm_ro
+ true
+
+
+ Embedded
+ True
+
+
+
+
+
+
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/HandwrittenProgram.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/HandwrittenProgram.cs
new file mode 100644
index 00000000000000..b17cc98976fab2
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/HandwrittenProgram.cs
@@ -0,0 +1,238 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using Xunit;
+using System.Runtime.CompilerServices;
+
+
+namespace JIT.HardwareIntrinsics.X86._Avx512Bmm
+{
+ public static partial class Program
+ {
+ static Program()
+ {
+
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private static Vector256 BitMultiplyMatrix16x16WithOrReduction_Vector256(Vector256 x, Vector256 y, Vector256 z)
+ {
+ return Avx512Bmm.BitMultiplyMatrix16x16WithOrReduction(x, y, z);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private static Vector512 BitMultiplyMatrix16x16WithOrReduction_Vector512(Vector512 x, Vector512 y, Vector512 z)
+ {
+ return Avx512Bmm.BitMultiplyMatrix16x16WithOrReduction(x, y, z);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private static Vector256 BitMultiplyMatrix16x16WithXorReduction_Vector256(Vector256 x, Vector256 y, Vector256 z)
+ {
+ return Avx512Bmm.BitMultiplyMatrix16x16WithXorReduction(x, y, z);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private static Vector512 BitMultiplyMatrix16x16WithXorReduction_Vector512(Vector512 x, Vector512 y, Vector512 z)
+ {
+ return Avx512Bmm.BitMultiplyMatrix16x16WithXorReduction(x, y, z);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private static Vector128 ReverseBits_Vector128(Vector128 values)
+ {
+ return Avx512Bmm.ReverseBits(values);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private static Vector256 ReverseBits_Vector256(Vector256 values)
+ {
+ return Avx512Bmm.ReverseBits(values);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private static Vector512 ReverseBits_Vector512(Vector512 values)
+ {
+ return Avx512Bmm.ReverseBits(values);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private static Vector128 ReverseBits_Mask_Vector128(Vector128 values, Vector128 mask)
+ {
+ return Avx512BW.BlendVariable(values, Avx512Bmm.ReverseBits(values), mask);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private static Vector256 ReverseBits_Mask_Vector256(Vector256 values, Vector256 mask)
+ {
+ return Avx512BW.BlendVariable(values, Avx512Bmm.ReverseBits(values), mask);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private static Vector512 ReverseBits_Mask_Vector512(Vector512 values, Vector512 mask)
+ {
+ return Avx512BW.BlendVariable(values, Avx512Bmm.ReverseBits(values), mask);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private static Vector128 ReverseBits_Maskz_Vector128(Vector128 values, Vector128 mask)
+ {
+ return Avx512BW.BlendVariable(Vector128.Zero, Avx512Bmm.ReverseBits(values), mask);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private static Vector256 ReverseBits_Maskz_Vector256(Vector256 values, Vector256 mask)
+ {
+ return Avx512BW.BlendVariable(Vector256.Zero, Avx512Bmm.ReverseBits(values), mask);
+ }
+
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private static Vector512 ReverseBits_Maskz_Vector512(Vector512 values, Vector512 mask)
+ {
+ return Avx512BW.BlendVariable(Vector512.Zero, Avx512Bmm.ReverseBits(values), mask);
+ }
+
+ [Fact]
+ public static void CheckSupported()
+ {
+ (int Eax, int Ebx, int Ecx, int Edx) = X86Base.CpuId(unchecked((int)0x80000021), (int)0x0);
+ bool isSupported = (Eax & (1 << 23)) != 0;
+ Assert.Equal(isSupported, Avx512Bmm.IsSupported);
+ }
+
+ [Fact]
+ public static void BitMultiplyMatrix16x16WithOrReduction_Vector256_Test()
+ {
+ if (!Avx512Bmm.IsSupported) return;
+ Vector256 x = Vector256.Create((ushort)0x1);
+ Vector256 y = Vector256.Create((ushort)0x1);
+ Vector256 z = Vector256.Create((ushort)0x1011);
+ Vector256 result = BitMultiplyMatrix16x16WithOrReduction_Vector256(x, y, z);
+ Assert.Equal(result, Vector256.Create((ushort)0x1011));
+ }
+
+ [Fact]
+ public static void BitMultiplyMatrix16x16WithOrReduction_Vector512_Test()
+ {
+ if (!Avx512Bmm.IsSupported) return;
+ Vector512 x = Vector512.Create((ushort)0x1);
+ Vector512 y = Vector512.Create((ushort)0x1);
+ Vector512 z = Vector512.Create((ushort)0x1011);
+ Vector512 result = BitMultiplyMatrix16x16WithOrReduction_Vector512(x, y, z);
+ Assert.Equal(result, Vector512.Create((ushort)0x1011));
+ }
+
+ [Fact]
+ public static void BitMultiplyMatrix16x16WithXorReduction_Vector256_Test()
+ {
+ if (!Avx512Bmm.IsSupported) return;
+ Vector256 x = Vector256.Create((ushort)0x1);
+ Vector256 y = Vector256.Create((ushort)0x1);
+ Vector256 z = Vector256.Create((ushort)0x1011);
+ Vector256 result = BitMultiplyMatrix16x16WithXorReduction_Vector256(x, y, z);
+ Assert.Equal(result, Vector256.Create((ushort)0x1010));
+ }
+
+ [Fact]
+ public static void BitMultiplyMatrix16x16WithXorReduction_Vector512_Test()
+ {
+ if (!Avx512Bmm.IsSupported) return;
+ Vector512 x = Vector512.Create((ushort)0x1);
+ Vector512 y = Vector512.Create((ushort)0x1);
+ Vector512 z = Vector512.Create((ushort)0x1011);
+ Vector512 result = BitMultiplyMatrix16x16WithXorReduction_Vector512(x, y, z);
+ Assert.Equal(result, Vector512.Create((ushort)0x1010));
+ }
+
+ [Fact]
+ public static void ReverseBits_Vector128_Test()
+ {
+ if (!Avx512Bmm.IsSupported) return;
+ Vector128 x = Vector128.Create((byte)0xAA);
+ Vector128 y = ReverseBits_Vector128(x);
+ Assert.Equal(y, Vector128.Create((byte)0x55));
+ }
+
+ [Fact]
+ public static void ReverseBits_Vector128_Mask_Test()
+ {
+ if (!Avx512Bmm.IsSupported) return;
+ Vector128 x = Vector128.Create((byte)0xAA);
+ Vector128 mask = Vector128.Create(0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ Vector128 y = ReverseBits_Mask_Vector128(x, mask);
+ Assert.Equal(y, Vector128.Create((byte)0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA));
+ }
+
+ [Fact]
+ public static void ReverseBits_Vector128_Maskz_Test()
+ {
+ if (!Avx512Bmm.IsSupported) return;
+ Vector128 x = Vector128.Create((byte)0xAA);
+ Vector128 mask = Vector128.Create(0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ Vector128 y = ReverseBits_Maskz_Vector128(x, mask);
+ Assert.Equal(y, Vector128.Create((byte)0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00));
+ }
+
+ [Fact]
+ public static void ReverseBits_Vector256_Test()
+ {
+ if (!Avx512Bmm.IsSupported) return;
+ Vector256 x = Vector256.Create((byte)0xAA);
+ Vector256 y = ReverseBits_Vector256(x);
+ Assert.Equal(y, Vector256.Create((byte)0x55));
+ }
+
+ [Fact]
+ public static void ReverseBits_Vector256_Mask_Test()
+ {
+ if (!Avx512Bmm.IsSupported) return;
+ Vector256 x = Vector256.Create((byte)0xAA);
+ Vector256 mask = Vector256.Create(0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ Vector256 y = ReverseBits_Mask_Vector256(x, mask);
+ Assert.Equal(y, Vector256.Create((byte)0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA));
+ }
+
+ [Fact]
+ public static void ReverseBits_Vector256_Maskz_Test()
+ {
+ if (!Avx512Bmm.IsSupported) return;
+ Vector256 x = Vector256.Create((byte)0xAA);
+ Vector256 mask = Vector256.Create(0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ Vector256 y = ReverseBits_Maskz_Vector256(x, mask);
+ Assert.Equal(y, Vector256.Create((byte)0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00));
+ }
+
+ [Fact]
+ public static void ReverseBits_Vector512_Test()
+ {
+ if (!Avx512Bmm.IsSupported) return;
+ Vector512 x = Vector512.Create((byte)0xAA);
+ Vector512 y = ReverseBits_Vector512(x);
+ Assert.Equal(y, Vector512.Create((byte)0x55));
+ }
+
+ [Fact]
+ public static void ReverseBits_Vector512_Mask_Test()
+ {
+ if (!Avx512Bmm.IsSupported) return;
+ Vector512 x = Vector512.Create((byte)0xAA);
+ Vector512 mask = Vector512.Create(0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ Vector512 y = ReverseBits_Mask_Vector512(x, mask);
+ Assert.Equal(y, Vector512.Create((byte)0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA));
+ }
+
+ [Fact]
+ public static void ReverseBits_Vector512_Maskz_Test()
+ {
+ if (!Avx512Bmm.IsSupported) return;
+ Vector512 x = Vector512.Create((byte)0xAA);
+ Vector512 mask = Vector512.Create(0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+ Vector512 y = ReverseBits_Maskz_Vector512(x, mask);
+ Assert.Equal(y, Vector512.Create((byte)0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00));
+ }
+ }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Program.Avx512Bmm.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Program.Avx512Bmm.cs
new file mode 100644
index 00000000000000..f7eced3e0f9e4c
--- /dev/null
+++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Program.Avx512Bmm.cs
@@ -0,0 +1,16 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+
+namespace JIT.HardwareIntrinsics.X86._Avx512Bmm
+{
+ public static partial class Program
+ {
+ static Program()
+ {
+
+ }
+ }
+}