diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index c14b1d06987288..fd64be3df1b59f 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -678,6 +678,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntri RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX, W("EnableAVX"), 1, "Allows AVX and dependent hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX2, W("EnableAVX2"), 1, "Allows AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE and dependent hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512, W("EnableAVX512"), 1, "Allows AVX512 F+BW+CD+DQ+VL and depdendent hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512BMM, W("EnableAVX512BMM"), 1, "Allows AVX512BMM and depdendent hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512v2, W("EnableAVX512v2"), 1, "Allows AVX512 IFMA+VBMI and depdendent hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512v3, W("EnableAVX512v3"), 1, "Allows AVX512 BITALG+VBMI2+VNNI+VPOPCNTDQ and depdendent hardware intrinsics to be disabled") diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index eff234962cef65..0c2548a16a73f9 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -97,6 +97,8 @@ enum CORINFO_InstructionSet InstructionSet_SHA_X64=43, InstructionSet_WAITPKG_X64=44, InstructionSet_X86Serialize_X64=45, + InstructionSet_AVX512BMM=46, + InstructionSet_AVX512BMM_X64=47, #endif // TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -144,6 +146,8 @@ enum CORINFO_InstructionSet InstructionSet_SHA_X64=43, InstructionSet_WAITPKG_X64=44, InstructionSet_X86Serialize_X64=45, + InstructionSet_AVX512BMM=46, + InstructionSet_AVX512BMM_X64=47, #endif // TARGET_X86 }; @@ -295,6 +299,8 @@ struct CORINFO_InstructionSetFlags 
AddInstructionSet(InstructionSet_WAITPKG_X64); if (HasInstructionSet(InstructionSet_X86Serialize)) AddInstructionSet(InstructionSet_X86Serialize_X64); + if (HasInstructionSet(InstructionSet_AVX512BMM)) + AddInstructionSet(InstructionSet_AVX512BMM_X64); #endif // TARGET_AMD64 #ifdef TARGET_X86 #endif // TARGET_X86 diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 80fd72b3ced9af..ff65e2d883a25f 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -128,6 +128,7 @@ bool emitter::Is3OpRmwInstruction(instruction ins) { return ((ins >= FIRST_FMA_INSTRUCTION) && (ins <= LAST_FMA_INSTRUCTION)) || (IsAVXVNNIFamilyInstruction(ins)) || + ((ins >= FIRST_AVX512BMM_INSTRUCTION) && (ins <= LAST_AVX512BMM_INSTRUCTION)) || ((ins >= FIRST_AVXIFMA_INSTRUCTION) && (ins <= LAST_AVXIFMA_INSTRUCTION)); } } @@ -3104,8 +3105,9 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co // 0x0000RM11. leadingBytes = (code >> 16) & 0xFF; assert(leadingBytes == 0x0F || - (m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) && leadingBytes >= 0x00 && - leadingBytes <= 0x07) || + ((m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX10v2) || + m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512BMM)) && + leadingBytes >= 0x00 && leadingBytes <= 0x07) || (IsApxExtendedEvexInstruction(ins) && leadingBytes == 0)); code &= 0xFFFF; } @@ -3164,10 +3166,16 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co break; } + case 0x06: + { + assert(m_compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512BMM)); + evexPrefix |= (0x6 << 16); + break; + } + case 0x01: case 0x02: case 0x03: - case 0x06: case 0x07: default: { @@ -21377,6 +21385,15 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; } + case INS_vbmacor16x16x16: + case INS_vbmacxor16x16x16: + case INS_vbitrev: + { + result.insLatency = PERFSCORE_LATENCY_1C; +
result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + } + default: { assert((unsigned)ins < ArrLen(insThroughputInfos)); diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index d34171ac299e6a..baa8d3eeba0ece 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -987,6 +987,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = { { NI_Illegal, NI_Illegal }, // SHA_X64 { NI_Illegal, NI_Illegal }, // WAITPKG_X64 { NI_Illegal, NI_Illegal }, // X86Serialize_X64 + { FIRST_NI_AVX512BMM, LAST_NI_AVX512BMM }, // AVX512BMM #elif defined (TARGET_ARM64) { FIRST_NI_ArmBase, LAST_NI_ArmBase }, // ArmBase { FIRST_NI_AdvSimd, LAST_NI_AdvSimd }, // AdvSimd diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 019553921e7431..93f7b08bbf53a9 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -911,6 +911,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } + case NI_AVX512BMM_BitMultiplyMatrix16x16WithOrReduction: + case NI_AVX512BMM_BitMultiplyMatrix16x16WithXorReduction: + { + genHWIntrinsic_R_R_R_RM(ins, simdSize, targetReg, op1Reg, op2Reg, op3, instOptions); + break; + } + default: { unreached(); diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 584a1994539d64..0dbc14ddd92916 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -1092,6 +1092,12 @@ HARDWARE_INTRINSIC(AVX10v2, MultipleSumAbsoluteDifferences, HARDWARE_INTRINSIC(AVX10v2, StoreScalar, 16, 2, {INS_invalid, INS_invalid, INS_vmovw_simd, INS_vmovw_simd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) #define LAST_NI_AVX10v2 NI_AVX10v2_StoreScalar +#define FIRST_NI_AVX512BMM 
NI_AVX512BMM_BitMultiplyMatrix16x16WithOrReduction +HARDWARE_INTRINSIC(AVX512BMM, BitMultiplyMatrix16x16WithOrReduction, -1, -1, {INS_invalid, INS_invalid, INS_vbmacor16x16x16, INS_vbmacor16x16x16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512BMM, BitMultiplyMatrix16x16WithXorReduction, -1, -1, {INS_invalid, INS_invalid, INS_vbmacxor16x16x16, INS_vbmacxor16x16x16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512BMM, ReverseBits, -1, -1, {INS_invalid, INS_vbitrev, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +#define LAST_NI_AVX512BMM NI_AVX512BMM_ReverseBits + // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 93c75e263682db..ec2d51c27a179b 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -215,6 +215,10 @@ CORINFO_InstructionSet Compiler::lookupInstructionSet(const char* className) { return InstructionSet_AVX512; } + else if (strcmp(className + 7, "mm") == 0) + { + return InstructionSet_AVX512BMM; + } } else if ((strcmp(className + 6, "CD") == 0) || 
(strcmp(className + 6, "DQ") == 0)) { diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 6234502cced6bd..efae88e02a76eb 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -1097,6 +1097,12 @@ INST3(vucomish, "vucomish", IUM_RD, BAD_CODE, BAD_ INST3(vp2intersectd, "vp2intersectd", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0x68), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_32Bit | KMask_Base4 | REX_W0 | Encoding_EVEX) // Compute Intersection Between DWORDS to a Pair of Mask Registers INST3(vp2intersectq, "vp2intersectq", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0x68), ILLEGAL, ILLEGAL, INS_TT_FULL, Input_64Bit | KMask_Base2 | REX_W1 | Encoding_EVEX) // Compute Intersection Between QWORDS to a Pair of Mask Registers +#define FIRST_AVX512BMM_INSTRUCTION INS_vbmacor16x16x16 +INST3(vbmacor16x16x16, "vbmacor16x16x16", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x06, 0x80), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Bit matrix multiply 16x16x16 with OR reduction, accumulated into dst +INST3(vbmacxor16x16x16, "vbmacxor16x16x16", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x06, 0x80), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Bit matrix multiply 16x16x16 with XOR reduction, accumulated into dst +INST3(vbitrev, "vbitrev", IUM_WR, BAD_CODE, BAD_CODE, PCKFLTMAP(0x06, 0x81), ILLEGAL, ILLEGAL, INS_TT_FULL_MEM, Input_8Bit | KMask_Base16 | REX_W0 | Encoding_EVEX ) // Reverse the bit order within each packed byte +#define LAST_AVX512BMM_INSTRUCTION INS_vbitrev + // Instructions for AVX10v2 INST3(vcomxsd, "vcomxsd", IUM_RD, BAD_CODE, BAD_CODE, SSEFLT(0x2f), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Compare
double precision floating point values and set flags INST3(vcomxss, "vcomxss", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2f), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Compare single precision floating point values and set flags @@ -1131,6 +1137,7 @@ INST3(vucomxsd, "vucomxsd", IUM_RD, BAD_CODE, BAD_ INST3(vucomxss, "vucomxss", IUM_RD, BAD_CODE, BAD_CODE, SSEDBL(0x2E), 3C, 1C, INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | Writes_OF | Writes_SF | Writes_ZF | Writes_PF | Writes_CF | Resets_AF) // Perform an unordered compare of single precision floating point values and set flags #define LAST_AVX512_INSTRUCTION INS_vucomxss + // id nm um mr mi rm lat tp tt flags #define FIRST_APX_INSTRUCTION INS_ccmpo #define FIRST_CCMP_INSTRUCTION INS_ccmpo diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 0ce78c2c314cad..f7d1c57b85d9f1 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -400,6 +400,7 @@ RELEASE_CONFIG_INTEGER(EnableHWIntrinsic, "EnableHWIntrinsic", RELEASE_CONFIG_INTEGER(EnableAVX, "EnableAVX", 1) // Allows AVX and dependent hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAVX2, "EnableAVX2", 1) // Allows AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE and dependent hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAVX512, "EnableAVX512", 1) // Allows AVX512 F+BW+CD+DQ+VL and depdendent hardware intrinsics to be disabled +RELEASE_CONFIG_INTEGER(EnableAVX512BMM, "EnableAVX512BMM", 1) // Allows AVX512BMM and dependent hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAVX512v2, "EnableAVX512v2", 1) // Allows AVX512 IFMA+VBMI and depdendent hardware intrinsics to be disabled RELEASE_CONFIG_INTEGER(EnableAVX512v3, "EnableAVX512v3", 1) // Allows AVX512 BITALG+VBMI2+VNNI+VPOPCNTDQ and depdendent hardware intrinsics to be disabled diff --git
a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index eb3edc5933402f..065e567fedb415 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -10281,7 +10281,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) default: { assert((intrinsicId == NI_X86Base_DivRem) || (intrinsicId == NI_X86Base_X64_DivRem) || - (intrinsicId >= FIRST_NI_AVXVNNI && intrinsicId <= LAST_NI_AVXVNNIINT_V512)); + (intrinsicId >= FIRST_NI_AVXVNNI && intrinsicId <= LAST_NI_AVXVNNIINT_V512) || + (intrinsicId >= FIRST_NI_AVX512BMM && intrinsicId <= LAST_NI_AVX512BMM)); TryMakeSrcContainedOrRegOptional(node, op3); break; } diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index a5aed03e99b042..e52fa2c6bdefec 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1297,6 +1297,11 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_AVX512v3); } + if (((cpuFeatures & XArchIntrinsicConstants_AVX512Bmm) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BMM)) + { + CPUCompileFlags.Set(InstructionSet_AVX512BMM); + } + if (((cpuFeatures & XArchIntrinsicConstants_Avx10v1) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX10v1)) { CPUCompileFlags.Set(InstructionSet_AVX10v1); diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index c744f85d3e05ce..654f8dbe6c46e0 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -2729,6 +2729,7 @@ + @@ -2765,6 +2766,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512Bmm.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512Bmm.PlatformNotSupported.cs new file mode 100644 index 
00000000000000..5e331d0ab6ce56 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512Bmm.PlatformNotSupported.cs @@ -0,0 +1,59 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; + +namespace System.Runtime.Intrinsics.X86 +{ + [CLSCompliant(false)] + public abstract class Avx512Bmm : Avx512F + { + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static new bool IsSupported { [Intrinsic] get { return false; } } + + /// + /// __m128i _mm_bitrev_epi8 (__m128i values) + /// VBITREV xmm1{k1}{z}, xmm2/m128 + /// + public static Vector128 ReverseBits(Vector128 values) { throw new PlatformNotSupportedException(); } + + /// + /// __m256i _mm256_bitrev_epi8 (__m256i values) + /// VBITREV ymm1{k1}{z}, ymm2/m256 + /// + public static Vector256 ReverseBits(Vector256 values) { throw new PlatformNotSupportedException(); } + + /// + /// __m512i _mm512_bitrev_epi8 (__m512i values) + /// VBITREV zmm1{k1}{z}, zmm2/m512 + /// + public static Vector512 ReverseBits(Vector512 values) { throw new PlatformNotSupportedException(); } + + /// + /// __m256i _mm256_bmacor16x16x16 (__m256i left, __m256i right, __m256i addend) + /// VBMACOR16x16x16 ymm1, ymm2, ymm3/m256 + /// + public static Vector256 BitMultiplyMatrix16x16WithOrReduction(Vector256 left, Vector256 right, Vector256 addend) { throw new PlatformNotSupportedException(); } + + /// + /// __m512i _mm512_bmacor16x16x16 (__m512i left, __m512i right, __m512i addend) + /// VBMACOR16x16x16 zmm1, zmm2, zmm3/m256 + /// + public static Vector512 BitMultiplyMatrix16x16WithOrReduction(Vector512 left, Vector512 right, Vector512 addend) { throw new PlatformNotSupportedException(); } + + /// + /// 
__m256i _mm256_bmacxor16x16x16 (__m256i left, __m256i right, __m256i addend) + /// VBMACXOR16x16x16 ymm1, ymm2, ymm3/m256 + /// + public static Vector256 BitMultiplyMatrix16x16WithXorReduction(Vector256 left, Vector256 right, Vector256 addend) { throw new PlatformNotSupportedException(); } + + /// + /// __m512i _mm512_bmacxor16x16x16 (__m512i left, __m512i right, __m512i addend) + /// VBMACXOR16x16x16 zmm1, zmm2, zmm3/m256 + /// + public static Vector512 BitMultiplyMatrix16x16WithXorReduction(Vector512 left, Vector512 right, Vector512 addend) { throw new PlatformNotSupportedException(); } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512Bmm.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512Bmm.cs new file mode 100644 index 00000000000000..ef299da60811df --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx512Bmm.cs @@ -0,0 +1,73 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics.CodeAnalysis; +using System.Runtime.CompilerServices; + +namespace System.Runtime.Intrinsics.X86 +{ + [Intrinsic] + [CLSCompliant(false)] + public abstract class Avx512Bmm : Avx512F + { + internal Avx512Bmm() { } + + /// Gets a value that indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . 
+ public static new bool IsSupported { get => IsSupported; } + + /// + /// __m128i _mm_bitrev_epi8 (__m128i values) + /// VBITREV xmm1{k1}{z}, xmm2/m128 + /// + public static Vector128 ReverseBits(Vector128 values) => ReverseBits(values); + + /// + /// __m256i _mm256_bitrev_epi8 (__m256i values) + /// VBITREV ymm1{k1}{z}, ymm2/m256 + /// + public static Vector256 ReverseBits(Vector256 values) => ReverseBits(values); + + /// + /// __m512i _mm512_bitrev_epi8 (__m512i values) + /// VBITREV zmm1{k1}{z}, zmm2/m512 + /// + public static Vector512 ReverseBits(Vector512 values) => ReverseBits(values); + + /// + /// __m256i _mm256_bmacor16x16x16 (__m256i left, __m256i right, __m256i addend) + /// VBMACOR16x16x16 ymm1, ymm2, ymm3/m256 + /// + public static Vector256 BitMultiplyMatrix16x16WithOrReduction(Vector256 left, Vector256 right, Vector256 addend) => BitMultiplyMatrix16x16WithOrReduction(left, right, addend); + + /// + /// __m512i _mm512_bmacor16x16x16 (__m512i left, __m512i right, __m512i addend) + /// VBMACOR16x16x16 zmm1, zmm2, zmm3/m512 + /// + public static Vector512 BitMultiplyMatrix16x16WithOrReduction(Vector512 left, Vector512 right, Vector512 addend) => BitMultiplyMatrix16x16WithOrReduction(left, right, addend); + + /// + /// __m256i _mm256_bmacxor16x16x16 (__m256i left, __m256i right, __m256i addend) + /// VBMACXOR16x16x16 ymm1, ymm2, ymm3/m256 + /// + public static Vector256 BitMultiplyMatrix16x16WithXorReduction(Vector256 left, Vector256 right, Vector256 addend) => BitMultiplyMatrix16x16WithXorReduction(left, right, addend); + + /// + /// __m512i _mm512_bmacxor16x16x16 (__m512i left, __m512i right, __m512i addend) + /// VBMACXOR16x16x16 zmm1, zmm2, zmm3/m512 + /// + public static Vector512 BitMultiplyMatrix16x16WithXorReduction(Vector512 left, Vector512 right, Vector512 addend) => BitMultiplyMatrix16x16WithXorReduction(left, right, addend); + + [Intrinsic] + public new abstract class X64 : Avx512F.X64 + { + internal X64() { } + + /// Gets a value that
indicates whether the APIs in this class are supported. + /// if the APIs are supported; otherwise, . + /// A value of indicates that the APIs will throw . + public static new bool IsSupported { get => IsSupported; } + } + } +} diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index a1f06da80d0c81..ce3788b2323036 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -8580,6 +8580,25 @@ internal V512() { } } } + [System.CLSCompliantAttribute(false)] + public abstract partial class Avx512Bmm : System.Runtime.Intrinsics.X86.Avx512F + { + internal Avx512Bmm() { } + public static new bool IsSupported { get { throw null; } } + public static System.Runtime.Intrinsics.Vector128 ReverseBits(System.Runtime.Intrinsics.Vector128 values) { throw null; } + public static System.Runtime.Intrinsics.Vector256 ReverseBits(System.Runtime.Intrinsics.Vector256 values) { throw null; } + public static System.Runtime.Intrinsics.Vector512 ReverseBits(System.Runtime.Intrinsics.Vector512 values) { throw null; } + public static System.Runtime.Intrinsics.Vector256 BitMultiplyMatrix16x16WithOrReduction(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right, System.Runtime.Intrinsics.Vector256 addend) { throw null; } + public static System.Runtime.Intrinsics.Vector512 BitMultiplyMatrix16x16WithOrReduction(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right, System.Runtime.Intrinsics.Vector512 addend) { throw null; } + public static System.Runtime.Intrinsics.Vector256 BitMultiplyMatrix16x16WithXorReduction(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right, System.Runtime.Intrinsics.Vector256 addend) { throw null; } + public static System.Runtime.Intrinsics.Vector512 
BitMultiplyMatrix16x16WithXorReduction(System.Runtime.Intrinsics.Vector512 left, System.Runtime.Intrinsics.Vector512 right, System.Runtime.Intrinsics.Vector512 addend) { throw null; } + public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Avx512F.X64 + { + internal X64() { } + public static new bool IsSupported { get { throw null; } } + } + } + [System.CLSCompliantAttribute(false)] public abstract partial class Avx512BW : System.Runtime.Intrinsics.X86.Avx512F { diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c index 5b12f4fc76babf..416e607d83dc0d 100644 --- a/src/native/minipal/cpufeatures.c +++ b/src/native/minipal/cpufeatures.c @@ -446,6 +446,12 @@ int minipal_getcpufeatures(void) } } } + + __cpuidex(cpuidInfo, 0x80000021, 0x0); + if ((cpuidInfo[CPUID_EAX] & (1 << 23)) != 0) + { + result |= XArchIntrinsicConstants_AVX512Bmm; + } } #endif // HOST_X86 || HOST_AMD64 diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h index eebdcae905b0f5..66ac726259b7a7 100644 --- a/src/native/minipal/cpufeatures.h +++ b/src/native/minipal/cpufeatures.h @@ -30,6 +30,7 @@ #define XArchIntrinsicConstants_Vaes (1 << 15) #define XArchIntrinsicConstants_WaitPkg (1 << 16) #define XArchIntrinsicConstants_X86Serialize (1 << 17) +#define XArchIntrinsicConstants_AVX512Bmm (1 << 18) #endif // HOST_X86 || HOST_AMD64 #if defined(HOST_ARM64) diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_handwritten_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_handwritten_r.csproj new file mode 100644 index 00000000000000..2a11e21688bd2d --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_handwritten_r.csproj @@ -0,0 +1,14 @@ + + + X86_Avx512Bmm_handwritten_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_handwritten_ro.csproj 
b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_handwritten_ro.csproj new file mode 100644 index 00000000000000..5fc89423c0c5e6 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_handwritten_ro.csproj @@ -0,0 +1,14 @@ + + + X86_Avx512Bmm_handwritten_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_r.csproj new file mode 100644 index 00000000000000..518b8773b33296 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_r.csproj @@ -0,0 +1,14 @@ + + + X86_Avx512Bmm_r + true + + + Embedded + + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_ro.csproj new file mode 100644 index 00000000000000..02c9a4d59b417b --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Avx512Bmm_ro.csproj @@ -0,0 +1,14 @@ + + + X86_Avx512Bmm_ro + true + + + Embedded + True + + + + + + diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/HandwrittenProgram.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/HandwrittenProgram.cs new file mode 100644 index 00000000000000..b17cc98976fab2 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/HandwrittenProgram.cs @@ -0,0 +1,238 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using Xunit; +using System.Runtime.CompilerServices; + + +namespace JIT.HardwareIntrinsics.X86._Avx512Bmm +{ + public static partial class Program + { + static Program() + { + + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static Vector256 BitMultiplyMatrix16x16WithOrReduction_Vector256(Vector256 x, Vector256 y, Vector256 z) + { + return Avx512Bmm.BitMultiplyMatrix16x16WithOrReduction(x, y, z); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static Vector512 BitMultiplyMatrix16x16WithOrReduction_Vector512(Vector512 x, Vector512 y, Vector512 z) + { + return Avx512Bmm.BitMultiplyMatrix16x16WithOrReduction(x, y, z); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static Vector256 BitMultiplyMatrix16x16WithXorReduction_Vector256(Vector256 x, Vector256 y, Vector256 z) + { + return Avx512Bmm.BitMultiplyMatrix16x16WithXorReduction(x, y, z); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static Vector512 BitMultiplyMatrix16x16WithXorReduction_Vector512(Vector512 x, Vector512 y, Vector512 z) + { + return Avx512Bmm.BitMultiplyMatrix16x16WithXorReduction(x, y, z); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static Vector128 ReverseBits_Vector128(Vector128 values) + { + return Avx512Bmm.ReverseBits(values); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static Vector256 ReverseBits_Vector256(Vector256 values) + { + return Avx512Bmm.ReverseBits(values); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static Vector512 ReverseBits_Vector512(Vector512 values) + { + return Avx512Bmm.ReverseBits(values); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static Vector128 ReverseBits_Mask_Vector128(Vector128 values, Vector128 mask) + { + return Avx512BW.BlendVariable(values, Avx512Bmm.ReverseBits(values), mask); + } + + 
[MethodImpl(MethodImplOptions.NoInlining)] + private static Vector256 ReverseBits_Mask_Vector256(Vector256 values, Vector256 mask) + { + return Avx512BW.BlendVariable(values, Avx512Bmm.ReverseBits(values), mask); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static Vector512 ReverseBits_Mask_Vector512(Vector512 values, Vector512 mask) + { + return Avx512BW.BlendVariable(values, Avx512Bmm.ReverseBits(values), mask); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static Vector128 ReverseBits_Maskz_Vector128(Vector128 values, Vector128 mask) + { + return Avx512BW.BlendVariable(Vector128.Zero, Avx512Bmm.ReverseBits(values), mask); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static Vector256 ReverseBits_Maskz_Vector256(Vector256 values, Vector256 mask) + { + return Avx512BW.BlendVariable(Vector256.Zero, Avx512Bmm.ReverseBits(values), mask); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static Vector512 ReverseBits_Maskz_Vector512(Vector512 values, Vector512 mask) + { + return Avx512BW.BlendVariable(Vector512.Zero, Avx512Bmm.ReverseBits(values), mask); + } + + [Fact] + public static void CheckSupported() + { + (int Eax, int Ebx, int Ecx, int Edx) = X86Base.CpuId(unchecked((int)0x80000021), (int)0x0); + bool isSupported = (Eax & (1 << 23)) != 0; + Assert.Equal(isSupported, Avx512Bmm.IsSupported); + } + + [Fact] + public static void BitMultiplyMatrix16x16WithOrReduction_Vector256_Test() + { + if (!Avx512Bmm.IsSupported) return; + Vector256 x = Vector256.Create((ushort)0x1); + Vector256 y = Vector256.Create((ushort)0x1); + Vector256 z = Vector256.Create((ushort)0x1011); + Vector256 result = BitMultiplyMatrix16x16WithOrReduction_Vector256(x, y, z); + Assert.Equal(result, Vector256.Create((ushort)0x1011)); + } + + [Fact] + public static void BitMultiplyMatrix16x16WithOrReduction_Vector512_Test() + { + if (!Avx512Bmm.IsSupported) return; + Vector512 x = Vector512.Create((ushort)0x1); + Vector512 y = 
Vector512.Create((ushort)0x1); + Vector512 z = Vector512.Create((ushort)0x1011); + Vector512 result = BitMultiplyMatrix16x16WithOrReduction_Vector512(x, y, z); + Assert.Equal(result, Vector512.Create((ushort)0x1011)); + } + + [Fact] + public static void BitMultiplyMatrix16x16WithXorReduction_Vector256_Test() + { + if (!Avx512Bmm.IsSupported) return; + Vector256 x = Vector256.Create((ushort)0x1); + Vector256 y = Vector256.Create((ushort)0x1); + Vector256 z = Vector256.Create((ushort)0x1011); + Vector256 result = BitMultiplyMatrix16x16WithXorReduction_Vector256(x, y, z); + Assert.Equal(result, Vector256.Create((ushort)0x1010)); + } + + [Fact] + public static void BitMultiplyMatrix16x16WithXorReduction_Vector512_Test() + { + if (!Avx512Bmm.IsSupported) return; + Vector512 x = Vector512.Create((ushort)0x1); + Vector512 y = Vector512.Create((ushort)0x1); + Vector512 z = Vector512.Create((ushort)0x1011); + Vector512 result = BitMultiplyMatrix16x16WithXorReduction_Vector512(x, y, z); + Assert.Equal(result, Vector512.Create((ushort)0x1010)); + } + + [Fact] + public static void ReverseBits_Vector128_Test() + { + if (!Avx512Bmm.IsSupported) return; + Vector128 x = Vector128.Create((byte)0xAA); + Vector128 y = ReverseBits_Vector128(x); + Assert.Equal(y, Vector128.Create((byte)0x55)); + } + + [Fact] + public static void ReverseBits_Vector128_Mask_Test() + { + if (!Avx512Bmm.IsSupported) return; + Vector128 x = Vector128.Create((byte)0xAA); + Vector128 mask = Vector128.Create(0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + Vector128 y = ReverseBits_Mask_Vector128(x, mask); + Assert.Equal(y, Vector128.Create((byte)0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA)); + } + + [Fact] + public static void ReverseBits_Vector128_Maskz_Test() + { + if (!Avx512Bmm.IsSupported) return; + Vector128 x = Vector128.Create((byte)0xAA); + Vector128 mask = Vector128.Create(0xFF, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + Vector128 y = ReverseBits_Maskz_Vector128(x, mask); + Assert.Equal(y, Vector128.Create((byte)0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00)); + } + + [Fact] + public static void ReverseBits_Vector256_Test() + { + if (!Avx512Bmm.IsSupported) return; + Vector256 x = Vector256.Create((byte)0xAA); + Vector256 y = ReverseBits_Vector256(x); + Assert.Equal(y, Vector256.Create((byte)0x55)); + } + + [Fact] + public static void ReverseBits_Vector256_Mask_Test() + { + if (!Avx512Bmm.IsSupported) return; + Vector256 x = Vector256.Create((byte)0xAA); + Vector256 mask = Vector256.Create(0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + Vector256 y = ReverseBits_Mask_Vector256(x, mask); + Assert.Equal(y, Vector256.Create((byte)0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA)); + } + + [Fact] + public static void ReverseBits_Vector256_Maskz_Test() + { + if (!Avx512Bmm.IsSupported) return; + Vector256 x = Vector256.Create((byte)0xAA); + Vector256 mask = Vector256.Create(0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + Vector256 y = ReverseBits_Maskz_Vector256(x, mask); + Assert.Equal(y, Vector256.Create((byte)0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00)); + } + + [Fact] + public static void ReverseBits_Vector512_Test() + { + if (!Avx512Bmm.IsSupported) return; + Vector512 x = 
Vector512.Create((byte)0xAA); + Vector512 y = ReverseBits_Vector512(x); + Assert.Equal(y, Vector512.Create((byte)0x55)); + } + + [Fact] + public static void ReverseBits_Vector512_Mask_Test() + { + if (!Avx512Bmm.IsSupported) return; + Vector512 x = Vector512.Create((byte)0xAA); + Vector512 mask = Vector512.Create(0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + Vector512 y = ReverseBits_Mask_Vector512(x, mask); + Assert.Equal(y, Vector512.Create((byte)0x55, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA)); + } + + [Fact] + public static void ReverseBits_Vector512_Maskz_Test() + { + if (!Avx512Bmm.IsSupported) return; + Vector512 x = Vector512.Create((byte)0xAA); + Vector512 mask = Vector512.Create(0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + Vector512 y = ReverseBits_Maskz_Vector512(x, mask); + Assert.Equal(y, Vector512.Create((byte)0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00)); + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Program.Avx512Bmm.cs b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Program.Avx512Bmm.cs new file mode 100644 index 00000000000000..f7eced3e0f9e4c --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86_Avx512/Avx512Bmm/Program.Avx512Bmm.cs @@ -0,0 +1,16 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; + +namespace JIT.HardwareIntrinsics.X86._Avx512Bmm +{ + public static partial class Program + { + static Program() + { + + } + } +}