diff --git a/src/libraries/System.IO.Hashing/src/System.IO.Hashing.csproj b/src/libraries/System.IO.Hashing/src/System.IO.Hashing.csproj index 06555e0e92fc41..0a9790f49767db 100644 --- a/src/libraries/System.IO.Hashing/src/System.IO.Hashing.csproj +++ b/src/libraries/System.IO.Hashing/src/System.IO.Hashing.csproj @@ -15,12 +15,10 @@ System.IO.Hashing.XxHash32 - - @@ -35,9 +33,9 @@ System.IO.Hashing.XxHash32 - - - + + + diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32.Arm.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32.Arm.cs deleted file mode 100644 index dae53164389da0..00000000000000 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32.Arm.cs +++ /dev/null @@ -1,69 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Diagnostics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using ArmCrc = System.Runtime.Intrinsics.Arm.Crc32; - -namespace System.IO.Hashing -{ - public partial class Crc32 - { - private static uint UpdateScalarArm64(uint crc, ReadOnlySpan source) - { - Debug.Assert(ArmCrc.Arm64.IsSupported, "ARM CRC support is required."); - - // Compute in 8 byte chunks - if (source.Length >= sizeof(ulong)) - { - ref byte ptr = ref MemoryMarshal.GetReference(source); - int longLength = source.Length & ~0x7; // Exclude trailing bytes not a multiple of 8 - - for (int i = 0; i < longLength; i += sizeof(ulong)) - { - crc = ArmCrc.Arm64.ComputeCrc32(crc, - Unsafe.ReadUnaligned(ref Unsafe.Add(ref ptr, i))); - } - - source = source.Slice(longLength); - } - - // Compute remaining bytes - for (int i = 0; i < source.Length; i++) - { - crc = ArmCrc.ComputeCrc32(crc, source[i]); - } - - return crc; - } - - private static uint UpdateScalarArm32(uint crc, ReadOnlySpan source) - { - Debug.Assert(ArmCrc.IsSupported, "ARM CRC support is required."); - - // Compute in 4 byte chunks - if 
(source.Length >= sizeof(uint)) - { - ref byte ptr = ref MemoryMarshal.GetReference(source); - int intLength = source.Length & ~0x3; // Exclude trailing bytes not a multiple of 4 - - for (int i = 0; i < intLength; i += sizeof(uint)) - { - crc = ArmCrc.ComputeCrc32(crc, - Unsafe.ReadUnaligned(ref Unsafe.Add(ref ptr, i))); - } - - source = source.Slice(intLength); - } - - // Compute remaining bytes - for (int i = 0; i < source.Length; i++) - { - crc = ArmCrc.ComputeCrc32(crc, source[i]); - } - - return crc; - } - } -} diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32.Table.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32.Table.cs deleted file mode 100644 index 0154b062afaabd..00000000000000 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32.Table.cs +++ /dev/null @@ -1,76 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Numerics; - -namespace System.IO.Hashing -{ - public sealed partial class Crc32 : NonCryptographicHashAlgorithm - { - /// CRC-32 transition table - /// - /// While this implementation is based on the standard CRC-32 polynomial, - /// x32 + x26 + x23 + x22 + x16 + x12 + x11 + x10 + x8 + x7 + x5 + x4 + x2 + x1 + x0, - /// this version uses reflected bit ordering, so 0x04C11DB7 becomes 0xEDB88320 - /// - private static ReadOnlySpan CrcLookup => - [ - // Generated by GenerateTable(0xEDB88320u): - // - // static uint[] GenerateTable(uint reflectedPolynomial) - // { - // uint[] table = new uint[256]; - // for (int i = 0; i < table.Length; i++) - // { - // uint val = (uint)i; - // for (int j = 0; j < 8; j++) - // { - // if ((val & 0b0000_0001) == 0) - // { - // val >>= 1; - // } - // else - // { - // val = (val >> 1) ^ reflectedPolynomial; - // } - // } - // table[i] = val; - // } - // return table; - // } - - 0x0, 0x77073096, 0xEE0E612C, 0x990951BA, 0x76DC419, 0x706AF48F, 0xE963A535, 
0x9E6495A3, - 0xEDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x9B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, - 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, - 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, - 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, - 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, - 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, - 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, - 0x76DC4190, 0x1DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x6B6B51F, 0x9FBFE4A5, 0xE8B8D433, - 0x7807C9A2, 0xF00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x86D3D2D, 0x91646C97, 0xE6635C01, - 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, - 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, - 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, - 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, - 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, - 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, - 0xEDB88320, 0x9ABFB3B6, 0x3B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x4DB2615, 0x73DC1683, - 0xE3630B12, 0x94643B84, 0xD6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0xA00AE27, 0x7D079EB1, - 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, - 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, - 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, - 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 
0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, - 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, - 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, - 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x26D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x5005713, - 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0xCB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0xBDBDF21, - 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, - 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, - 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, - 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, - 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, - 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D, - ]; - } -} diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32.Vectorized.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32.Vectorized.cs deleted file mode 100644 index f263b6384acfc2..00000000000000 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32.Vectorized.cs +++ /dev/null @@ -1,129 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Diagnostics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using static System.IO.Hashing.VectorHelper; - -namespace System.IO.Hashing -{ - public partial class Crc32 - { - // We check for little endian byte order here in case we're ever on ARM in big endian mode. 
- // All of these checks except the length check are elided by JIT, so the JITted implementation - // will be either a return false or a length check against a constant. This means this method - // should be inlined into the caller. - private static bool CanBeVectorized(ReadOnlySpan source) => - BitConverter.IsLittleEndian - && VectorHelper.IsSupported - // Vectorization can process spans as short as a single vector (16 bytes), but if ARM intrinsics are supported they - // seem to be more performant for spans less than 8 vectors (128 bytes). - && source.Length >= Vector128.Count * (System.Runtime.Intrinsics.Arm.Crc32.IsSupported ? 8 : 1); - - // Processes the bytes in source in 64 byte chunks using carryless/polynomial multiplication intrinsics, - // followed by processing 16 byte chunks, and then processing remaining bytes individually. Requires - // little endian byte order and support for PCLMULQDQ intrinsics on Intel architecture or AES and - // AdvSimd intrinsics on ARM architecture. Based on the algorithm put forth in the Intel paper "Fast CRC - // Computation for Generic Polynomials Using PCLMULQDQ Instruction" in December, 2009. - // https://github.com/intel/isa-l/blob/33a2d9484595c2d6516c920ce39a694c144ddf69/crc/crc32_ieee_by4.asm - // https://github.com/SixLabors/ImageSharp/blob/f4f689ce67ecbcc35cebddba5aacb603e6d1068a/src/ImageSharp/Formats/Png/Zlib/Crc32.cs#L80 - // - // Marking this as noinline so the JIT doesn't try and inline this and end up not inlining some of the calls it makes. - // - [MethodImpl(MethodImplOptions.NoInlining)] - private static uint UpdateVectorized(uint crc, ReadOnlySpan source) - { - Debug.Assert(CanBeVectorized(source), "source cannot be vectorized."); - - // Work with a reference to where we're at in the ReadOnlySpan and a local length - // to avoid extraneous range checks. 
- ref byte srcRef = ref MemoryMarshal.GetReference(source); - int length = source.Length; - - Vector128 kConstants; - Vector128 x1; // Accumulator for the new CRC - Vector128 x2; - - if (length >= Vector128.Count * 8) - { - x1 = Vector128.LoadUnsafe(ref srcRef).AsUInt64(); - x2 = Vector128.LoadUnsafe(ref srcRef, 16).AsUInt64(); - Vector128 x3 = Vector128.LoadUnsafe(ref srcRef, 32).AsUInt64(); - Vector128 x4 = Vector128.LoadUnsafe(ref srcRef, 48).AsUInt64(); - - srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 4); - length -= Vector128.Count * 4; - - // Load and XOR the initial CRC value - x1 ^= Vector128.CreateScalar(crc).AsUInt64(); - - kConstants = Vector128.Create(0x0154442bd4UL, 0x01c6e41596UL); // k1, k2 - - // Parallel fold blocks of 64, if any. - do - { - Vector128 y5 = Vector128.LoadUnsafe(ref srcRef).AsUInt64(); - Vector128 y6 = Vector128.LoadUnsafe(ref srcRef, 16).AsUInt64(); - Vector128 y7 = Vector128.LoadUnsafe(ref srcRef, 32).AsUInt64(); - Vector128 y8 = Vector128.LoadUnsafe(ref srcRef, 48).AsUInt64(); - - x1 = FoldPolynomialPair(y5, x1, kConstants); - x2 = FoldPolynomialPair(y6, x2, kConstants); - x3 = FoldPolynomialPair(y7, x3, kConstants); - x4 = FoldPolynomialPair(y8, x4, kConstants); - - srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 4); - length -= Vector128.Count * 4; - } while (length >= Vector128.Count * 4); - - // Fold into 128-bits. - kConstants = Vector128.Create(0x01751997d0UL, 0x00ccaa009eUL); // k3, k4 - x1 = FoldPolynomialPair(x2, x1, kConstants); - x1 = FoldPolynomialPair(x3, x1, kConstants); - x1 = FoldPolynomialPair(x4, x1, kConstants); - } - else - { - // For shorter sources just load the first vector and XOR with the CRC - Debug.Assert(length >= 16); - - x1 = Vector128.LoadUnsafe(ref srcRef).AsUInt64(); - x1 ^= Vector128.CreateScalar(crc).AsUInt64(); - - srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count); - length -= Vector128.Count; - } - - // Single fold blocks of 16, if any. 
- while (length >= Vector128.Count) - { - x1 = FoldPolynomialPair(Vector128.LoadUnsafe(ref srcRef).AsUInt64(), x1, - Vector128.Create(0x01751997d0UL, 0x00ccaa009eUL)); - - srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count); - length -= Vector128.Count; - } - - // Fold 128 bits to 64 bits. - Vector128 bitmask = Vector128.Create(~0, 0, ~0, 0).AsUInt64(); - x1 = ShiftRightBytesInVector(x1, 8) ^ - CarrylessMultiplyLower(x1, Vector128.CreateScalar(0x00ccaa009eUL)); - x1 = CarrylessMultiplyLower(x1 & bitmask, Vector128.CreateScalar(0x0163cd6124UL)) ^ // k5, k0 - ShiftRightBytesInVector(x1, 4); - - // Reduce to 32 bits. - kConstants = Vector128.Create(0x01db710641UL, 0x01f7011641UL); // polynomial - x2 = CarrylessMultiplyLeftLowerRightUpper(x1 & bitmask, kConstants) & bitmask; - x2 = CarrylessMultiplyLower(x2, kConstants); - x1 ^= x2; - - // Process the remaining bytes, if any - uint result = x1.AsUInt32().GetElement(1); - return length > 0 - ? UpdateScalar(result, MemoryMarshal.CreateReadOnlySpan(ref srcRef, length)) - : result; - } - } -} diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32.cs index 47cd790cd451fa..f38634d4336584 100644 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32.cs +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32.cs @@ -17,7 +17,7 @@ namespace System.IO.Hashing /// the Little Endian representation of 0x2144DF1C. /// /// - public sealed partial class Crc32 : NonCryptographicHashAlgorithm + public sealed class Crc32 : NonCryptographicHashAlgorithm { private const int Size = sizeof(uint); @@ -292,7 +292,8 @@ public static uint HashToUInt32(ReadOnlySpan source) // Rather than go through Crc32ParameterSet.Crc32 to end up in the optimized Update method here, // just call the Update method directly. // ITU-T V.42 / IEEE 802.3 uses a final XOR of 0xFFFFFFFF, so accelerate that as ~. 
- return ~Update(Crc32ParameterSet.Crc32.InitialValue, source); + Crc32ParameterSet parameterSet = Crc32ParameterSet.Crc32; + return ~parameterSet.Update(parameterSet.InitialValue, source); } /// Computes the CRC-32 hash of the provided data, using specified parameters. @@ -310,44 +311,5 @@ public static uint HashToUInt32(Crc32ParameterSet parameterSet, ReadOnlySpan source) - { -#if NET - if (CanBeVectorized(source)) - { - return UpdateVectorized(crc, source); - } -#endif - - return UpdateScalar(crc, source); - } - - private static uint UpdateScalar(uint crc, ReadOnlySpan source) - { -#if NET - // Use ARM intrinsics for CRC if available. This is used for the trailing bytes on the vectorized path - // and is the primary method if the vectorized path is unavailable. - if (System.Runtime.Intrinsics.Arm.Crc32.Arm64.IsSupported) - { - return UpdateScalarArm64(crc, source); - } - - if (System.Runtime.Intrinsics.Arm.Crc32.IsSupported) - { - return UpdateScalarArm32(crc, source); - } -#endif - - ReadOnlySpan crcLookup = CrcLookup; - for (int i = 0; i < source.Length; i++) - { - byte idx = (byte)crc; - idx ^= source[i]; - crc = crcLookup[idx] ^ (crc >> 8); - } - - return crc; - } } } diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.Table.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.Table.cs index ad5970a1fddf23..d85a932648ef86 100644 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.Table.cs +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.Table.cs @@ -59,17 +59,17 @@ private static uint[] GenerateLookupTable(uint polynomial, bool reflectInput) return table; } - private sealed class ReflectedTableBasedCrc32 : Crc32ParameterSet + private sealed class ReflectedTableBasedCrc32 : ReflectedCrc32 { private readonly uint[] _lookupTable; internal ReflectedTableBasedCrc32(uint polynomial, uint initialValue, uint finalXorValue) - : base(polynomial, 
initialValue, finalXorValue, reflectValues: true) + : base(polynomial, initialValue, finalXorValue) { _lookupTable = GenerateLookupTable(polynomial, reflectInput: true); } - internal override uint Update(uint value, ReadOnlySpan source) + protected override uint UpdateScalar(uint value, ReadOnlySpan source) { uint[] lookupTable = _lookupTable; uint crc = value; @@ -86,17 +86,17 @@ internal override uint Update(uint value, ReadOnlySpan source) } } - private sealed class ForwardTableBasedCrc32 : Crc32ParameterSet + private sealed class ForwardTableBasedCrc32 : ForwardCrc32 { private readonly uint[] _lookupTable; internal ForwardTableBasedCrc32(uint polynomial, uint initialValue, uint finalXorValue) - : base(polynomial, initialValue, finalXorValue, reflectValues: false) + : base(polynomial, initialValue, finalXorValue) { _lookupTable = GenerateLookupTable(polynomial, reflectInput: false); } - internal override uint Update(uint value, ReadOnlySpan source) + protected override uint UpdateScalar(uint value, ReadOnlySpan source) { uint[] lookupTable = _lookupTable; uint crc = value; diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.Vectorized.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.Vectorized.cs new file mode 100644 index 00000000000000..f32e7a07c0c2b4 --- /dev/null +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.Vectorized.cs @@ -0,0 +1,298 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#if NET + +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using static System.IO.Hashing.VectorHelper; + +namespace System.IO.Hashing +{ + public partial class Crc32ParameterSet + { + private partial class ReflectedCrc32 + { + private readonly int _shouldVectorizeScale = 1; + private Vector128 _k1k2; + private Vector128 _k3k4; + private ulong _k4; + private ulong _k6; + private Vector128 _polyMu; + + protected ReflectedCrc32(int shouldVectorizeScale, uint polynomial, uint initialValue, uint finalXorValue) + : this(polynomial, initialValue, finalXorValue) + { + Debug.Assert(shouldVectorizeScale > 0); + _shouldVectorizeScale = shouldVectorizeScale; + } + + partial void InitializeVectorized(ref bool canVectorize) + { + if (!BitConverter.IsLittleEndian || !VectorHelper.IsSupported) + { + return; + } + + ulong fullPoly = (1UL << 32) | Polynomial; + + ulong k1 = ReflectConstant(CrcPolynomialHelper.ComputeFoldingConstantCrc32(fullPoly, 4 * 128 + 32), 33); + ulong k2 = ReflectConstant(CrcPolynomialHelper.ComputeFoldingConstantCrc32(fullPoly, 4 * 128 - 32), 33); + ulong k3 = ReflectConstant(CrcPolynomialHelper.ComputeFoldingConstantCrc32(fullPoly, 128 + 32), 33); + ulong k4 = ReflectConstant(CrcPolynomialHelper.ComputeFoldingConstantCrc32(fullPoly, 128 - 32), 33); + ulong k5 = ReflectConstant(CrcPolynomialHelper.ComputeFoldingConstantCrc32(fullPoly, 64), 33); + ulong mu = CrcPolynomialHelper.ComputeBarrettConstantCrc32(fullPoly); + + _k1k2 = Vector128.Create(k1, k2); + _k3k4 = Vector128.Create(k3, k4); + _k4 = k4; + _k6 = k5; + _polyMu = Vector128.Create(ReflectConstant(fullPoly, 33), ReflectConstant(mu, 33)); + + canVectorize = true; + + static ulong ReflectConstant(ulong value, int width) + { + ulong result = 0; + for (int i = 0; i < width; i++) + { + if (((value >> i) & 1) != 0) + { + result |= 1UL << (width - 1 - i); + } + } + + return result; + } + } + + partial void 
UpdateVectorized(ref uint crc, ReadOnlySpan source, ref int bytesConsumed) + { + if (!_canVectorize || source.Length < _shouldVectorizeScale * Vector128.Count) + { + return; + } + + crc = UpdateVectorizedCore(crc, source, out bytesConsumed); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private uint UpdateVectorizedCore(uint crc, ReadOnlySpan source, out int bytesConsumed) + { + ref byte srcRef = ref MemoryMarshal.GetReference(source); + int length = source.Length; + + Vector128 x1; + Vector128 x2; + + if (length >= Vector128.Count * 4) + { + x1 = Vector128.LoadUnsafe(ref srcRef).AsUInt64(); + x2 = Vector128.LoadUnsafe(ref srcRef, 16).AsUInt64(); + Vector128 x3 = Vector128.LoadUnsafe(ref srcRef, 32).AsUInt64(); + Vector128 x4 = Vector128.LoadUnsafe(ref srcRef, 48).AsUInt64(); + + srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 4); + length -= Vector128.Count * 4; + + x1 ^= Vector128.CreateScalar(crc).AsUInt64(); + + while (length >= Vector128.Count * 4) + { + Vector128 y5 = Vector128.LoadUnsafe(ref srcRef).AsUInt64(); + Vector128 y6 = Vector128.LoadUnsafe(ref srcRef, 16).AsUInt64(); + Vector128 y7 = Vector128.LoadUnsafe(ref srcRef, 32).AsUInt64(); + Vector128 y8 = Vector128.LoadUnsafe(ref srcRef, 48).AsUInt64(); + + x1 = FoldPolynomialPair(y5, x1, _k1k2); + x2 = FoldPolynomialPair(y6, x2, _k1k2); + x3 = FoldPolynomialPair(y7, x3, _k1k2); + x4 = FoldPolynomialPair(y8, x4, _k1k2); + + srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 4); + length -= Vector128.Count * 4; + } + + x1 = FoldPolynomialPair(x2, x1, _k3k4); + x1 = FoldPolynomialPair(x3, x1, _k3k4); + x1 = FoldPolynomialPair(x4, x1, _k3k4); + } + else + { + x1 = Vector128.LoadUnsafe(ref srcRef).AsUInt64(); + x1 ^= Vector128.CreateScalar(crc).AsUInt64(); + + srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count); + length -= Vector128.Count; + } + + while (length >= Vector128.Count) + { + x1 = FoldPolynomialPair(Vector128.LoadUnsafe(ref srcRef).AsUInt64(), x1, _k3k4); + + srcRef = ref 
Unsafe.Add(ref srcRef, Vector128.Count); + length -= Vector128.Count; + } + + // Fold 128 bits to 64 bits. + Vector128 bitmask = Vector128.Create(~0, 0, ~0, 0).AsUInt64(); + x1 = ShiftRightBytesInVector(x1, 8) ^ + CarrylessMultiplyLower(x1, Vector128.CreateScalar(_k4)); + x1 = CarrylessMultiplyLower(x1 & bitmask, Vector128.CreateScalar(_k6)) ^ + ShiftRightBytesInVector(x1, 4); + + // Barrett reduction to 32 bits. + x2 = CarrylessMultiplyLeftLowerRightUpper(x1 & bitmask, _polyMu) & bitmask; + x2 = CarrylessMultiplyLower(x2, _polyMu); + x1 ^= x2; + + bytesConsumed = source.Length - length; + return x1.AsUInt32().GetElement(1); + } + } + + private partial class ForwardCrc32 + { + private Vector128 _k1k2; + private Vector128 _k3k4; + private Vector128 _foldConstants; + private ulong _k6; + private ulong _mu; + + partial void InitializeVectorized(ref bool canVectorize) + { + if (!BitConverter.IsLittleEndian || !VectorHelper.IsSupported) + { + return; + } + + ulong fullPoly = 1UL << 32 | Polynomial; + + ulong k1 = CrcPolynomialHelper.ComputeFoldingConstantCrc32(fullPoly, 4 * 128 + 64); + ulong k2 = CrcPolynomialHelper.ComputeFoldingConstantCrc32(fullPoly, 4 * 128); + ulong k3 = CrcPolynomialHelper.ComputeFoldingConstantCrc32(fullPoly, 128 + 64); + ulong k4 = CrcPolynomialHelper.ComputeFoldingConstantCrc32(fullPoly, 128); + ulong k5 = CrcPolynomialHelper.ComputeFoldingConstantCrc32(fullPoly, 96); + ulong k6 = CrcPolynomialHelper.ComputeFoldingConstantCrc32(fullPoly, 64); + + _k1k2 = Vector128.Create(k2, k1); + _k3k4 = Vector128.Create(k4, k3); + _k6 = k6; + + _foldConstants = Vector128.Create( + CrcPolynomialHelper.ComputeFoldingConstantCrc32(fullPoly, 32), + k5); + + _mu = CrcPolynomialHelper.ComputeBarrettConstantCrc32(fullPoly); + + canVectorize = true; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector128 LoadReversed(ref byte source, nuint elementOffset) + { + Vector128 vector = Vector128.LoadUnsafe(ref source, elementOffset); + + if 
(BitConverter.IsLittleEndian) + { + vector = Vector128.Shuffle( + vector, + Vector128.Create( + (byte)0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00)); + } + + return vector.AsUInt64(); + } + + partial void UpdateVectorized(ref uint crc, ReadOnlySpan source, ref int bytesConsumed) + { + if (!_canVectorize || source.Length < Vector128.Count) + { + return; + } + + crc = UpdateVectorizedCore(crc, source, out bytesConsumed); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private uint UpdateVectorizedCore(uint crc, ReadOnlySpan source, out int bytesConsumed) + { + ref byte srcRef = ref MemoryMarshal.GetReference(source); + int length = source.Length; + + Vector128 x1; + + if (length >= Vector128.Count * 4) + { + x1 = LoadReversed(ref srcRef, 0); + Vector128 x2 = LoadReversed(ref srcRef, 16); + Vector128 x3 = LoadReversed(ref srcRef, 32); + Vector128 x4 = LoadReversed(ref srcRef, 48); + + srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 4); + length -= Vector128.Count * 4; + + x1 ^= ShiftLowerToUpper(Vector128.CreateScalar((ulong)crc << 32)); + + while (length >= Vector128.Count * 4) + { + Vector128 y5 = LoadReversed(ref srcRef, 0); + Vector128 y6 = LoadReversed(ref srcRef, 16); + Vector128 y7 = LoadReversed(ref srcRef, 32); + Vector128 y8 = LoadReversed(ref srcRef, 48); + + x1 = FoldPolynomialPair(y5, x1, _k1k2); + x2 = FoldPolynomialPair(y6, x2, _k1k2); + x3 = FoldPolynomialPair(y7, x3, _k1k2); + x4 = FoldPolynomialPair(y8, x4, _k1k2); + + srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 4); + length -= Vector128.Count * 4; + } + + x1 = FoldPolynomialPair(x2, x1, _k3k4); + x1 = FoldPolynomialPair(x3, x1, _k3k4); + x1 = FoldPolynomialPair(x4, x1, _k3k4); + } + else + { + x1 = LoadReversed(ref srcRef, 0); + x1 ^= ShiftLowerToUpper(Vector128.CreateScalar((ulong)crc << 32)); + + srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count); + length -= Vector128.Count; + } + + while (length >= Vector128.Count) + { + 
x1 = FoldPolynomialPair(LoadReversed(ref srcRef, 0), x1, _k3k4); + + srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count); + length -= Vector128.Count; + } + + x1 = FoldPolynomialPair(Vector128.Zero, x1, _foldConstants); + + Vector128 lowerMask = Vector128.Create(~0UL, 0UL); + x1 = CarrylessMultiplyLeftUpperRightLower(x1, Vector128.CreateScalar(_k6)) ^ (x1 & lowerMask); + + Vector128 bitmask = Vector128.Create(~0, 0, ~0, 0).AsUInt64(); + Vector128 temp = x1; + x1 = ShiftRightBytesInVector(x1, 4) & bitmask; + x1 = CarrylessMultiplyLower(x1, Vector128.CreateScalar(_mu)); + x1 = ShiftRightBytesInVector(x1, 4) & bitmask; + + x1 = CarrylessMultiplyLower(x1, Vector128.CreateScalar(Polynomial)); + x1 ^= temp; + + bytesConsumed = source.Length - length; + return x1.AsUInt32().GetElement(0); + } + } + } +} + +#endif diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.WellKnown.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.WellKnown.cs index fe4d7a24cffe88..d51799ab491b20 100644 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.WellKnown.cs +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.WellKnown.cs @@ -45,25 +45,165 @@ private static Crc32ParameterSet MakeCrc32CParameterSet() reflectValues: true); } - private sealed class Ieee8023ParameterSet : Crc32ParameterSet + private sealed class Ieee8023ParameterSet : ReflectedCrc32 { + // Pre-computed reflection table for the standard CRC-32 polynomial, 0x04C11DB7. 
+ // See the GenerateLookupTable method in Crc32ParameterSet.Table.cs + private static ReadOnlySpan CrcLookup => + [ + 0x0, 0x77073096, 0xEE0E612C, 0x990951BA, 0x76DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, + 0xEDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x9B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, + 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, + 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, + 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, + 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, + 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, + 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, + 0x76DC4190, 0x1DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x6B6B51F, 0x9FBFE4A5, 0xE8B8D433, + 0x7807C9A2, 0xF00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x86D3D2D, 0x91646C97, 0xE6635C01, + 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, + 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, + 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, + 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, + 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, + 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, + 0xEDB88320, 0x9ABFB3B6, 0x3B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x4DB2615, 0x73DC1683, + 0xE3630B12, 0x94643B84, 0xD6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0xA00AE27, 0x7D079EB1, + 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, + 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 
0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, + 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, + 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, + 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, + 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, + 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x26D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x5005713, + 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0xCB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0xBDBDF21, + 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, + 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, + 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, + 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, + 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, + 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D, + ]; + public Ieee8023ParameterSet() - : base(0x04c11db7, 0xffffffff, 0xffffffff, reflectValues: true) + : base( +#if NET + System.Runtime.Intrinsics.Arm.Crc32.IsSupported ? 
8 : 1, +#endif + 0x04c11db7, + 0xffffffff, + 0xffffffff) + { + } + + protected override uint UpdateScalar(uint value, ReadOnlySpan source) { +#if NET + if (System.Runtime.Intrinsics.Arm.Crc32.Arm64.IsSupported) + { + return UpdateScalarArm64(value, source); + } + + if (System.Runtime.Intrinsics.Arm.Crc32.IsSupported) + { + return UpdateScalarArm(value, source); + } +#endif + + return UpdateScalarTable(value, source); + } + + private static uint UpdateScalarTable(uint crc, ReadOnlySpan source) + { + ReadOnlySpan crcLookup = CrcLookup; + + foreach (byte b in source) + { + byte idx = (byte)crc; + idx ^= b; + crc = crcLookup[idx] ^ (crc >> 8); + } + + return crc; + } + +#if NET + private static uint UpdateScalarArm64(uint crc, ReadOnlySpan source) + { + Debug.Assert(System.Runtime.Intrinsics.Arm.Crc32.Arm64.IsSupported, "ARM CRC support is required."); + + // Compute in 8 byte chunks + if (source.Length >= sizeof(ulong)) + { + ref byte ptr = ref MemoryMarshal.GetReference(source); + + // Exclude trailing bytes not a multiple of 8 + int longLength = source.Length & ~0x7; + + for (int i = 0; i < longLength; i += sizeof(ulong)) + { + crc = System.Runtime.Intrinsics.Arm.Crc32.Arm64.ComputeCrc32( + crc, + Unsafe.ReadUnaligned(ref Unsafe.Add(ref ptr, i))); + } + source = source.Slice(longLength); + } + + // Compute remaining bytes + for (int i = 0; i < source.Length; i++) + { + crc = System.Runtime.Intrinsics.Arm.Crc32.ComputeCrc32(crc, source[i]); + } + + return crc; } - internal override uint Update(uint value, ReadOnlySpan source) => Hashing.Crc32.Update(value, source); + private static uint UpdateScalarArm(uint crc, ReadOnlySpan source) + { + Debug.Assert(System.Runtime.Intrinsics.Arm.Crc32.IsSupported, "ARM CRC support is required."); + + // Compute in 4 byte chunks + if (source.Length >= sizeof(uint)) + { + ref byte ptr = ref MemoryMarshal.GetReference(source); + + // Exclude trailing bytes not a multiple of 4 + int intLength = source.Length & ~0x3; + + for (int i = 0; 
i < intLength; i += sizeof(uint)) + { + crc = System.Runtime.Intrinsics.Arm.Crc32.ComputeCrc32( + crc, + Unsafe.ReadUnaligned(ref Unsafe.Add(ref ptr, i))); + } + + source = source.Slice(intLength); + } + + // Compute remaining bytes + for (int i = 0; i < source.Length; i++) + { + crc = System.Runtime.Intrinsics.Arm.Crc32.ComputeCrc32(crc, source[i]); + } + + return crc; + } +#endif } #if NET - private sealed class Crc32CParameterSet : Crc32ParameterSet + private sealed class Crc32CParameterSet : ReflectedCrc32 { public Crc32CParameterSet() - : base(0x1edc6f41, 0xffffffff, 0xffffffff, reflectValues: true) + : base( + System.Runtime.Intrinsics.X86.Sse42.IsSupported || System.Runtime.Intrinsics.Arm.Crc32.IsSupported ? 8 : 1, + 0x1edc6f41, + 0xffffffff, + 0xffffffff) { } - internal override uint Update(uint value, ReadOnlySpan source) => UpdateIntrinsic(value, source); + protected override uint UpdateScalar(uint value, ReadOnlySpan source) => UpdateIntrinsic(value, source); private static uint UpdateIntrinsic(uint crc, ReadOnlySpan source) { diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.cs index ade85cb127ec36..ad493fbed121c2 100644 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.cs +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc32ParameterSet.cs @@ -111,5 +111,73 @@ private static uint ReverseBits(uint value) return BinaryPrimitives.ReverseEndianness(value); } + + private abstract partial class ReflectedCrc32 : Crc32ParameterSet + { +#if NET + // Declare the capability field here so it can be declared readonly. 
+ private readonly bool _canVectorize; +#endif + + partial void InitializeVectorized(ref bool canVectorize); + partial void UpdateVectorized(ref uint crc, ReadOnlySpan source, ref int bytesConsumed); + + protected ReflectedCrc32(uint polynomial, uint initialValue, uint finalXorValue) + : base(polynomial, initialValue, finalXorValue, reflectValues: true) + { +#if NET + InitializeVectorized(ref _canVectorize); +#endif + } + + protected abstract uint UpdateScalar(uint value, ReadOnlySpan source); + + internal sealed override uint Update(uint value, ReadOnlySpan source) + { + int consumed = 0; + UpdateVectorized(ref value, source, ref consumed); + + if (consumed < source.Length) + { + value = UpdateScalar(value, source.Slice(consumed)); + } + + return value; + } + } + + private abstract partial class ForwardCrc32 : Crc32ParameterSet + { +#if NET + // Declare the capability field here so it can be declared readonly. + private readonly bool _canVectorize; +#endif + + partial void InitializeVectorized(ref bool canVectorize); + partial void UpdateVectorized(ref uint crc, ReadOnlySpan source, ref int bytesConsumed); + + protected ForwardCrc32(uint polynomial, uint initialValue, uint finalXorValue) + : base(polynomial, initialValue, finalXorValue, reflectValues: false) + { +#if NET + InitializeVectorized(ref _canVectorize); +#endif + } + + protected abstract uint UpdateScalar(uint value, ReadOnlySpan source); + + internal sealed override uint Update(uint value, ReadOnlySpan source) + { + int consumed = 0; + UpdateVectorized(ref value, source, ref consumed); + + if (consumed < source.Length) + { + value = UpdateScalar(value, source.Slice(consumed)); + } + + return value; + } + } } } diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64.Table.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64.Table.cs deleted file mode 100644 index 686c66079b9ccc..00000000000000 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64.Table.cs +++ 
/dev/null @@ -1,69 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -namespace System.IO.Hashing -{ - public sealed partial class Crc64 : NonCryptographicHashAlgorithm - { - /// CRC-64 transition table. - private static ReadOnlySpan CrcLookup => // 256 - [ - // Generated by GenerateTable(0x42F0E1EBA9EA3693): - // - // static ulong[] GenerateTable(ulong polynomial) - // { - // var table = new ulong[256]; - // for (int i = 0; i < table.Length; i++) - // { - // ulong val = (ulong)i << 56; - // for (int j = 0; j < 8; j++) - // { - // if ((val & 0x8000_0000_0000_0000) == 0) - // { - // val <<= 1; - // } - // else - // { - // val = (val << 1) ^ polynomial; - // } - // } - // table[i] = val; - // } - // return table; - // } - - 0x0, 0x42F0E1EBA9EA3693, 0x85E1C3D753D46D26, 0xC711223CFA3E5BB5, 0x493366450E42ECDF, 0xBC387AEA7A8DA4C, 0xCCD2A5925D9681F9, 0x8E224479F47CB76A, - 0x9266CC8A1C85D9BE, 0xD0962D61B56FEF2D, 0x17870F5D4F51B498, 0x5577EEB6E6BB820B, 0xDB55AACF12C73561, 0x99A54B24BB2D03F2, 0x5EB4691841135847, 0x1C4488F3E8F96ED4, - 0x663D78FF90E185EF, 0x24CD9914390BB37C, 0xE3DCBB28C335E8C9, 0xA12C5AC36ADFDE5A, 0x2F0E1EBA9EA36930, 0x6DFEFF5137495FA3, 0xAAEFDD6DCD770416, 0xE81F3C86649D3285, - 0xF45BB4758C645C51, 0xB6AB559E258E6AC2, 0x71BA77A2DFB03177, 0x334A9649765A07E4, 0xBD68D2308226B08E, 0xFF9833DB2BCC861D, 0x388911E7D1F2DDA8, 0x7A79F00C7818EB3B, - 0xCC7AF1FF21C30BDE, 0x8E8A101488293D4D, 0x499B3228721766F8, 0xB6BD3C3DBFD506B, 0x854997BA2F81E701, 0xC7B97651866BD192, 0xA8546D7C558A27, 0x4258B586D5BFBCB4, - 0x5E1C3D753D46D260, 0x1CECDC9E94ACE4F3, 0xDBFDFEA26E92BF46, 0x990D1F49C77889D5, 0x172F5B3033043EBF, 0x55DFBADB9AEE082C, 0x92CE98E760D05399, 0xD03E790CC93A650A, - 0xAA478900B1228E31, 0xE8B768EB18C8B8A2, 0x2FA64AD7E2F6E317, 0x6D56AB3C4B1CD584, 0xE374EF45BF6062EE, 0xA1840EAE168A547D, 0x66952C92ECB40FC8, 0x2465CD79455E395B, - 0x3821458AADA7578F, 0x7AD1A461044D611C, 
0xBDC0865DFE733AA9, 0xFF3067B657990C3A, 0x711223CFA3E5BB50, 0x33E2C2240A0F8DC3, 0xF4F3E018F031D676, 0xB60301F359DBE0E5, - 0xDA050215EA6C212F, 0x98F5E3FE438617BC, 0x5FE4C1C2B9B84C09, 0x1D14202910527A9A, 0x93366450E42ECDF0, 0xD1C685BB4DC4FB63, 0x16D7A787B7FAA0D6, 0x5427466C1E109645, - 0x4863CE9FF6E9F891, 0xA932F745F03CE02, 0xCD820D48A53D95B7, 0x8F72ECA30CD7A324, 0x150A8DAF8AB144E, 0x43A04931514122DD, 0x84B16B0DAB7F7968, 0xC6418AE602954FFB, - 0xBC387AEA7A8DA4C0, 0xFEC89B01D3679253, 0x39D9B93D2959C9E6, 0x7B2958D680B3FF75, 0xF50B1CAF74CF481F, 0xB7FBFD44DD257E8C, 0x70EADF78271B2539, 0x321A3E938EF113AA, - 0x2E5EB66066087D7E, 0x6CAE578BCFE24BED, 0xABBF75B735DC1058, 0xE94F945C9C3626CB, 0x676DD025684A91A1, 0x259D31CEC1A0A732, 0xE28C13F23B9EFC87, 0xA07CF2199274CA14, - 0x167FF3EACBAF2AF1, 0x548F120162451C62, 0x939E303D987B47D7, 0xD16ED1D631917144, 0x5F4C95AFC5EDC62E, 0x1DBC74446C07F0BD, 0xDAAD56789639AB08, 0x985DB7933FD39D9B, - 0x84193F60D72AF34F, 0xC6E9DE8B7EC0C5DC, 0x1F8FCB784FE9E69, 0x43081D5C2D14A8FA, 0xCD2A5925D9681F90, 0x8FDAB8CE70822903, 0x48CB9AF28ABC72B6, 0xA3B7B1923564425, - 0x70428B155B4EAF1E, 0x32B26AFEF2A4998D, 0xF5A348C2089AC238, 0xB753A929A170F4AB, 0x3971ED50550C43C1, 0x7B810CBBFCE67552, 0xBC902E8706D82EE7, 0xFE60CF6CAF321874, - 0xE224479F47CB76A0, 0xA0D4A674EE214033, 0x67C58448141F1B86, 0x253565A3BDF52D15, 0xAB1721DA49899A7F, 0xE9E7C031E063ACEC, 0x2EF6E20D1A5DF759, 0x6C0603E6B3B7C1CA, - 0xF6FAE5C07D3274CD, 0xB40A042BD4D8425E, 0x731B26172EE619EB, 0x31EBC7FC870C2F78, 0xBFC9838573709812, 0xFD39626EDA9AAE81, 0x3A28405220A4F534, 0x78D8A1B9894EC3A7, - 0x649C294A61B7AD73, 0x266CC8A1C85D9BE0, 0xE17DEA9D3263C055, 0xA38D0B769B89F6C6, 0x2DAF4F0F6FF541AC, 0x6F5FAEE4C61F773F, 0xA84E8CD83C212C8A, 0xEABE6D3395CB1A19, - 0x90C79D3FEDD3F122, 0xD2377CD44439C7B1, 0x15265EE8BE079C04, 0x57D6BF0317EDAA97, 0xD9F4FB7AE3911DFD, 0x9B041A914A7B2B6E, 0x5C1538ADB04570DB, 0x1EE5D94619AF4648, - 0x2A151B5F156289C, 0x4051B05E58BC1E0F, 0x87409262A28245BA, 0xC5B073890B687329, 0x4B9237F0FF14C443, 
0x962D61B56FEF2D0, 0xCE73F427ACC0A965, 0x8C8315CC052A9FF6, - 0x3A80143F5CF17F13, 0x7870F5D4F51B4980, 0xBF61D7E80F251235, 0xFD913603A6CF24A6, 0x73B3727A52B393CC, 0x31439391FB59A55F, 0xF652B1AD0167FEEA, 0xB4A25046A88DC879, - 0xA8E6D8B54074A6AD, 0xEA16395EE99E903E, 0x2D071B6213A0CB8B, 0x6FF7FA89BA4AFD18, 0xE1D5BEF04E364A72, 0xA3255F1BE7DC7CE1, 0x64347D271DE22754, 0x26C49CCCB40811C7, - 0x5CBD6CC0CC10FAFC, 0x1E4D8D2B65FACC6F, 0xD95CAF179FC497DA, 0x9BAC4EFC362EA149, 0x158E0A85C2521623, 0x577EEB6E6BB820B0, 0x906FC95291867B05, 0xD29F28B9386C4D96, - 0xCEDBA04AD0952342, 0x8C2B41A1797F15D1, 0x4B3A639D83414E64, 0x9CA82762AAB78F7, 0x87E8C60FDED7CF9D, 0xC51827E4773DF90E, 0x20905D88D03A2BB, 0x40F9E43324E99428, - 0x2CFFE7D5975E55E2, 0x6E0F063E3EB46371, 0xA91E2402C48A38C4, 0xEBEEC5E96D600E57, 0x65CC8190991CB93D, 0x273C607B30F68FAE, 0xE02D4247CAC8D41B, 0xA2DDA3AC6322E288, - 0xBE992B5F8BDB8C5C, 0xFC69CAB42231BACF, 0x3B78E888D80FE17A, 0x7988096371E5D7E9, 0xF7AA4D1A85996083, 0xB55AACF12C735610, 0x724B8ECDD64D0DA5, 0x30BB6F267FA73B36, - 0x4AC29F2A07BFD00D, 0x8327EC1AE55E69E, 0xCF235CFD546BBD2B, 0x8DD3BD16FD818BB8, 0x3F1F96F09FD3CD2, 0x41011884A0170A41, 0x86103AB85A2951F4, 0xC4E0DB53F3C36767, - 0xD8A453A01B3A09B3, 0x9A54B24BB2D03F20, 0x5D45907748EE6495, 0x1FB5719CE1045206, 0x919735E51578E56C, 0xD367D40EBC92D3FF, 0x1476F63246AC884A, 0x568617D9EF46BED9, - 0xE085162AB69D5E3C, 0xA275F7C11F7768AF, 0x6564D5FDE549331A, 0x279434164CA30589, 0xA9B6706FB8DFB2E3, 0xEB46918411358470, 0x2C57B3B8EB0BDFC5, 0x6EA7525342E1E956, - 0x72E3DAA0AA188782, 0x30133B4B03F2B111, 0xF7021977F9CCEAA4, 0xB5F2F89C5026DC37, 0x3BD0BCE5A45A6B5D, 0x79205D0E0DB05DCE, 0xBE317F32F78E067B, 0xFCC19ED95E6430E8, - 0x86B86ED5267CDBD3, 0xC4488F3E8F96ED40, 0x359AD0275A8B6F5, 0x41A94CE9DC428066, 0xCF8B0890283E370C, 0x8D7BE97B81D4019F, 0x4A6ACB477BEA5A2A, 0x89A2AACD2006CB9, - 0x14DEA25F3AF9026D, 0x562E43B4931334FE, 0x913F6188692D6F4B, 0xD3CF8063C0C759D8, 0x5DEDC41A34BBEEB2, 0x1F1D25F19D51D821, 0xD80C07CD676F8394, 0x9AFCE626CE85B507, - 
]; - } -} diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64.Vectorized.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64.Vectorized.cs deleted file mode 100644 index 98e5a6741bae59..00000000000000 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64.Vectorized.cs +++ /dev/null @@ -1,164 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Diagnostics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using static System.IO.Hashing.VectorHelper; - -namespace System.IO.Hashing -{ - public partial class Crc64 - { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128 LoadFromSource(ref byte source, nuint elementOffset) - { - Vector128 vector = Vector128.LoadUnsafe(ref source, elementOffset); - - if (BitConverter.IsLittleEndian) - { - // Reverse the byte order. - - // SSSE3 is required to get PSHUFB acceleration for Vector128.Shuffle on x86/x64. - // However, the gains from vectorizing the rest of the operations seem to to be - // greater than the added cost of emulating the shuffle, so we don't require SSSE3 support. - vector = Vector128.Shuffle(vector, - Vector128.Create((byte)0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, - 0x02, 0x01, 0x00)); - } - - return vector.AsUInt64(); - } - - // All of these checks except the length check are elided by JIT, so the JITted implementation - // will be either a return false or a length check against a constant. This means this method - // should be inlined into the caller. 
- private static bool CanBeVectorized(ReadOnlySpan source) => VectorHelper.IsSupported && source.Length >= Vector128.Count; - - // Processes the bytes in source in 128 byte chunks using intrinsics, followed by processing 16 - // byte chunks, and then processing remaining bytes individually. Requires at least 16 bytes of data. - // Requires little endian byte order and support for PCLMULQDQ intrinsics on Intel architecture - // or AES and AdvSimd intrinsics on ARM architecture. Based on the algorithm put forth in the Intel paper - // "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" in December, 2009 and the - // Intel reference implementation. - // https://github.com/intel/isa-l/blob/33a2d9484595c2d6516c920ce39a694c144ddf69/crc/crc64_ecma_norm_by8.asm - // - // Marking this as noinline so the JIT doesn't try and inline this and end up not inlining some of the calls it makes. - // - [MethodImpl(MethodImplOptions.NoInlining)] - private static ulong UpdateVectorized(ulong crc, ReadOnlySpan source) - { - Debug.Assert(CanBeVectorized(source), "source cannot be vectorized."); - - // Work with a reference to where we're at in the ReadOnlySpan and a local length - // to avoid extraneous range checks. 
- ref byte srcRef = ref MemoryMarshal.GetReference(source); - int length = source.Length; - - Vector128 x7; // Accumulator for the new CRC - Vector128 kConstants; // Used to store reused constants - - if (length >= Vector128.Count * 16) // At least 256 bytes - { - // Load the first 128 bytes - Vector128 x0 = LoadFromSource(ref srcRef, 0); - Vector128 x1 = LoadFromSource(ref srcRef, 16); - Vector128 x2 = LoadFromSource(ref srcRef, 32); - Vector128 x3 = LoadFromSource(ref srcRef, 48); - Vector128 x4 = LoadFromSource(ref srcRef, 64); - Vector128 x5 = LoadFromSource(ref srcRef, 80); - Vector128 x6 = LoadFromSource(ref srcRef, 96); - x7 = LoadFromSource(ref srcRef, 112); - - srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 8); - length -= Vector128.Count * 8; - - // Load and XOR the initial CRC value - // CRC value does not need to be byte-reflected, but it needs to be moved to the high part of the register. - // because data will be byte-reflected and will align with initial crc at correct place. 
- x0 ^= ShiftLowerToUpper(Vector128.CreateScalar(crc)); - - kConstants = Vector128.Create(0x5cf79dea9ac37d6UL, 0x001067e571d7d5c2UL); // k3, k4 - - // Parallel fold blocks of 128 - do - { - Vector128 y1 = LoadFromSource(ref srcRef, 0); - Vector128 y2 = LoadFromSource(ref srcRef, 16); - x0 = FoldPolynomialPair(y1, x0, kConstants); - x1 = FoldPolynomialPair(y2, x1, kConstants); - - y1 = LoadFromSource(ref srcRef, 32); - y2 = LoadFromSource(ref srcRef, 48); - x2 = FoldPolynomialPair(y1, x2, kConstants); - x3 = FoldPolynomialPair(y2, x3, kConstants); - - y1 = LoadFromSource(ref srcRef, 64); - y2 = LoadFromSource(ref srcRef, 80); - x4 = FoldPolynomialPair(y1, x4, kConstants); - x5 = FoldPolynomialPair(y2, x5, kConstants); - - y1 = LoadFromSource(ref srcRef, 96); - y2 = LoadFromSource(ref srcRef, 112); - x6 = FoldPolynomialPair(y1, x6, kConstants); - x7 = FoldPolynomialPair(y2, x7, kConstants); - - srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 8); - length -= Vector128.Count * 8; - } while (length >= Vector128.Count * 8); - - // Fold into 128-bits in x7 - x7 = FoldPolynomialPair(x7, x0, Vector128.Create(0xe464f4df5fb60ac1UL, 0xb649c5b35a759cf2UL)); // k9, k10 - x7 = FoldPolynomialPair(x7, x1, Vector128.Create(0x9af04e1eff82d0ddUL, 0x6e82e609297f8fe8UL)); // k11, k12 - x7 = FoldPolynomialPair(x7, x2, Vector128.Create(0x97c516e98bd2e73UL, 0xb76477b31e22e7bUL)); // k13, k14 - x7 = FoldPolynomialPair(x7, x3, Vector128.Create(0x5f6843ca540df020UL, 0xddf4b6981205b83fUL)); // k15, k16 - x7 = FoldPolynomialPair(x7, x4, Vector128.Create(0x54819d8713758b2cUL, 0x4a6b90073eb0af5aUL)); // k17, k18 - x7 = FoldPolynomialPair(x7, x5, Vector128.Create(0x571bee0a227ef92bUL, 0x44bef2a201b5200cUL)); // k19, k20 - x7 = FoldPolynomialPair(x7, x6, Vector128.Create(0x5f5c3c7eb52fab6UL, 0x4eb938a7d257740eUL)); // k1, k2 - } - else - { - // For shorter sources just load the first vector and XOR with the CRC - Debug.Assert(length >= 16); - - x7 = LoadFromSource(ref srcRef, 0); - - // Load 
and XOR the initial CRC value - // CRC value does not need to be byte-reflected, but it needs to be moved to the high part of the register. - // because the data will be byte-reflected and will align with initial crc at correct place. - x7 ^= ShiftLowerToUpper(Vector128.CreateScalar(crc)); - - srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count); - length -= Vector128.Count; - } - - // Single fold blocks of 16, if any, into x7 - while (length >= Vector128.Count) - { - x7 = FoldPolynomialPair(LoadFromSource(ref srcRef, 0), x7, - Vector128.Create(0x5f5c3c7eb52fab6UL, 0x4eb938a7d257740eUL)); // k1, k2 - - srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count); - length -= Vector128.Count; - } - - // Compute CRC of a 128-bit value and fold to the upper 64-bits - x7 = CarrylessMultiplyLeftUpperRightLower(x7, Vector128.CreateScalar(0x5f5c3c7eb52fab6UL)) ^ // k5 - ShiftLowerToUpper(x7); - - // Barrett reduction - kConstants = Vector128.Create(0x578d29d06cc4f872UL, 0x42f0e1eba9ea3693UL); // k7, k8 - Vector128 temp = x7; - x7 = CarrylessMultiplyLeftUpperRightLower(x7, kConstants) ^ (x7 & Vector128.Create(0UL, ~0UL)); - x7 = CarrylessMultiplyUpper(x7, kConstants); - x7 ^= temp; - - // Process the remaining bytes, if any - ulong result = x7.GetElement(0); - return length > 0 - ? UpdateScalar(result, MemoryMarshal.CreateReadOnlySpan(ref srcRef, length)) - : result; - } - } -} diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64.cs index df403e157b9a05..88281e1011b9bc 100644 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64.cs +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64.cs @@ -26,7 +26,7 @@ namespace System.IO.Hashing /// compatible with the cyclic redundancy check described in ISO 3309. 
/// /// - public sealed partial class Crc64 : NonCryptographicHashAlgorithm + public sealed class Crc64 : NonCryptographicHashAlgorithm { private const int Size = sizeof(ulong); @@ -301,7 +301,8 @@ public static ulong HashToUInt64(ReadOnlySpan source) // Rather than go through Crc64ParameterSet.Crc64 to end up in the optimized Update method here, // just call the Update method directly. // ECMA-182 uses a final XOR of zero, so directly return the result. - return Update(Crc64ParameterSet.Crc64.InitialValue, source); + Crc64ParameterSet parameterSet = Crc64ParameterSet.Crc64; + return parameterSet.Update(parameterSet.InitialValue, source); } /// Computes the CRC-64 hash of the provided data, using the specified parameters. @@ -319,30 +320,5 @@ public static ulong HashToUInt64(Crc64ParameterSet parameterSet, ReadOnlySpan source) - { -#if NET - if (CanBeVectorized(source)) - { - return UpdateVectorized(crc, source); - } -#endif - - return UpdateScalar(crc, source); - } - - private static ulong UpdateScalar(ulong crc, ReadOnlySpan source) - { - ReadOnlySpan crcLookup = CrcLookup; - for (int i = 0; i < source.Length; i++) - { - ulong idx = (crc >> 56); - idx ^= source[i]; - crc = crcLookup[(int)idx] ^ (crc << 8); - } - - return crc; - } } } diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.Table.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.Table.cs index 820fc8093fbdc9..522cf0e70fb5b3 100644 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.Table.cs +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.Table.cs @@ -86,17 +86,17 @@ internal override ulong Update(ulong value, ReadOnlySpan data) } } - private sealed class ForwardTableBasedCrc64 : Crc64ParameterSet + private sealed class ForwardTableBasedCrc64 : ForwardCrc64 { private readonly ulong[] _lookupTable; internal ForwardTableBasedCrc64(ulong polynomial, ulong initialValue, ulong finalXorValue) - 
: base(polynomial, initialValue, finalXorValue, reflectValues: false) + : base(polynomial, initialValue, finalXorValue) { _lookupTable = GenerateLookupTable(polynomial, reflectInput: false); } - internal override ulong Update(ulong value, ReadOnlySpan data) + protected override ulong UpdateScalar(ulong value, ReadOnlySpan data) { ulong[] lookupTable = _lookupTable; ulong crc = value; diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.Vectorized.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.Vectorized.cs new file mode 100644 index 00000000000000..56779ef7c25685 --- /dev/null +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.Vectorized.cs @@ -0,0 +1,148 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#if NET + +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using static System.IO.Hashing.VectorHelper; + +namespace System.IO.Hashing +{ + public partial class Crc64ParameterSet + { + private partial class ForwardCrc64 + { + private Vector128 _k1k2; + private Vector128 _k3k4; + private ulong _k4; + private Vector128 _muPoly; + + partial void InitializeVectorized(ref bool canVectorize) + { + if (!BitConverter.IsLittleEndian || !VectorHelper.IsSupported) + { + return; + } + + ulong reducedPolynomial = Polynomial; + + ulong k1 = CrcPolynomialHelper.ComputeFoldingConstantCrc64(reducedPolynomial, 4 * 128 + 64); + ulong k2 = CrcPolynomialHelper.ComputeFoldingConstantCrc64(reducedPolynomial, 4 * 128); + ulong k3 = CrcPolynomialHelper.ComputeFoldingConstantCrc64(reducedPolynomial, 128 + 64); + ulong k4 = CrcPolynomialHelper.ComputeFoldingConstantCrc64(reducedPolynomial, 128); + ulong mu = CrcPolynomialHelper.ComputeBarrettConstantCrc64(reducedPolynomial); + + _k1k2 = Vector128.Create(k2, k1); + _k3k4 = Vector128.Create(k4, k3); 
+ _k4 = k4; + _muPoly = Vector128.Create(mu, reducedPolynomial); + + canVectorize = true; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector128 LoadReversed(ref byte source, nuint elementOffset) + { + Vector128 vector = Vector128.LoadUnsafe(ref source, elementOffset); + + if (BitConverter.IsLittleEndian) + { + vector = Vector128.Shuffle( + vector, + Vector128.Create( + (byte)0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00)); + } + + return vector.AsUInt64(); + } + + partial void UpdateVectorized(ref ulong crc, ReadOnlySpan source, ref int bytesConsumed) + { + if (!_canVectorize || source.Length < Vector128.Count) + { + return; + } + + crc = UpdateVectorizedCore(crc, source, out bytesConsumed); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private ulong UpdateVectorizedCore(ulong crc, ReadOnlySpan source, out int bytesConsumed) + { + ref byte srcRef = ref MemoryMarshal.GetReference(source); + int length = source.Length; + + Vector128 x1; + + if (length >= Vector128.Count * 4) + { + x1 = LoadReversed(ref srcRef, 0); + Vector128 x2 = LoadReversed(ref srcRef, 16); + Vector128 x3 = LoadReversed(ref srcRef, 32); + Vector128 x4 = LoadReversed(ref srcRef, 48); + + srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 4); + length -= Vector128.Count * 4; + + x1 ^= ShiftLowerToUpper(Vector128.CreateScalar(crc)); + + while (length >= Vector128.Count * 4) + { + Vector128 y5 = LoadReversed(ref srcRef, 0); + Vector128 y6 = LoadReversed(ref srcRef, 16); + Vector128 y7 = LoadReversed(ref srcRef, 32); + Vector128 y8 = LoadReversed(ref srcRef, 48); + + x1 = FoldPolynomialPair(y5, x1, _k1k2); + x2 = FoldPolynomialPair(y6, x2, _k1k2); + x3 = FoldPolynomialPair(y7, x3, _k1k2); + x4 = FoldPolynomialPair(y8, x4, _k1k2); + + srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count * 4); + length -= Vector128.Count * 4; + } + + x1 = FoldPolynomialPair(x2, x1, _k3k4); + x1 = FoldPolynomialPair(x3, x1, _k3k4); 
+ x1 = FoldPolynomialPair(x4, x1, _k3k4); + } + else + { + x1 = LoadReversed(ref srcRef, 0); + x1 ^= ShiftLowerToUpper(Vector128.CreateScalar(crc)); + + srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count); + length -= Vector128.Count; + } + + while (length >= Vector128.Count) + { + x1 = FoldPolynomialPair(LoadReversed(ref srcRef, 0), x1, _k3k4); + + srcRef = ref Unsafe.Add(ref srcRef, Vector128.Count); + length -= Vector128.Count; + } + + // Fold 128→64 bits (forward: fold upper into lower) + x1 = CarrylessMultiplyLeftUpperRightLower( + x1, + Vector128.CreateScalar(_k4)) ^ ShiftLowerToUpper(x1); + + // Barrett reduction + Vector128 temp = x1; + x1 = CarrylessMultiplyLeftUpperRightLower(x1, _muPoly) ^ + (x1 & Vector128.Create(0UL, ~0UL)); + x1 = CarrylessMultiplyUpper(x1, _muPoly); + x1 ^= temp; + + bytesConsumed = source.Length - length; + return x1.GetElement(0); + } + } + } +} + +#endif diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.WellKnown.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.WellKnown.cs index 241e3c60a9811d..227e4ea3924580 100644 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.WellKnown.cs +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.WellKnown.cs @@ -23,14 +23,64 @@ public partial class Crc64ParameterSet public static Crc64ParameterSet Nvme => field ??= Create(0xAD93D23594C93659, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, true); - private sealed class Ecma182ParameterSet : Crc64ParameterSet + private sealed class Ecma182ParameterSet : ForwardCrc64 { + // Pre-computed lookup table for the ECMA-182 polynomial, which is 0x42F0E1EBA9EA3693. + // See GenerateLookupTable in Crc64ParameterSet.Table.cs for how this was generated. 
+ private static ReadOnlySpan CrcLookup => // 256 + [ + 0x0, 0x42F0E1EBA9EA3693, 0x85E1C3D753D46D26, 0xC711223CFA3E5BB5, 0x493366450E42ECDF, 0xBC387AEA7A8DA4C, 0xCCD2A5925D9681F9, 0x8E224479F47CB76A, + 0x9266CC8A1C85D9BE, 0xD0962D61B56FEF2D, 0x17870F5D4F51B498, 0x5577EEB6E6BB820B, 0xDB55AACF12C73561, 0x99A54B24BB2D03F2, 0x5EB4691841135847, 0x1C4488F3E8F96ED4, + 0x663D78FF90E185EF, 0x24CD9914390BB37C, 0xE3DCBB28C335E8C9, 0xA12C5AC36ADFDE5A, 0x2F0E1EBA9EA36930, 0x6DFEFF5137495FA3, 0xAAEFDD6DCD770416, 0xE81F3C86649D3285, + 0xF45BB4758C645C51, 0xB6AB559E258E6AC2, 0x71BA77A2DFB03177, 0x334A9649765A07E4, 0xBD68D2308226B08E, 0xFF9833DB2BCC861D, 0x388911E7D1F2DDA8, 0x7A79F00C7818EB3B, + 0xCC7AF1FF21C30BDE, 0x8E8A101488293D4D, 0x499B3228721766F8, 0xB6BD3C3DBFD506B, 0x854997BA2F81E701, 0xC7B97651866BD192, 0xA8546D7C558A27, 0x4258B586D5BFBCB4, + 0x5E1C3D753D46D260, 0x1CECDC9E94ACE4F3, 0xDBFDFEA26E92BF46, 0x990D1F49C77889D5, 0x172F5B3033043EBF, 0x55DFBADB9AEE082C, 0x92CE98E760D05399, 0xD03E790CC93A650A, + 0xAA478900B1228E31, 0xE8B768EB18C8B8A2, 0x2FA64AD7E2F6E317, 0x6D56AB3C4B1CD584, 0xE374EF45BF6062EE, 0xA1840EAE168A547D, 0x66952C92ECB40FC8, 0x2465CD79455E395B, + 0x3821458AADA7578F, 0x7AD1A461044D611C, 0xBDC0865DFE733AA9, 0xFF3067B657990C3A, 0x711223CFA3E5BB50, 0x33E2C2240A0F8DC3, 0xF4F3E018F031D676, 0xB60301F359DBE0E5, + 0xDA050215EA6C212F, 0x98F5E3FE438617BC, 0x5FE4C1C2B9B84C09, 0x1D14202910527A9A, 0x93366450E42ECDF0, 0xD1C685BB4DC4FB63, 0x16D7A787B7FAA0D6, 0x5427466C1E109645, + 0x4863CE9FF6E9F891, 0xA932F745F03CE02, 0xCD820D48A53D95B7, 0x8F72ECA30CD7A324, 0x150A8DAF8AB144E, 0x43A04931514122DD, 0x84B16B0DAB7F7968, 0xC6418AE602954FFB, + 0xBC387AEA7A8DA4C0, 0xFEC89B01D3679253, 0x39D9B93D2959C9E6, 0x7B2958D680B3FF75, 0xF50B1CAF74CF481F, 0xB7FBFD44DD257E8C, 0x70EADF78271B2539, 0x321A3E938EF113AA, + 0x2E5EB66066087D7E, 0x6CAE578BCFE24BED, 0xABBF75B735DC1058, 0xE94F945C9C3626CB, 0x676DD025684A91A1, 0x259D31CEC1A0A732, 0xE28C13F23B9EFC87, 0xA07CF2199274CA14, + 0x167FF3EACBAF2AF1, 
0x548F120162451C62, 0x939E303D987B47D7, 0xD16ED1D631917144, 0x5F4C95AFC5EDC62E, 0x1DBC74446C07F0BD, 0xDAAD56789639AB08, 0x985DB7933FD39D9B, + 0x84193F60D72AF34F, 0xC6E9DE8B7EC0C5DC, 0x1F8FCB784FE9E69, 0x43081D5C2D14A8FA, 0xCD2A5925D9681F90, 0x8FDAB8CE70822903, 0x48CB9AF28ABC72B6, 0xA3B7B1923564425, + 0x70428B155B4EAF1E, 0x32B26AFEF2A4998D, 0xF5A348C2089AC238, 0xB753A929A170F4AB, 0x3971ED50550C43C1, 0x7B810CBBFCE67552, 0xBC902E8706D82EE7, 0xFE60CF6CAF321874, + 0xE224479F47CB76A0, 0xA0D4A674EE214033, 0x67C58448141F1B86, 0x253565A3BDF52D15, 0xAB1721DA49899A7F, 0xE9E7C031E063ACEC, 0x2EF6E20D1A5DF759, 0x6C0603E6B3B7C1CA, + 0xF6FAE5C07D3274CD, 0xB40A042BD4D8425E, 0x731B26172EE619EB, 0x31EBC7FC870C2F78, 0xBFC9838573709812, 0xFD39626EDA9AAE81, 0x3A28405220A4F534, 0x78D8A1B9894EC3A7, + 0x649C294A61B7AD73, 0x266CC8A1C85D9BE0, 0xE17DEA9D3263C055, 0xA38D0B769B89F6C6, 0x2DAF4F0F6FF541AC, 0x6F5FAEE4C61F773F, 0xA84E8CD83C212C8A, 0xEABE6D3395CB1A19, + 0x90C79D3FEDD3F122, 0xD2377CD44439C7B1, 0x15265EE8BE079C04, 0x57D6BF0317EDAA97, 0xD9F4FB7AE3911DFD, 0x9B041A914A7B2B6E, 0x5C1538ADB04570DB, 0x1EE5D94619AF4648, + 0x2A151B5F156289C, 0x4051B05E58BC1E0F, 0x87409262A28245BA, 0xC5B073890B687329, 0x4B9237F0FF14C443, 0x962D61B56FEF2D0, 0xCE73F427ACC0A965, 0x8C8315CC052A9FF6, + 0x3A80143F5CF17F13, 0x7870F5D4F51B4980, 0xBF61D7E80F251235, 0xFD913603A6CF24A6, 0x73B3727A52B393CC, 0x31439391FB59A55F, 0xF652B1AD0167FEEA, 0xB4A25046A88DC879, + 0xA8E6D8B54074A6AD, 0xEA16395EE99E903E, 0x2D071B6213A0CB8B, 0x6FF7FA89BA4AFD18, 0xE1D5BEF04E364A72, 0xA3255F1BE7DC7CE1, 0x64347D271DE22754, 0x26C49CCCB40811C7, + 0x5CBD6CC0CC10FAFC, 0x1E4D8D2B65FACC6F, 0xD95CAF179FC497DA, 0x9BAC4EFC362EA149, 0x158E0A85C2521623, 0x577EEB6E6BB820B0, 0x906FC95291867B05, 0xD29F28B9386C4D96, + 0xCEDBA04AD0952342, 0x8C2B41A1797F15D1, 0x4B3A639D83414E64, 0x9CA82762AAB78F7, 0x87E8C60FDED7CF9D, 0xC51827E4773DF90E, 0x20905D88D03A2BB, 0x40F9E43324E99428, + 0x2CFFE7D5975E55E2, 0x6E0F063E3EB46371, 0xA91E2402C48A38C4, 0xEBEEC5E96D600E57, 
0x65CC8190991CB93D, 0x273C607B30F68FAE, 0xE02D4247CAC8D41B, 0xA2DDA3AC6322E288, + 0xBE992B5F8BDB8C5C, 0xFC69CAB42231BACF, 0x3B78E888D80FE17A, 0x7988096371E5D7E9, 0xF7AA4D1A85996083, 0xB55AACF12C735610, 0x724B8ECDD64D0DA5, 0x30BB6F267FA73B36, + 0x4AC29F2A07BFD00D, 0x8327EC1AE55E69E, 0xCF235CFD546BBD2B, 0x8DD3BD16FD818BB8, 0x3F1F96F09FD3CD2, 0x41011884A0170A41, 0x86103AB85A2951F4, 0xC4E0DB53F3C36767, + 0xD8A453A01B3A09B3, 0x9A54B24BB2D03F20, 0x5D45907748EE6495, 0x1FB5719CE1045206, 0x919735E51578E56C, 0xD367D40EBC92D3FF, 0x1476F63246AC884A, 0x568617D9EF46BED9, + 0xE085162AB69D5E3C, 0xA275F7C11F7768AF, 0x6564D5FDE549331A, 0x279434164CA30589, 0xA9B6706FB8DFB2E3, 0xEB46918411358470, 0x2C57B3B8EB0BDFC5, 0x6EA7525342E1E956, + 0x72E3DAA0AA188782, 0x30133B4B03F2B111, 0xF7021977F9CCEAA4, 0xB5F2F89C5026DC37, 0x3BD0BCE5A45A6B5D, 0x79205D0E0DB05DCE, 0xBE317F32F78E067B, 0xFCC19ED95E6430E8, + 0x86B86ED5267CDBD3, 0xC4488F3E8F96ED40, 0x359AD0275A8B6F5, 0x41A94CE9DC428066, 0xCF8B0890283E370C, 0x8D7BE97B81D4019F, 0x4A6ACB477BEA5A2A, 0x89A2AACD2006CB9, + 0x14DEA25F3AF9026D, 0x562E43B4931334FE, 0x913F6188692D6F4B, 0xD3CF8063C0C759D8, 0x5DEDC41A34BBEEB2, 0x1F1D25F19D51D821, 0xD80C07CD676F8394, 0x9AFCE626CE85B507, + ]; + public Ecma182ParameterSet() - : base(0x42F0E1EBA9EA3693, 0x0000000000000000, 0x0000000000000000, false) + : base(0x42F0E1EBA9EA3693, 0x0000000000000000, 0x0000000000000000) { } - internal override ulong Update(ulong value, ReadOnlySpan data) => Hashing.Crc64.Update(value, data); + protected override ulong UpdateScalar(ulong crc, ReadOnlySpan source) + { + ReadOnlySpan crcLookup = CrcLookup; + + foreach (byte b in source) + { + ulong idx = (crc >> 56); + idx ^= b; + crc = crcLookup[(int)idx] ^ (crc << 8); + } + + return crc; + } } } } diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.cs index 2115236e83a3c1..36ff5036b252e3 100644 --- 
a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.cs +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Crc64ParameterSet.cs @@ -111,5 +111,39 @@ private static ulong ReverseBits(ulong value) return BinaryPrimitives.ReverseEndianness(value); } + + private abstract partial class ForwardCrc64 : Crc64ParameterSet + { +#if NET + // Declare the capability field here so it can be declared readonly. + private readonly bool _canVectorize; +#endif + + partial void InitializeVectorized(ref bool canVectorize); + partial void UpdateVectorized(ref ulong crc, ReadOnlySpan source, ref int bytesConsumed); + + protected ForwardCrc64(ulong polynomial, ulong initialValue, ulong finalXorValue) + : base(polynomial, initialValue, finalXorValue, reflectValues: false) + { +#if NET + InitializeVectorized(ref _canVectorize); +#endif + } + + protected abstract ulong UpdateScalar(ulong value, ReadOnlySpan source); + + internal sealed override ulong Update(ulong value, ReadOnlySpan source) + { + int consumed = 0; + UpdateVectorized(ref value, source, ref consumed); + + if (consumed < source.Length) + { + value = UpdateScalar(value, source.Slice(consumed)); + } + + return value; + } + } } } diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/CrcPolynomialHelper.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/CrcPolynomialHelper.cs new file mode 100644 index 00000000000000..31333fce7d1c9c --- /dev/null +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/CrcPolynomialHelper.cs @@ -0,0 +1,158 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+#if NET
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+
+namespace System.IO.Hashing
+{
+    /// <summary>
+    /// Computes constants derived from a CRC generator polynomial by dividing x^power by that
+    /// polynomial, using XOR in place of subtraction.
+    /// </summary>
+    internal static class CrcPolynomialHelper
+    {
+        /// <summary>Low 64 bits of the remainder of x^power divided by fullPoly (used as given).</summary>
+        internal static ulong ComputeFoldingConstantCrc32(ulong fullPoly, int power) =>
+            ComputeFoldingConstant(new UInt640(fullPoly), power);
+
+        /// <summary>Low 64 bits of the remainder of x^power divided by (x^64 + reducedPolynomial).</summary>
+        internal static ulong ComputeFoldingConstantCrc64(ulong reducedPolynomial, int power) =>
+            ComputeFoldingConstant(new UInt640(1UL, reducedPolynomial), power);
+
+        /// <summary>Low 64 bits of the quotient of x^64 divided by fullPoly (used as given).</summary>
+        internal static ulong ComputeBarrettConstantCrc32(ulong fullPoly) =>
+            ComputeBarrettConstant(new UInt640(fullPoly), 64);
+
+        /// <summary>Low 64 bits of the quotient of x^128 divided by (x^64 + reducedPolynomial).</summary>
+        internal static ulong ComputeBarrettConstantCrc64(ulong reducedPolynomial) =>
+            ComputeBarrettConstant(new UInt640(1UL, reducedPolynomial), 128);
+
+        private static ulong ComputeFoldingConstant(UInt640 poly, int power) =>
+            Divide(poly, power).Remainder.ToUInt64();
+
+        private static ulong ComputeBarrettConstant(UInt640 poly, int power) =>
+            Divide(poly, power).Quotient.ToUInt64();
+
+        // Long division of x^power by poly, with XOR standing in for subtraction. Both kinds of
+        // constant come from this one loop: folding uses the remainder, Barrett the quotient.
+        private static (UInt640 Quotient, UInt640 Remainder) Divide(UInt640 poly, int power)
+        {
+            int polyDeg = poly.Degree;
+
+            // A zero polynomial could never lower the dividend's degree, so the loop below
+            // would not terminate; x^power must also fit in the 640-bit accumulator.
+            Debug.Assert(polyDeg >= 0, "The polynomial must be non-zero.");
+            Debug.Assert(power >= 0 && power < UInt640.BitCount, "x^power must fit in a UInt640.");
+
+            UInt640 quotient = default;
+            UInt640 remainder = new(1);
+            remainder.ShiftLeftEquals(power);
+
+            for (int degree = remainder.Degree; degree >= polyDeg; degree = remainder.Degree)
+            {
+                // Cancel the current leading term and record which multiple of poly did it.
+                int shift = degree - polyDeg;
+                UInt640 polyShifted = poly;
+                polyShifted.ShiftLeftEquals(shift);
+                remainder.XorEquals(ref polyShifted);
+                quotient.SetBit(shift);
+            }
+
+            return (quotient, remainder);
+        }
+
+        /// <summary>A 640-bit value held as ten 64-bit words, least significant word first.</summary>
+        [InlineArray(Length)]
+        private struct UInt640
+        {
+            internal const int BitCount = Length * 64;
+            private const int Length = 10;
+            private ulong _element;
+
+            internal UInt640(ulong value)
+            {
+                this = default;
+                this[0] = value;
+            }
+
+            internal UInt640(ulong high, ulong low)
+            {
+                this = default;
+                this[0] = low;
+                this[1] = high;
+            }
+
+            // Index of the highest set bit, or -1 when the value is zero.
+            internal readonly int Degree
+            {
+                get
+                {
+                    for (int i = Length - 1; i >= 0; i--)
+                    {
+                        if (this[i] != 0)
+                        {
+                            return (i * 64) + (63 - (int)ulong.LeadingZeroCount(this[i]));
+                        }
+                    }
+
+                    return -1;
+                }
+            }
+
+            // Sets the bit at 'index', where 0 is the least significant bit.
+            internal void SetBit(int index) => this[index >> 6] |= 1UL << (index & 63);
+
+            // Shifts the whole value left by 'count' bits; bits pushed past bit 639 are dropped.
+            internal void ShiftLeftEquals(int count)
+            {
+                Debug.Assert(count >= 0, "Negative shift counts are not supported.");
+
+                // Nothing survives a shift by the full width or more. Returning here also keeps
+                // the word loop below inside the array for very large counts.
+                if (count >= BitCount)
+                {
+                    this = default;
+                    return;
+                }
+
+                int wordShift = count >> 6; // count / 64
+                int bitShift = count & 63; // count % 64
+
+                if (wordShift > 0)
+                {
+                    // Walk downwards so every source word is read before it is overwritten.
+                    for (int i = Length - 1; i >= 0; i--)
+                    {
+                        this[i] = i >= wordShift ? this[i - wordShift] : 0UL;
+                    }
+                }
+
+                if (bitShift > 0)
+                {
+                    for (int i = Length - 1; i > 0; i--)
+                    {
+                        this[i] = (this[i] << bitShift) | (this[i - 1] >> (64 - bitShift));
+                    }
+
+                    this[0] <<= bitShift;
+                }
+            }
+
+            internal void XorEquals(ref UInt640 other)
+            {
+                for (int i = 0; i < Length; i++)
+                {
+                    this[i] ^= other[i];
+                }
+            }
+
+            // Returns the least significant word; every bit above bit 63 is ignored.
+            internal readonly ulong ToUInt64() => this[0];
+        }
+    }
+}
+
+#endif
diff --git a/src/libraries/System.IO.Hashing/tests/Crc32Tests_ParameterSet_Custom.cs b/src/libraries/System.IO.Hashing/tests/Crc32Tests_ParameterSet_Custom.cs
index 3d26be11a883ce..556a995da2b940 100644
--- a/src/libraries/System.IO.Hashing/tests/Crc32Tests_ParameterSet_Custom.cs
+++ b/src/libraries/System.IO.Hashing/tests/Crc32Tests_ParameterSet_Custom.cs
@@ -78,7 +78,37 @@ public class Crc32CDRomEdcDriver : Crc32DriverBase
         };
     }
 
+    public class Crc32HD16ForwardDriver : Crc32DriverBase
+    {
+        // Koopman's HD-13 CRC-32 polynomial with an arbitrary non-zero initial and final value.
+        // The polynomial is used here only because it has a lot of high bits set, which ensures
+        // the vector code isn't depending on the common 0x04...
+        // NOTE(review): the type name says "HD16" but this comment says "HD-13"; confirm which is intended.
+        internal override Crc32ParameterSet ParameterSet => Crc32ParameterSet.Create(
+            polynomial: 0xE89061DB,
+            initialValue: 0x00000001,
+            finalXorValue: 0x00000003,
+            reflectValues: false);
+
+        internal override string EmptyOutput => "00000002";
+        internal override string Residue => "D120C3B5";
+
+        internal override string? GetExpectedOutput(string testCaseName) =>
+            testCaseName switch
+            {
+                "One" => "E89060D8",
+                "Zero" => "E89061D8",
+                "Self-test 123456789" => "00FA61B6",
+                "The quick brown fox jumps over the lazy dog" => "A81AC12F",
+                "Lorem ipsum 128" => "7B773166",
+                "Lorem ipsum 144" => "D3A67D09",
+                "Lorem ipsum 1001" => "2E99E5F2",
+                _ => null,
+            };
+    }
+
     public class Crc32Tests_ParameterSet_Custom_Cksum : Crc32Tests_Parameterized<Crc32CksumDriver>;
     public class Crc32Tests_ParameterSet_Custom_CDRomEdc : Crc32Tests_Parameterized<Crc32CDRomEdcDriver>;
     public class Crc32Tests_ParameterSet_Custom_Mef : Crc32Tests_Parameterized<Crc32MefDriver>;
+    public class Crc32Tests_ParameterSet_Custom_HD16Forward : Crc32Tests_Parameterized<Crc32HD16ForwardDriver>;
 }