diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Adler32.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Adler32.cs index a83ab6709ddcb0..da54cdb3372b19 100644 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Adler32.cs +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/Adler32.cs @@ -2,20 +2,25 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Buffers.Binary; +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#if NET +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.X86; +#endif namespace System.IO.Hashing { /// <summary> - /// Provides an implementation of the Adler-32 algorithm, as used in - /// RFC1950. + /// Provides an implementation of the Adler-32 checksum algorithm, as specified in + /// RFC 1950. /// </summary> /// <remarks> /// <para> - /// The Adler-32 algorithm is designed for fast, lightweight integrity checking and is commonly used in - /// data compression and transmission scenarios. This class is not suitable for cryptographic purposes. - /// </para> - /// <para> - /// Adler-32 is not as robust as other checksum algorithms like CRC32, but it is faster to compute. + /// This algorithm produces a 32-bit checksum and is commonly used in + /// data compression formats such as zlib. It is not suitable for cryptographic purposes. /// </para> /// </remarks> public sealed partial class Adler32 : NonCryptographicHashAlgorithm @@ -24,6 +29,11 @@ public sealed partial class Adler32 : NonCryptographicHashAlgorithm private const int Size = sizeof(uint); private uint _adler = InitialState; + /// <summary>Largest prime smaller than 65536.</summary> + private const uint ModBase = 65521; + /// <summary>NMax is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) &lt;= 2^32-1</summary> + private const int NMax = 5552; + /// <summary> /// Initializes a new instance of the <see cref="Adler32"/> class. 
/// </summary> @@ -174,37 +184,302 @@ public static int Hash(ReadOnlySpan<byte> source, Span<byte> destination) public static uint HashToUInt32(ReadOnlySpan<byte> source) => Update(InitialState, source); - private static uint Update(uint adler, ReadOnlySpan<byte> buf) + private static uint Update(uint adler, ReadOnlySpan<byte> source) { - if (buf.IsEmpty) { return adler; } - return UpdateScalar(adler, buf); +#if NET + if (BitConverter.IsLittleEndian && + Vector128.IsHardwareAccelerated && + source.Length >= Vector128<byte>.Count * 2) + { + if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported && source.Length >= Vector512<byte>.Count) + { + return UpdateVector512(adler, source); + } + + if (Vector256.IsHardwareAccelerated && Avx2.IsSupported && source.Length >= Vector256<byte>.Count) + { + return UpdateVector256(adler, source); + } + + return UpdateVector128(adler, source); + } +#endif + + return UpdateScalar(adler, source); } - private static uint UpdateScalar(uint adler, ReadOnlySpan<byte> buf) + private static uint UpdateScalar(uint adler, ReadOnlySpan<byte> source) { - const uint Base = 65521; // largest prime smaller than 65536 - const int NMax = 5552; // NMax is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 - uint s1 = adler & 0xFFFF; uint s2 = (adler >> 16) & 0xFFFF; - while (buf.Length > 0) + Debug.Assert(!source.IsEmpty); + + do { - int k = buf.Length < NMax ? buf.Length : NMax; - foreach (byte b in buf.Slice(0, k)) + int k = source.Length < NMax ? 
source.Length : NMax; + foreach (byte b in source.Slice(0, k)) { s1 += b; s2 += s1; } - s1 %= Base; - s2 %= Base; - buf = buf.Slice(k); + + s1 %= ModBase; + s2 %= ModBase; + source = source.Slice(k); } + while (source.Length > 0); return (s2 << 16) | s1; } + +#if NET + [MethodImpl(MethodImplOptions.NoInlining)] + private static uint UpdateVector128(uint adler, ReadOnlySpan<byte> source) + { + Debug.Assert(source.Length >= Vector128<byte>.Count * 2); + + const int BlockSize = 32; // two Vector128 loads + + uint s1 = adler & 0xFFFF; + uint s2 = (adler >> 16) & 0xFFFF; + + ref byte sourceRef = ref MemoryMarshal.GetReference(source); + int length = source.Length; + + Vector128<sbyte> tap1 = Vector128.Create((sbyte)32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17); + Vector128<sbyte> tap2 = Vector128.Create((sbyte)16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + + do + { + int n = Math.Min(length, NMax); + int blocks = n / BlockSize; + n = blocks * BlockSize; + length -= n; + + Vector128<uint> vs1 = Vector128<uint>.Zero; + Vector128<uint> vs2 = Vector128.CreateScalar(s2); + Vector128<uint> vps = Vector128.CreateScalar(s1 * (uint)blocks); + + do + { + Vector128<byte> bytes1 = Vector128.LoadUnsafe(ref sourceRef); + Vector128<byte> bytes2 = Vector128.LoadUnsafe(ref sourceRef, 16); + sourceRef = ref Unsafe.Add(ref sourceRef, BlockSize); + + vps += vs1; + + if (Ssse3.IsSupported) + { + vs1 += Sse2.SumAbsoluteDifferences(bytes1, Vector128<byte>.Zero).AsUInt32(); + vs1 += Sse2.SumAbsoluteDifferences(bytes2, Vector128<byte>.Zero).AsUInt32(); + + vs2 += Sse2.MultiplyAddAdjacent(Ssse3.MultiplyAddAdjacent(bytes1, tap1), Vector128<short>.One).AsUInt32(); + vs2 += Sse2.MultiplyAddAdjacent(Ssse3.MultiplyAddAdjacent(bytes2, tap2), Vector128<short>.One).AsUInt32(); + } + else if (AdvSimd.IsSupported) + { + // Widening byte sum (equivalent of SumAbsoluteDifferences against zero) + vs1 = AdvSimd.AddPairwiseWideningAndAdd( + vs1, + AdvSimd.AddPairwiseWideningAndAdd( + AdvSimd.AddPairwiseWidening(bytes1), + bytes2)); + + // Widening multiply + horizontal add 
(equivalent of MultiplyAddAdjacent chain). + // Because weights are all positive (1-32), unsigned byte * unsigned byte multiply is valid. + Vector128<ushort> wprod1 = AdvSimd.MultiplyWideningLower(bytes1.GetLower(), tap1.AsByte().GetLower()); + wprod1 = AdvSimd.MultiplyWideningUpperAndAdd(wprod1, bytes1, tap1.AsByte()); + vs2 = AdvSimd.AddPairwiseWideningAndAdd(vs2, wprod1); + + Vector128<ushort> wprod2 = AdvSimd.MultiplyWideningLower(bytes2.GetLower(), tap2.AsByte().GetLower()); + wprod2 = AdvSimd.MultiplyWideningUpperAndAdd(wprod2, bytes2, tap2.AsByte()); + vs2 = AdvSimd.AddPairwiseWideningAndAdd(vs2, wprod2); + } + else + { + (Vector128<ushort> lo1, Vector128<ushort> hi1) = Vector128.Widen(bytes1); + (Vector128<ushort> lo2, Vector128<ushort> hi2) = Vector128.Widen(bytes2); + (Vector128<uint> sumLo, Vector128<uint> sumHi) = Vector128.Widen(lo1 + hi1 + lo2 + hi2); + vs1 += sumLo + sumHi; + vs2 += WeightedSumWidening128(bytes1, tap1) + WeightedSumWidening128(bytes2, tap2); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128<uint> WeightedSumWidening128(Vector128<byte> data, Vector128<sbyte> weights) + { + (Vector128<ushort> dLo, Vector128<ushort> dHi) = Vector128.Widen(data); + (Vector128<short> wLo, Vector128<short> wHi) = Vector128.Widen(weights); + + (Vector128<int> pLo1, Vector128<int> pHi1) = Vector128.Widen(dLo.AsInt16() * wLo); + (Vector128<int> pLo2, Vector128<int> pHi2) = Vector128.Widen(dHi.AsInt16() * wHi); + + return (pLo1 + pHi1 + pLo2 + pHi2).AsUInt32(); + } + } + } + while (--blocks > 0); + + vs2 += vps << 5; + + s1 += Vector128.Sum(vs1); + s2 = Vector128.Sum(vs2); + + s1 %= ModBase; + s2 %= ModBase; + } + while (length >= BlockSize); + + if (length > 0) + { + UpdateScalarTail(ref sourceRef, length, ref s1, ref s2); + } + + return (s2 << 16) | s1; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static uint UpdateVector256(uint adler, ReadOnlySpan<byte> source) + { + Debug.Assert(source.Length >= Vector256<byte>.Count); + + const int BlockSize = 32; + + uint s1 = adler & 0xFFFF; + uint s2 = (adler >> 16) & 0xFFFF; + + ref byte sourceRef = ref 
MemoryMarshal.GetReference(source); + int length = source.Length; + + Vector256<sbyte> weights = Vector256.Create((sbyte)32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + + do + { + int n = Math.Min(length, NMax); + int blocks = n / BlockSize; + n = blocks * BlockSize; + length -= n; + + Vector256<uint> vs1 = Vector256.CreateScalar(s1); + Vector256<uint> vs2 = Vector256.CreateScalar(s2); + Vector256<uint> vs3 = Vector256<uint>.Zero; + + do + { + Vector256<byte> data = Vector256.LoadUnsafe(ref sourceRef); + sourceRef = ref Unsafe.Add(ref sourceRef, BlockSize); + + Vector256<uint> vs1_0 = vs1; + vs1 += Avx2.SumAbsoluteDifferences(data, Vector256<byte>.Zero).AsUInt32(); + vs3 += vs1_0; + + Vector256<short> mad = Avx2.MultiplyAddAdjacent(data, weights); + vs2 += Avx2.MultiplyAddAdjacent(mad, Vector256<short>.One).AsUInt32(); + } + while (--blocks > 0); + + vs3 <<= 5; + vs2 += vs3; + + s1 = (uint)Vector256.Sum(vs1.AsUInt64()); // SumAbsoluteDifferences stores the results in the even lanes + s2 = Vector256.Sum(vs2); + + s1 %= ModBase; + s2 %= ModBase; + } + while (length >= BlockSize); + + if (length > 0) + { + UpdateScalarTail(ref sourceRef, length, ref s1, ref s2); + } + + return (s2 << 16) | s1; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static uint UpdateVector512(uint adler, ReadOnlySpan<byte> source) + { + Debug.Assert(source.Length >= Vector512<byte>.Count); + + const int BlockSize = 64; + + uint s1 = adler & 0xFFFF; + uint s2 = (adler >> 16) & 0xFFFF; + + ref byte sourceRef = ref MemoryMarshal.GetReference(source); + int length = source.Length; + + Vector512<sbyte> weights = Vector512.Create( + (sbyte)32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, + 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + + do + { + int n = Math.Min(length, NMax); + int blocks = n / BlockSize; + n = blocks * BlockSize; + length -= n; + + Vector512<uint> vs1 
= Vector512.CreateScalar(s1); + Vector512<uint> vs2 = Vector512.CreateScalar(s2); + Vector512<uint> vs3 = Vector512<uint>.Zero; + + do + { + Vector512<byte> data = Vector512.LoadUnsafe(ref sourceRef); + sourceRef = ref Unsafe.Add(ref sourceRef, BlockSize); + + Vector512<uint> vs1_0 = vs1; + vs1 += Avx512BW.SumAbsoluteDifferences(data, Vector512<byte>.Zero).AsUInt32(); + vs3 += vs1_0; + vs2 += Avx512BW.MultiplyAddAdjacent(Avx512BW.MultiplyAddAdjacent(data, weights), Vector512<short>.One).AsUInt32(); + + Vector256<uint> sumLo = Avx2.SumAbsoluteDifferences(data.GetLower(), Vector256<byte>.Zero).AsUInt32(); + vs2 += Vector512.Create(sumLo << 5, Vector256<uint>.Zero); + } + while (--blocks > 0); + + vs3 <<= 6; + vs2 += vs3; + + s1 = (uint)Vector512.Sum(vs1.AsUInt64()); + s2 = Vector512.Sum(vs2); + + s1 %= ModBase; + s2 %= ModBase; + } + while (length >= BlockSize); + + if (length >= Vector256<byte>.Count) + { + return UpdateVector256((s2 << 16) | s1, MemoryMarshal.CreateReadOnlySpan(ref sourceRef, length)); + } + + if (length > 0) + { + UpdateScalarTail(ref sourceRef, length, ref s1, ref s2); + } + + return (s2 << 16) | s1; + } + + private static void UpdateScalarTail(ref byte sourceRef, int length, ref uint s1, ref uint s2) + { + Debug.Assert(length is > 0 and < NMax); + + foreach (byte b in MemoryMarshal.CreateReadOnlySpan(ref sourceRef, length)) + { + s1 += b; + s2 += s1; + } + + s1 %= ModBase; + s2 %= ModBase; + } +#endif } } diff --git a/src/libraries/System.IO.Hashing/tests/Adler32Tests.cs b/src/libraries/System.IO.Hashing/tests/Adler32Tests.cs index 5b39a00669abf5..12ec92693d3d51 100644 --- a/src/libraries/System.IO.Hashing/tests/Adler32Tests.cs +++ b/src/libraries/System.IO.Hashing/tests/Adler32Tests.cs @@ -168,5 +168,130 @@ public void LargeInput_ExceedsNMax(int length, uint expected) alg.Append(data); Assert.Equal(expected, alg.GetCurrentHashAsUInt32()); } + + /// <summary> + /// Tests a wide variety of lengths to exercise scalar, Vector128, Vector256, and Vector512 + /// code paths as well as their transitions and tail handling. 
+ /// </summary> + [Theory] + [InlineData(1)] + [InlineData(2)] + [InlineData(7)] + [InlineData(15)] + [InlineData(16)] + [InlineData(17)] + [InlineData(31)] + [InlineData(32)] + [InlineData(33)] + [InlineData(47)] + [InlineData(48)] + [InlineData(63)] + [InlineData(64)] + [InlineData(65)] + [InlineData(95)] + [InlineData(96)] + [InlineData(127)] + [InlineData(128)] + [InlineData(129)] + [InlineData(255)] + [InlineData(256)] + [InlineData(512)] + [InlineData(1000)] + [InlineData(1023)] + [InlineData(1024)] + [InlineData(4096)] + [InlineData(5551)] + [InlineData(5552)] + [InlineData(5553)] + [InlineData(5600)] + [InlineData(8192)] + [InlineData(11104)] + [InlineData(16384)] + public void VariousLengths_MatchesReference(int length) + { + byte[] data = new byte[length]; + for (int i = 0; i < data.Length; i++) + { + data[i] = (byte)(i % 251); + } + + uint expected = ReferenceAdler32(data); + Assert.Equal(expected, Adler32.HashToUInt32(data)); + + var alg = new Adler32(); + alg.Append(data); + Assert.Equal(expected, alg.GetCurrentHashAsUInt32()); + } + + /// <summary> + /// Tests with all-0xFF bytes, which maximizes accumulator values and stresses + /// overflow-safe behavior in the vectorized paths. + /// </summary> + [Theory] + [InlineData(32)] + [InlineData(64)] + [InlineData(128)] + [InlineData(256)] + [InlineData(5552)] + [InlineData(5553)] + public void AllMaxBytes_MatchesReference(int length) + { + byte[] data = new byte[length]; + data.AsSpan().Fill(0xFF); + + Assert.Equal(ReferenceAdler32(data), Adler32.HashToUInt32(data)); + } + + /// <summary> + /// Tests incremental appending with various chunk sizes to verify that the + /// vectorized paths produce the same result regardless of how data is fed in. 
+ /// </summary> + [Theory] + [InlineData(1)] + [InlineData(7)] + [InlineData(16)] + [InlineData(32)] + [InlineData(64)] + [InlineData(100)] + public void IncrementalAppend_MatchesOneShot(int chunkSize) + { + byte[] data = new byte[1000]; + for (int i = 0; i < data.Length; i++) + { + data[i] = (byte)(i * 7 + 13); + } + + uint oneShot = Adler32.HashToUInt32(data); + + var alg = new Adler32(); + int offset = 0; + while (offset < data.Length) + { + int len = Math.Min(chunkSize, data.Length - offset); + alg.Append(data.AsSpan(offset, len)); + offset += len; + } + + Assert.Equal(oneShot, alg.GetCurrentHashAsUInt32()); + } + + /// <summary> + /// Computes a reference Adler32 result using the simplest possible scalar implementation. + /// </summary> + private static uint ReferenceAdler32(ReadOnlySpan<byte> data, uint adler = 1) + { + const uint Base = 65521; + + uint s1 = adler & 0xFFFF; + uint s2 = (adler >> 16) & 0xFFFF; + + foreach (byte b in data) + { + s1 = (s1 + b) % Base; + s2 = (s2 + s1) % Base; + } + + return (s2 << 16) | s1; + } } }