Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ System.IO.Hashing.XxHash32</PackageDescription>
</ItemGroup>

<ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETCoreApp'">
<Compile Include="System\IO\Hashing\Adler32Simd.cs" />
<Compile Include="System\IO\Hashing\Crc32.Arm.cs" />
<Compile Include="System\IO\Hashing\Crc32.Vectorized.cs" />
<Compile Include="System\IO\Hashing\Crc64.Vectorized.cs" />
Expand Down
268 changes: 3 additions & 265 deletions src/libraries/System.IO.Hashing/src/System/IO/Hashing/Adler32.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,6 @@
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if NET
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
#endif

namespace System.IO.Hashing
{
Expand All @@ -30,7 +25,7 @@ public sealed partial class Adler32 : NonCryptographicHashAlgorithm
private uint _adler = InitialState;

/// <summary>Largest prime smaller than 65536.</summary>
private const uint ModBase = 65521;
internal const uint ModBase = 65521;
/// <summary>NMax is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) &lt;= 2^32-1</summary>
private const int NMax = 5552;

Expand Down Expand Up @@ -192,21 +187,9 @@ private static uint Update(uint adler, ReadOnlySpan<byte> source)
}

#if NET
if (BitConverter.IsLittleEndian &&
Vector128.IsHardwareAccelerated &&
source.Length >= Vector128<byte>.Count * 2)
if (IsVectorizable(source))
{
if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported && source.Length >= Vector512<byte>.Count)
{
return UpdateVector512(adler, source);
}

if (Vector256.IsHardwareAccelerated && Avx2.IsSupported && source.Length >= Vector256<byte>.Count)
{
return UpdateVector256(adler, source);
}

return UpdateVector128(adler, source);
return UpdateVectorized(adler, source);
}
#endif

Expand Down Expand Up @@ -236,250 +219,5 @@ private static uint UpdateScalar(uint adler, ReadOnlySpan<byte> source)

return (s2 << 16) | s1;
}

#if NET
[MethodImpl(MethodImplOptions.NoInlining)]
private static uint UpdateVector128(uint adler, ReadOnlySpan<byte> source)
{
Debug.Assert(source.Length >= Vector128<byte>.Count * 2);

const int BlockSize = 32; // two Vector128<byte> loads

uint s1 = adler & 0xFFFF;
uint s2 = (adler >> 16) & 0xFFFF;

ref byte sourceRef = ref MemoryMarshal.GetReference(source);
int length = source.Length;

Vector128<sbyte> tap1 = Vector128.Create(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17);
Vector128<sbyte> tap2 = Vector128.Create(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);

do
{
int n = Math.Min(length, NMax);
int blocks = n / BlockSize;
n = blocks * BlockSize;
length -= n;

Vector128<uint> vs1 = Vector128<uint>.Zero;
Vector128<uint> vs2 = Vector128.CreateScalar(s2);
Vector128<uint> vps = Vector128.CreateScalar(s1 * (uint)blocks);

do
{
Vector128<byte> bytes1 = Vector128.LoadUnsafe(ref sourceRef);
Vector128<byte> bytes2 = Vector128.LoadUnsafe(ref sourceRef, 16);
sourceRef = ref Unsafe.Add(ref sourceRef, BlockSize);

vps += vs1;

if (Ssse3.IsSupported)
{
vs1 += Sse2.SumAbsoluteDifferences(bytes1, Vector128<byte>.Zero).AsUInt32();
vs1 += Sse2.SumAbsoluteDifferences(bytes2, Vector128<byte>.Zero).AsUInt32();

vs2 += Sse2.MultiplyAddAdjacent(Ssse3.MultiplyAddAdjacent(bytes1, tap1), Vector128<short>.One).AsUInt32();
vs2 += Sse2.MultiplyAddAdjacent(Ssse3.MultiplyAddAdjacent(bytes2, tap2), Vector128<short>.One).AsUInt32();
}
else if (AdvSimd.IsSupported)
{
// Widening byte sum (equivalent of SumAbsoluteDifferences against zero)
vs1 = AdvSimd.AddPairwiseWideningAndAdd(
vs1,
AdvSimd.AddPairwiseWideningAndAdd(
AdvSimd.AddPairwiseWidening(bytes1),
bytes2));

// Widening multiply + horizontal add (equivalent of MultiplyAddAdjacent chain).
// Because weights are all positive (1-32), unsigned byte * unsigned byte multiply is valid.
Vector128<ushort> wprod1 = AdvSimd.MultiplyWideningLower(bytes1.GetLower(), tap1.AsByte().GetLower());
wprod1 = AdvSimd.MultiplyWideningUpperAndAdd(wprod1, bytes1, tap1.AsByte());
vs2 = AdvSimd.AddPairwiseWideningAndAdd(vs2, wprod1);

Vector128<ushort> wprod2 = AdvSimd.MultiplyWideningLower(bytes2.GetLower(), tap2.AsByte().GetLower());
wprod2 = AdvSimd.MultiplyWideningUpperAndAdd(wprod2, bytes2, tap2.AsByte());
vs2 = AdvSimd.AddPairwiseWideningAndAdd(vs2, wprod2);
}
else
{
(Vector128<ushort> lo1, Vector128<ushort> hi1) = Vector128.Widen(bytes1);
(Vector128<ushort> lo2, Vector128<ushort> hi2) = Vector128.Widen(bytes2);
(Vector128<uint> sumLo, Vector128<uint> sumHi) = Vector128.Widen(lo1 + hi1 + lo2 + hi2);
vs1 += sumLo + sumHi;
vs2 += WeightedSumWidening128(bytes1, tap1) + WeightedSumWidening128(bytes2, tap2);

[MethodImpl(MethodImplOptions.AggressiveInlining)]
static Vector128<uint> WeightedSumWidening128(Vector128<byte> data, Vector128<sbyte> weights)
{
(Vector128<ushort> dLo, Vector128<ushort> dHi) = Vector128.Widen(data);
(Vector128<short> wLo, Vector128<short> wHi) = Vector128.Widen(weights);

(Vector128<int> pLo1, Vector128<int> pHi1) = Vector128.Widen(dLo.AsInt16() * wLo);
(Vector128<int> pLo2, Vector128<int> pHi2) = Vector128.Widen(dHi.AsInt16() * wHi);

return (pLo1 + pHi1 + pLo2 + pHi2).AsUInt32();
}
}
}
while (--blocks > 0);

vs2 += vps << 5;

s1 += Vector128.Sum(vs1);
s2 = Vector128.Sum(vs2);

s1 %= ModBase;
s2 %= ModBase;
}
while (length >= BlockSize);

if (length > 0)
{
UpdateScalarTail(ref sourceRef, length, ref s1, ref s2);
}

return (s2 << 16) | s1;
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static uint UpdateVector256(uint adler, ReadOnlySpan<byte> source)
{
Debug.Assert(source.Length >= Vector256<byte>.Count);

const int BlockSize = 32;

uint s1 = adler & 0xFFFF;
uint s2 = (adler >> 16) & 0xFFFF;

ref byte sourceRef = ref MemoryMarshal.GetReference(source);
int length = source.Length;

Vector256<sbyte> weights = Vector256.Create(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);

do
{
int n = Math.Min(length, NMax);
int blocks = n / BlockSize;
n = blocks * BlockSize;
length -= n;

Vector256<uint> vs1 = Vector256.CreateScalar(s1);
Vector256<uint> vs2 = Vector256.CreateScalar(s2);
Vector256<uint> vs3 = Vector256<uint>.Zero;

do
{
Vector256<byte> data = Vector256.LoadUnsafe(ref sourceRef);
sourceRef = ref Unsafe.Add(ref sourceRef, BlockSize);

Vector256<uint> vs1_0 = vs1;
vs1 += Avx2.SumAbsoluteDifferences(data, Vector256<byte>.Zero).AsUInt32();
vs3 += vs1_0;

Vector256<short> mad = Avx2.MultiplyAddAdjacent(data, weights);
vs2 += Avx2.MultiplyAddAdjacent(mad, Vector256<short>.One).AsUInt32();
}
while (--blocks > 0);

vs3 <<= 5;
vs2 += vs3;

s1 = (uint)Vector256.Sum(vs1.AsUInt64()); // SumAbsoluteDifferences stores the results in the even lanes
s2 = Vector256.Sum(vs2);

s1 %= ModBase;
s2 %= ModBase;
}
while (length >= BlockSize);

if (length > 0)
{
UpdateScalarTail(ref sourceRef, length, ref s1, ref s2);
}

return (s2 << 16) | s1;
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static uint UpdateVector512(uint adler, ReadOnlySpan<byte> source)
{
Debug.Assert(source.Length >= Vector512<byte>.Count);

const int BlockSize = 64;

uint s1 = adler & 0xFFFF;
uint s2 = (adler >> 16) & 0xFFFF;

ref byte sourceRef = ref MemoryMarshal.GetReference(source);
int length = source.Length;

Vector512<sbyte> weights = Vector512.Create(
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1,
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);

do
{
int n = Math.Min(length, NMax);
int blocks = n / BlockSize;
n = blocks * BlockSize;
length -= n;

Vector512<uint> vs1 = Vector512.CreateScalar(s1);
Vector512<uint> vs2 = Vector512.CreateScalar(s2);
Vector512<uint> vs3 = Vector512<uint>.Zero;

do
{
Vector512<byte> data = Vector512.LoadUnsafe(ref sourceRef);
sourceRef = ref Unsafe.Add(ref sourceRef, BlockSize);

Vector512<uint> vs1_0 = vs1;
vs1 += Avx512BW.SumAbsoluteDifferences(data, Vector512<byte>.Zero).AsUInt32();
vs3 += vs1_0;
vs2 += Avx512BW.MultiplyAddAdjacent(Avx512BW.MultiplyAddAdjacent(data, weights), Vector512<short>.One).AsUInt32();

Vector256<uint> sumLo = Avx2.SumAbsoluteDifferences(data.GetLower(), Vector256<byte>.Zero).AsUInt32();
vs2 += Vector512.Create(sumLo << 5, Vector256<uint>.Zero);
}
while (--blocks > 0);

vs3 <<= 6;
vs2 += vs3;

s1 = (uint)Vector512.Sum(vs1.AsUInt64());
s2 = Vector512.Sum(vs2);

s1 %= ModBase;
s2 %= ModBase;
}
while (length >= BlockSize);

if (length >= Vector256<byte>.Count)
{
return UpdateVector256((s2 << 16) | s1, MemoryMarshal.CreateReadOnlySpan(ref sourceRef, length));
}

if (length > 0)
{
UpdateScalarTail(ref sourceRef, length, ref s1, ref s2);
}

return (s2 << 16) | s1;
}

private static void UpdateScalarTail(ref byte sourceRef, int length, ref uint s1, ref uint s2)
{
Debug.Assert(length is > 0 and < NMax);

foreach (byte b in MemoryMarshal.CreateReadOnlySpan(ref sourceRef, length))
{
s1 += b;
s2 += s1;
}

s1 %= ModBase;
s2 %= ModBase;
}
#endif
}
}
Loading
Loading