Skip to content

Commit 1958c7e

Browse files
authored
Handle final elements in SpanHelpers.Contains for byte and char vectorized (#67492)
* Handle final elements in SpanHelpers.Contains(ref byte, byte, int) vectorized
* Handle final elements in SpanHelpers.Contains(ref char, char, int) vectorized
* Use equality operator instead of Vector<T>.Zero.Equals due to codegen issue. Cf. #67492 (comment)
1 parent c92e8d5 commit 1958c7e

2 files changed

Lines changed: 75 additions & 61 deletions

File tree

src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs

Lines changed: 35 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -341,26 +341,26 @@ public static bool Contains(ref byte searchSpace, byte value, int length)
341341

342342
uint uValue = value; // Use uint for comparisons to avoid unnecessary 8->32 extensions
343343
nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations
344-
nuint lengthToExamine = (nuint)(uint)length;
344+
nuint lengthToExamine = (uint)length;
345345

346346
if (Vector.IsHardwareAccelerated && length >= Vector<byte>.Count * 2)
347347
{
348348
lengthToExamine = UnalignedCountVector(ref searchSpace);
349349
}
350350

351-
SequentialScan:
352351
while (lengthToExamine >= 8)
353352
{
354353
lengthToExamine -= 8;
355-
356-
if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 0) ||
357-
uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1) ||
358-
uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2) ||
359-
uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3) ||
360-
uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4) ||
361-
uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5) ||
362-
uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6) ||
363-
uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7))
354+
ref byte start = ref Unsafe.AddByteOffset(ref searchSpace, offset);
355+
356+
if (uValue == Unsafe.AddByteOffset(ref start, 0) ||
357+
uValue == Unsafe.AddByteOffset(ref start, 1) ||
358+
uValue == Unsafe.AddByteOffset(ref start, 2) ||
359+
uValue == Unsafe.AddByteOffset(ref start, 3) ||
360+
uValue == Unsafe.AddByteOffset(ref start, 4) ||
361+
uValue == Unsafe.AddByteOffset(ref start, 5) ||
362+
uValue == Unsafe.AddByteOffset(ref start, 6) ||
363+
uValue == Unsafe.AddByteOffset(ref start, 7))
364364
{
365365
goto Found;
366366
}
@@ -371,11 +371,12 @@ public static bool Contains(ref byte searchSpace, byte value, int length)
371371
if (lengthToExamine >= 4)
372372
{
373373
lengthToExamine -= 4;
374+
ref byte start = ref Unsafe.AddByteOffset(ref searchSpace, offset);
374375

375-
if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 0) ||
376-
uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1) ||
377-
uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2) ||
378-
uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3))
376+
if (uValue == Unsafe.AddByteOffset(ref start, 0) ||
377+
uValue == Unsafe.AddByteOffset(ref start, 1) ||
378+
uValue == Unsafe.AddByteOffset(ref start, 2) ||
379+
uValue == Unsafe.AddByteOffset(ref start, 3))
379380
{
380381
goto Found;
381382
}
@@ -385,24 +386,25 @@ public static bool Contains(ref byte searchSpace, byte value, int length)
385386

386387
while (lengthToExamine > 0)
387388
{
388-
lengthToExamine -= 1;
389+
lengthToExamine--;
389390

390391
if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
391392
goto Found;
392393

393-
offset += 1;
394+
offset++;
394395
}
395396

396-
if (Vector.IsHardwareAccelerated && (offset < (nuint)(uint)length))
397+
if (Vector.IsHardwareAccelerated && (offset < (uint)length))
397398
{
398-
lengthToExamine = (((nuint)(uint)length - offset) & (nuint)~(Vector<byte>.Count - 1));
399+
lengthToExamine = ((uint)length - offset) & (nuint)~(Vector<byte>.Count - 1);
399400

400-
Vector<byte> values = new Vector<byte>(value);
401+
Vector<byte> values = new(value);
402+
Vector<byte> matches;
401403

402-
while (lengthToExamine > offset)
404+
while (offset < lengthToExamine)
403405
{
404-
var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset));
405-
if (Vector<byte>.Zero.Equals(matches))
406+
matches = Vector.Equals(values, LoadVector(ref searchSpace, offset));
407+
if (matches == Vector<byte>.Zero)
406408
{
407409
offset += (nuint)Vector<byte>.Count;
408410
continue;
@@ -411,10 +413,17 @@ public static bool Contains(ref byte searchSpace, byte value, int length)
411413
goto Found;
412414
}
413415

414-
if (offset < (nuint)(uint)length)
416+
// The total length is at least Vector<byte>.Count, so instead of falling back to a
417+
// sequential scan for the remainder, we check the vector read from the end -- note: unaligned read necessary.
418+
// We do this only if at least one element is left.
419+
if (offset < (uint)length)
415420
{
416-
lengthToExamine = ((nuint)(uint)length - offset);
417-
goto SequentialScan;
421+
offset = (uint)(length - Vector<byte>.Count);
422+
matches = Vector.Equals(values, LoadVector(ref searchSpace, offset));
423+
if (matches != Vector<byte>.Zero)
424+
{
425+
goto Found;
426+
}
418427
}
419428
}
420429

src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs

Lines changed: 40 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ public static unsafe int SequenceCompareTo(ref char first, int firstLength, ref
388388

389389
while (minLength >= (i + (nuint)(sizeof(nuint) / sizeof(char))))
390390
{
391-
if (Unsafe.ReadUnaligned<nuint> (ref Unsafe.As<char, byte>(ref Unsafe.Add(ref first, (nint)i))) !=
391+
if (Unsafe.ReadUnaligned<nuint>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref first, (nint)i))) !=
392392
Unsafe.ReadUnaligned<nuint>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref second, (nint)i))))
393393
{
394394
break;
@@ -428,83 +428,88 @@ public static unsafe bool Contains(ref char searchSpace, char value, int length)
428428

429429
fixed (char* pChars = &searchSpace)
430430
{
431-
char* pCh = pChars;
432-
char* pEndCh = pCh + length;
431+
nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations
432+
nuint lengthToExamine = (uint)length;
433433

434434
if (Vector.IsHardwareAccelerated && length >= Vector<ushort>.Count * 2)
435435
{
436436
// Figure out how many characters to read sequentially until we are vector aligned
437437
// This is equivalent to:
438-
// unaligned = ((int)pCh % Unsafe.SizeOf<Vector<ushort>>()) / elementsPerByte
438+
// unaligned = ((int)pChars % Unsafe.SizeOf<Vector<ushort>>()) / ElementsPerByte
439439
// lengthToExamine = (Vector<ushort>.Count - unaligned) % Vector<ushort>.Count
440-
const int elementsPerByte = sizeof(ushort) / sizeof(byte);
441-
int unaligned = ((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) / elementsPerByte;
442-
length = (Vector<ushort>.Count - unaligned) & (Vector<ushort>.Count - 1);
440+
const int ElementsPerByte = sizeof(ushort) / sizeof(byte);
441+
int unaligned = (int)((uint)((int)pChars & (Unsafe.SizeOf<Vector<ushort>>() - 1)) / ElementsPerByte);
442+
lengthToExamine = (uint)((Vector<ushort>.Count - unaligned) & (Vector<ushort>.Count - 1));
443443
}
444444

445-
SequentialScan:
446-
while (length >= 4)
445+
while (lengthToExamine >= 4)
447446
{
448-
length -= 4;
447+
lengthToExamine -= 4;
448+
char* pStart = pChars + offset;
449449

450-
if (value == *pCh ||
451-
value == *(pCh + 1) ||
452-
value == *(pCh + 2) ||
453-
value == *(pCh + 3))
450+
if (value == pStart[0] ||
451+
value == pStart[1] ||
452+
value == pStart[2] ||
453+
value == pStart[3])
454454
{
455455
goto Found;
456456
}
457457

458-
pCh += 4;
458+
offset += 4;
459459
}
460460

461-
while (length > 0)
461+
while (lengthToExamine > 0)
462462
{
463-
length--;
463+
lengthToExamine--;
464464

465-
if (value == *pCh)
465+
if (value == pChars[offset])
466466
goto Found;
467467

468-
pCh++;
468+
offset++;
469469
}
470470

471471
// We get past the sequential scan above only if IsHardwareAccelerated is true. However, we still have the redundant check to allow
472-
// the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated.
473-
if (Vector.IsHardwareAccelerated && pCh < pEndCh)
472+
// the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware acceleration.
473+
if (Vector.IsHardwareAccelerated && (offset < (uint)length))
474474
{
475475
// Get the highest multiple of Vector<ushort>.Count that is within the search space.
476476
// That will be how many times we iterate in the loop below.
477-
// This is equivalent to: length = Vector<ushort>.Count * ((int)(pEndCh - pCh) / Vector<ushort>.Count)
478-
length = (int)((pEndCh - pCh) & ~(Vector<ushort>.Count - 1));
477+
// This is equivalent to: lengthToExamine = Vector<ushort>.Count * (((uint)length - offset) / Vector<ushort>.Count)
478+
lengthToExamine = ((uint)length - offset) & (nuint)~(Vector<ushort>.Count - 1);
479479

480-
// Get comparison Vector
481-
Vector<ushort> vComparison = new Vector<ushort>(value);
480+
Vector<ushort> values = new(value);
481+
Vector<ushort> matches;
482482

483-
while (length > 0)
483+
while (offset < lengthToExamine)
484484
{
485485
// Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pChars + offset is always vector aligned
486-
Debug.Assert(((int)pCh & (Unsafe.SizeOf<Vector<ushort>>() - 1)) == 0);
487-
Vector<ushort> vMatches = Vector.Equals(vComparison, Unsafe.Read<Vector<ushort>>(pCh));
488-
if (Vector<ushort>.Zero.Equals(vMatches))
486+
Debug.Assert(((int)(pChars + offset) % Unsafe.SizeOf<Vector<ushort>>()) == 0);
487+
matches = Vector.Equals(values, Unsafe.Read<Vector<ushort>>(pChars + offset));
488+
if (matches == Vector<ushort>.Zero)
489489
{
490-
pCh += Vector<ushort>.Count;
491-
length -= Vector<ushort>.Count;
490+
offset += (nuint)Vector<ushort>.Count;
492491
continue;
493492
}
494493

495494
goto Found;
496495
}
497496

498-
if (pCh < pEndCh)
497+
// The total length is at least Vector<ushort>.Count, so instead of falling back to a
498+
// sequential scan for the remainder, we check the vector read from the end -- note: unaligned read necessary.
499+
// We do this only if at least one element is left.
500+
if (offset < (uint)length)
499501
{
500-
length = (int)(pEndCh - pCh);
501-
goto SequentialScan;
502+
matches = Vector.Equals(values, Unsafe.ReadUnaligned<Vector<ushort>>(pChars + (uint)length - (uint)Vector<ushort>.Count));
503+
if (matches != Vector<ushort>.Zero)
504+
{
505+
goto Found;
506+
}
502507
}
503508
}
504509

505510
return false;
506511

507-
Found:
512+
Found:
508513
return true;
509514
}
510515
}

0 commit comments

Comments
 (0)