Skip to content

Commit a5158df

Browse files
authored
'cmeq' and 'fcmeq' Vector64<T>.Zero/Vector128<T>.Zero ARM64 containment optimizations (#62933)
* Initial work * Added a comma to display * Cleanup * Fixing build * More cleanup * Update comment * Update comment * Added CompareEqual Vector64/128 with Zero tests * Do not contain op1 for now * Wrong intrinsic id used * Removing generated tests * Removing generated tests * Added CompareEqual tests * Supporting containment for first operand * Fix test build * Passing correct register * Check IsVectorZero before not allocing a register * Update comment * Fixing test * Minor format change * Fixed formatting * Renamed test * Adding AdvSimd_Arm64 tests: * Adding support for rest of 'cmeq' and 'fcmeq' instructions * Removing github csproj * Minor test fix * Fixed tests * Fix print * Minor format change * Fixing test * Added some emitter tests * Feedback * Update emitarm64.cpp * Feedback
1 parent 68a923a commit a5158df

13 files changed

Lines changed: 785 additions & 54 deletions

src/coreclr/jit/codegenarm64.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6922,9 +6922,12 @@ void CodeGen::genArm64EmitterUnitTests()
69226922

69236923
#ifdef ALL_ARM64_EMITTER_UNIT_TESTS
69246924
//
6925-
// R_R fmov/fcmp/fcvt
6925+
// R_R cmeq/fmov/fcmp/fcvt
69266926
//
69276927

6928+
// cmeq scalar
6929+
theEmitter->emitIns_R_R(INS_cmeq, EA_8BYTE, REG_V0, REG_V1);
6930+
69286931
// fmov to vector to vector
69296932
theEmitter->emitIns_Mov(INS_fmov, EA_8BYTE, REG_V0, REG_V2, /* canSkip */ false);
69306933
theEmitter->emitIns_Mov(INS_fmov, EA_4BYTE, REG_V1, REG_V3, /* canSkip */ false);

src/coreclr/jit/codegenlinear.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1657,7 +1657,7 @@ void CodeGen::genConsumeRegs(GenTree* tree)
16571657
#ifdef FEATURE_SIMD
16581658
// (In)Equality operation that produces bool result, when compared
16591659
// against Vector zero, marks its Vector Zero operand as contained.
1660-
assert(tree->OperIsLeaf() || tree->IsSIMDZero());
1660+
assert(tree->OperIsLeaf() || tree->IsSIMDZero() || tree->IsVectorZero());
16611661
#else
16621662
assert(tree->OperIsLeaf());
16631663
#endif

src/coreclr/jit/emitarm64.cpp

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4732,19 +4732,19 @@ void emitter::emitIns_R_R(
47324732
assert(isVectorRegister(reg1));
47334733
assert(isVectorRegister(reg2));
47344734

4735-
if (isValidVectorDatasize(size))
4735+
if (insOptsAnyArrangement(opt))
47364736
{
47374737
// Vector operation
4738-
assert(insOptsAnyArrangement(opt));
4738+
assert(isValidVectorDatasize(size));
47394739
assert(isValidArrangement(size, opt));
47404740
elemsize = optGetElemsize(opt);
47414741
fmt = IF_DV_2M;
47424742
}
47434743
else
47444744
{
4745-
NYI("Untested");
47464745
// Scalar operation
4747-
assert(size == EA_8BYTE); // Only Double supported
4746+
assert(size == EA_8BYTE);
4747+
assert(insOptsNone(opt));
47484748
fmt = IF_DV_2L;
47494749
}
47504750
break;
@@ -12971,6 +12971,11 @@ void emitter::emitDispIns(
1297112971
emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
1297212972
emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
1297312973
}
12974+
if (ins == INS_fcmeq)
12975+
{
12976+
printf(", ");
12977+
emitDispImm(0, false);
12978+
}
1297412979
break;
1297512980

1297612981
case IF_DV_2P: // DV_2P ................ ......nnnnnddddd Vd Vn (aes*, sha1su1)
@@ -12990,6 +12995,11 @@ void emitter::emitDispIns(
1299012995
emitDispVectorReg(id->idReg1(), id->idInsOpt(), true);
1299112996
emitDispVectorReg(id->idReg2(), id->idInsOpt(), false);
1299212997
}
12998+
if (ins == INS_cmeq)
12999+
{
13000+
printf(", ");
13001+
emitDispImm(0, false);
13002+
}
1299313003
break;
1299413004

1299513005
case IF_DV_2N: // DV_2N .........iiiiiii ......nnnnnddddd Vd Vn imm (shift - scalar)
@@ -13126,6 +13136,11 @@ void emitter::emitDispIns(
1312613136
emitDispReg(id->idReg1(), size, true);
1312713137
emitDispReg(id->idReg2(), size, false);
1312813138
}
13139+
if (fmt == IF_DV_2L && ins == INS_cmeq)
13140+
{
13141+
printf(", ");
13142+
emitDispImm(0, false);
13143+
}
1312913144
break;
1313013145

1313113146
case IF_DV_2H: // DV_2H X........X...... ......nnnnnddddd Rd Vn (fmov, fcvtXX - to general)

src/coreclr/jit/gentree.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17735,6 +17735,20 @@ bool GenTree::isContainableHWIntrinsic() const
1773517735
return true;
1773617736
}
1773717737

17738+
default:
17739+
{
17740+
return false;
17741+
}
17742+
}
17743+
#elif TARGET_ARM64
17744+
switch (AsHWIntrinsic()->GetHWIntrinsicId())
17745+
{
17746+
case NI_Vector64_get_Zero:
17747+
case NI_Vector128_get_Zero:
17748+
{
17749+
return true;
17750+
}
17751+
1773817752
default:
1773917753
{
1774017754
return false;

src/coreclr/jit/gentree.h

Lines changed: 77 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1704,6 +1704,8 @@ struct GenTree
17041704
inline bool IsIntegralConst(ssize_t constVal) const;
17051705
inline bool IsIntegralConstVector(ssize_t constVal) const;
17061706
inline bool IsSIMDZero() const;
1707+
inline bool IsFloatPositiveZero() const;
1708+
inline bool IsVectorZero() const;
17071709

17081710
inline bool IsBoxedValue();
17091711

@@ -2097,7 +2099,7 @@ struct GenTree
20972099

20982100
inline bool IsCnsFltOrDbl() const;
20992101

2100-
inline bool IsCnsNonZeroFltOrDbl();
2102+
inline bool IsCnsNonZeroFltOrDbl() const;
21012103

21022104
bool IsIconHandle() const
21032105
{
@@ -7650,6 +7652,79 @@ inline bool GenTree::IsSIMDZero() const
76507652
return false;
76517653
}
76527654

7655+
//-------------------------------------------------------------------
7656+
// IsFloatPositiveZero: returns true if this is exactly a const float value of postive zero (+0.0)
7657+
//
7658+
// Returns:
7659+
// True if this represents a const floating-point value of exactly positive zero (+0.0).
7660+
// Will return false if the value is negative zero (-0.0).
7661+
//
7662+
inline bool GenTree::IsFloatPositiveZero() const
7663+
{
7664+
return !(IsCnsNonZeroFltOrDbl());
7665+
}
7666+
7667+
//-------------------------------------------------------------------
7668+
// IsVectorZero: returns true if this is an integral or floating-point (SIMD or HW intrinsic) vector
7669+
// with all its elements equal to zero.
7670+
//
7671+
// Returns:
7672+
// True if this represents an integral or floating-point const (SIMD or HW intrinsic) vector with all its elements
7673+
// equal to zero.
7674+
//
7675+
// TODO: We already have IsSIMDZero() and IsIntegralConstVector(0),
7676+
// however, IsSIMDZero() does not cover hardware intrinsics, and IsIntegralConstVector(0) does not cover floating
7677+
// point. In order to not risk adverse behaviour by modifying those, this function 'IsVectorZero' was introduced.
7678+
// At some point, it makes sense to normalize this logic to be a single function call rather than have several
7679+
// separate ones; preferably this one.
7680+
inline bool GenTree::IsVectorZero() const
7681+
{
7682+
#ifdef FEATURE_SIMD
7683+
if (gtOper == GT_SIMD)
7684+
{
7685+
const GenTreeSIMD* node = AsSIMD();
7686+
7687+
if (node->GetSIMDIntrinsicId() == SIMDIntrinsicInit)
7688+
{
7689+
return (node->Op(1)->IsIntegralConst(0) || node->Op(1)->IsFloatPositiveZero());
7690+
}
7691+
}
7692+
#endif
7693+
7694+
#ifdef FEATURE_HW_INTRINSICS
7695+
if (gtOper == GT_HWINTRINSIC)
7696+
{
7697+
const GenTreeHWIntrinsic* node = AsHWIntrinsic();
7698+
const var_types simdBaseType = node->GetSimdBaseType();
7699+
7700+
if (varTypeIsIntegral(simdBaseType) || varTypeIsFloating(simdBaseType))
7701+
{
7702+
const NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
7703+
7704+
if (node->GetOperandCount() == 0)
7705+
{
7706+
#if defined(TARGET_XARCH)
7707+
return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero);
7708+
#elif defined(TARGET_ARM64)
7709+
return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero);
7710+
#endif // !TARGET_XARCH && !TARGET_ARM64
7711+
}
7712+
else if ((node->GetOperandCount() == 1) &&
7713+
(node->Op(1)->IsIntegralConst(0) || node->Op(1)->IsFloatPositiveZero()))
7714+
{
7715+
#if defined(TARGET_XARCH)
7716+
return (intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create);
7717+
#elif defined(TARGET_ARM64)
7718+
return (intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create);
7719+
#endif // !TARGET_XARCH && !TARGET_ARM64
7720+
}
7721+
}
7722+
}
7723+
#endif // FEATURE_HW_INTRINSICS
7724+
7725+
return false;
7726+
}
7727+
76537728
inline bool GenTree::IsBoxedValue()
76547729
{
76557730
assert(gtOper != GT_BOX || AsBox()->BoxOp() != nullptr);
@@ -8328,7 +8403,7 @@ inline bool GenTree::IsCnsFltOrDbl() const
83288403
return OperGet() == GT_CNS_DBL;
83298404
}
83308405

8331-
inline bool GenTree::IsCnsNonZeroFltOrDbl()
8406+
inline bool GenTree::IsCnsNonZeroFltOrDbl() const
83328407
{
83338408
if (OperGet() == GT_CNS_DBL)
83348409
{

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,29 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
496496
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
497497
break;
498498

499+
case NI_AdvSimd_CompareEqual:
500+
case NI_AdvSimd_Arm64_CompareEqual:
501+
case NI_AdvSimd_Arm64_CompareEqualScalar:
502+
if (intrin.op1->isContained())
503+
{
504+
assert(HWIntrinsicInfo::SupportsContainment(intrin.id));
505+
assert(intrin.op1->IsVectorZero());
506+
507+
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op2Reg, opt);
508+
}
509+
else if (intrin.op2->isContained())
510+
{
511+
assert(HWIntrinsicInfo::SupportsContainment(intrin.id));
512+
assert(intrin.op2->IsVectorZero());
513+
514+
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
515+
}
516+
else
517+
{
518+
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
519+
}
520+
break;
521+
499522
case NI_AdvSimd_AbsoluteCompareLessThan:
500523
case NI_AdvSimd_AbsoluteCompareLessThanOrEqual:
501524
case NI_AdvSimd_CompareLessThan:

src/coreclr/jit/hwintrinsiclistarm64.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ HARDWARE_INTRINSIC(AdvSimd, BitwiseClear,
240240
HARDWARE_INTRINSIC(AdvSimd, BitwiseSelect, -1, 3, {INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl, INS_bsl}, HW_Category_SIMD, HW_Flag_SpecialCodeGen)
241241
HARDWARE_INTRINSIC(AdvSimd, Ceiling, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
242242
HARDWARE_INTRINSIC(AdvSimd, CeilingScalar, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp, INS_frintp}, HW_Category_SIMD, HW_Flag_SIMDScalar)
243-
HARDWARE_INTRINSIC(AdvSimd, CompareEqual, -1, 2, {INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_invalid, INS_invalid, INS_fcmeq, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative)
243+
HARDWARE_INTRINSIC(AdvSimd, CompareEqual, -1, 2, {INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_cmeq, INS_invalid, INS_invalid, INS_fcmeq, INS_invalid}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
244244
HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThan, -1, 2, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
245245
HARDWARE_INTRINSIC(AdvSimd, CompareGreaterThanOrEqual, -1, 2, {INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_cmge, INS_cmhs, INS_invalid, INS_invalid, INS_fcmge, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
246246
HARDWARE_INTRINSIC(AdvSimd, CompareLessThan, -1, 2, {INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_cmgt, INS_cmhi, INS_invalid, INS_invalid, INS_fcmgt, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen)
@@ -492,8 +492,8 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, AddPairwiseScalar,
492492
HARDWARE_INTRINSIC(AdvSimd_Arm64, AddSaturate, -1, 2, {INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_suqadd, INS_usqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics)
493493
HARDWARE_INTRINSIC(AdvSimd_Arm64, AddSaturateScalar, 8, 2, {INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_sqadd, INS_uqadd, INS_suqadd, INS_usqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen)
494494
HARDWARE_INTRINSIC(AdvSimd_Arm64, Ceiling, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_frintp}, HW_Category_SIMD, HW_Flag_NoFlag)
495-
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_invalid, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative)
496-
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_fcmeq, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar)
495+
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_invalid, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
496+
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmeq, INS_cmeq, INS_fcmeq, INS_fcmeq}, HW_Category_SIMD, HW_Flag_Commutative|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
497497
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThan, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmgt, INS_cmhi, INS_invalid, INS_fcmgt}, HW_Category_SIMD, HW_Flag_NoFlag)
498498
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqual, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_invalid, INS_fcmge}, HW_Category_SIMD, HW_Flag_NoFlag)
499499
HARDWARE_INTRINSIC(AdvSimd_Arm64, CompareGreaterThanOrEqualScalar, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmge, INS_cmhs, INS_fcmge, INS_fcmge}, HW_Category_SIMD, HW_Flag_SIMDScalar)

src/coreclr/jit/instrsarm64.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -312,8 +312,8 @@ INST4(neg, "neg", 0, IF_EN4G, 0x4B0003E0, 0x4B0003E0,
312312
INST4(cmeq, "cmeq", 0, IF_EN4H, 0x7EE08C00, 0x2E208C00, 0x5E209800, 0x0E209800)
313313
// cmeq Vd,Vn,Vm DV_3E 01111110111mmmmm 100011nnnnnddddd 7EE0 8C00 Vd,Vn,Vm (scalar)
314314
// cmeq Vd,Vn,Vm DV_3A 0Q101110XX1mmmmm 100011nnnnnddddd 2E20 8C00 Vd,Vn,Vm (vector)
315-
// cmeq Vd,Vn DV_2L 01011110XX100000 100110nnnnnddddd 5E20 9800 Vd,Vn (scalar)
316-
// cmeq Vd,Vn DV_2M 0Q001110XX100000 100110nnnnnddddd 0E20 9800 Vd,Vn (vector)
315+
// cmeq Vd,Vn,#0 DV_2L 01011110XX100000 100110nnnnnddddd 5E20 9800 Vd,Vn,#0 (scalar - with zero)
316+
// cmeq Vd,Vn,#0 DV_2M 0Q001110XX100000 100110nnnnnddddd 0E20 9800 Vd,Vn,#0 (vector - with zero)
317317

318318
INST4(cmge, "cmge", 0, IF_EN4H, 0x5EE03C00, 0x0E203C00, 0x7E208800, 0x2E208800)
319319
// cmge Vd,Vn,Vm DV_3E 01011110111mmmmm 001111nnnnnddddd 5EE0 3C00 Vd,Vn,Vm (scalar)
@@ -331,8 +331,8 @@ INST4(cmgt, "cmgt", 0, IF_EN4H, 0x5EE03400, 0x0E203400,
331331
INST4(fcmeq, "fcmeq", 0, IF_EN4I, 0x5E20E400, 0x0E20E400, 0x5EA0D800, 0x0EA0D800)
332332
// fcmeq Vd,Vn,Vm DV_3D 010111100X1mmmmm 111001nnnnnddddd 5E20 E400 Vd Vn Vm (scalar)
333333
// fcmeq Vd,Vn,Vm DV_3B 0Q0011100X1mmmmm 111001nnnnnddddd 0E20 E400 Vd,Vn,Vm (vector)
334-
// fcmeq Vd,Vn DV_2G 010111101X100000 110110nnnnnddddd 5EA0 D800 Vd Vn (scalar)
335-
// fcmeq Vd,Vn DV_2A 0Q0011101X100000 110110nnnnnddddd 0EA0 D800 Vd Vn (vector)
334+
// fcmeq Vd,Vn,#0 DV_2G 010111101X100000 110110nnnnnddddd 5EA0 D800 Vd Vn,#0 (scalar - with zero)
335+
// fcmeq Vd,Vn,#0 DV_2A 0Q0011101X100000 110110nnnnnddddd 0EA0 D800 Vd Vn,#0 (vector - with zero)
336336

337337
INST4(fcmge, "fcmge", 0, IF_EN4I, 0x7E20E400, 0x2E20E400, 0x7EA0C800, 0x2EA0C800)
338338
// fcmge Vd,Vn,Vm DV_3D 011111100X1mmmmm 111001nnnnnddddd 7E20 E400 Vd Vn Vm (scalar)

src/coreclr/jit/lowerarmarch.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1842,6 +1842,21 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
18421842
MakeSrcContained(node, intrin.op4);
18431843
break;
18441844

1845+
case NI_AdvSimd_CompareEqual:
1846+
case NI_AdvSimd_Arm64_CompareEqual:
1847+
case NI_AdvSimd_Arm64_CompareEqualScalar:
1848+
{
1849+
if (intrin.op1->IsVectorZero())
1850+
{
1851+
MakeSrcContained(node, intrin.op1);
1852+
}
1853+
else if (intrin.op2->IsVectorZero())
1854+
{
1855+
MakeSrcContained(node, intrin.op2);
1856+
}
1857+
break;
1858+
}
1859+
18451860
case NI_Vector64_CreateScalarUnsafe:
18461861
case NI_Vector128_CreateScalarUnsafe:
18471862
case NI_AdvSimd_DuplicateToVector64:

0 commit comments

Comments
 (0)