Skip to content

Commit e138ff1

Browse files
Implement StoreVectorNxM for Arm64 (#94129)
* Implement StoreVectorNxM for Arm64 * Use ST1 instruction variants while for StoreVectorNxM API calls * Fix formatting * Add minor fixes * Update comments for LoadVectorNxM & LoadVectorAndUnzipNxM
1 parent df30ff8 commit e138ff1

11 files changed

Lines changed: 1370 additions & 264 deletions

File tree

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1745,7 +1745,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
17451745
}
17461746

17471747
case NI_AdvSimd_StoreVector64x2:
1748+
case NI_AdvSimd_StoreVector64x3:
1749+
case NI_AdvSimd_StoreVector64x4:
17481750
case NI_AdvSimd_Arm64_StoreVector128x2:
1751+
case NI_AdvSimd_Arm64_StoreVector128x3:
1752+
case NI_AdvSimd_Arm64_StoreVector128x4:
17491753
{
17501754
assert(sig->numArgs == 2);
17511755
assert(retType == TYP_VOID);

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -797,7 +797,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
797797
break;
798798

799799
case NI_AdvSimd_StoreVector64x2:
800+
case NI_AdvSimd_StoreVector64x3:
801+
case NI_AdvSimd_StoreVector64x4:
800802
case NI_AdvSimd_Arm64_StoreVector128x2:
803+
case NI_AdvSimd_Arm64_StoreVector128x3:
804+
case NI_AdvSimd_Arm64_StoreVector128x4:
801805
{
802806
unsigned regCount = 0;
803807

@@ -817,7 +821,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
817821
assert(argReg == argNode->GetRegNum());
818822
argReg = REG_NEXT(argReg);
819823
}
820-
assert(regCount == 2);
824+
assert((ins == INS_st1_2regs && regCount == 2) || (ins == INS_st1_3regs && regCount == 3) ||
825+
(ins == INS_st1_4regs && regCount == 4));
821826
#endif
822827

823828
GetEmitter()->emitIns_R_R(ins, emitSize, op2Reg, op1Reg, opt);

src/coreclr/jit/hwintrinsiclistarm64.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -479,7 +479,9 @@ HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningUpper,
479479
HARDWARE_INTRINSIC(AdvSimd, SqrtScalar, 8, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsqrt, INS_fsqrt}, HW_Category_SIMD, HW_Flag_SIMDScalar)
480480
HARDWARE_INTRINSIC(AdvSimd, Store, -1, 2, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen)
481481
HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, -1, 3, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen)
482-
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x2, 8, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_invalid, INS_invalid, INS_st2, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
482+
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x2, 8, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_invalid, INS_invalid, INS_st1_2regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
483+
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x3, 8, 2, true, {INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_invalid, INS_invalid, INS_st1_3regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
484+
HARDWARE_INTRINSIC(AdvSimd, StoreVector64x4, 8, 2, true, {INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_invalid, INS_invalid, INS_st1_4regs, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
483485
HARDWARE_INTRINSIC(AdvSimd, Subtract, -1, 2, true, {INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_sub, INS_fsub, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
484486
HARDWARE_INTRINSIC(AdvSimd, SubtractHighNarrowingLower, 8, 2, true, {INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_subhn, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_NoFlag)
485487
HARDWARE_INTRINSIC(AdvSimd, SubtractHighNarrowingUpper, 16, 3, true, {INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_subhn2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics)
@@ -671,7 +673,9 @@ HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePair,
671673
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalar, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stp, INS_stp, INS_invalid, INS_invalid, INS_stp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
672674
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairScalarNonTemporal, 8, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_stnp, INS_stnp, INS_invalid, INS_invalid, INS_stnp, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
673675
HARDWARE_INTRINSIC(AdvSimd_Arm64, StorePairNonTemporal, -1, 3, true, {INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stnp, INS_stp}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg|HW_Flag_SpecialCodeGen)
674-
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x2, 16, 2, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
676+
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x2, 16, 2, true, {INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs, INS_st1_2regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
677+
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x3, 16, 2, true, {INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs, INS_st1_3regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
678+
HARDWARE_INTRINSIC(AdvSimd_Arm64, StoreVector128x4, 16, 2, true, {INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs, INS_st1_4regs}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NeedsConsecutiveRegisters)
675679
HARDWARE_INTRINSIC(AdvSimd_Arm64, Subtract, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsub}, HW_Category_SIMD, HW_Flag_NoFlag)
676680
HARDWARE_INTRINSIC(AdvSimd_Arm64, SubtractSaturateScalar, 8, 2, true, {INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_sqsub, INS_uqsub, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_SIMDScalar)
677681
HARDWARE_INTRINSIC(AdvSimd_Arm64, TransposeEven, -1, 2, true, {INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1, INS_trn1}, HW_Category_SIMD, HW_Flag_NoFlag)

src/coreclr/jit/lsraarm64.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1598,7 +1598,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
15981598
*pDstCount = 1;
15991599
break;
16001600
case NI_AdvSimd_StoreVector64x2:
1601+
case NI_AdvSimd_StoreVector64x3:
1602+
case NI_AdvSimd_StoreVector64x4:
16011603
case NI_AdvSimd_Arm64_StoreVector128x2:
1604+
case NI_AdvSimd_Arm64_StoreVector128x3:
1605+
case NI_AdvSimd_Arm64_StoreVector128x4:
16021606
assert(intrin.op1 != nullptr);
16031607
srcCount += BuildConsecutiveRegistersForUse(intrin.op2);
16041608
assert(dstCount == 0);

0 commit comments

Comments
 (0)