From fa8374316817a66661f93ae4fbab9fd0525903c9 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 11 Jun 2024 08:29:16 -0700 Subject: [PATCH 1/6] Share more of the TYP_MASK handling and support rewriting TYP_MASK operands in rationalization --- src/coreclr/jit/compiler.h | 14 +- src/coreclr/jit/gentree.cpp | 84 +++++++- src/coreclr/jit/gentree.h | 2 + src/coreclr/jit/hwintrinsic.cpp | 13 +- src/coreclr/jit/hwintrinsicarm64.cpp | 44 +--- src/coreclr/jit/hwintrinsicxarch.cpp | 56 ++---- src/coreclr/jit/morph.cpp | 290 ++++++++++++--------------- src/coreclr/jit/rationalize.cpp | 45 ++++- src/coreclr/jit/valuenum.cpp | 19 +- 9 files changed, 285 insertions(+), 282 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 75fa2d9e8e2a48..ed907fa43fa7e4 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3185,6 +3185,10 @@ class Compiler GenTree* gtNewSimdAbsNode( var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize); +#if defined(TARGET_ARM64) + GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize); +#endif + GenTree* gtNewSimdBinOpNode(genTreeOps op, var_types type, GenTree* op1, @@ -3223,6 +3227,8 @@ class Compiler CorInfoType simdBaseJitType, unsigned simdSize); + GenTree* gtNewSimdCvtMaskToVectorNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize); + GenTree* gtNewSimdCvtNode(var_types type, GenTree* op1, CorInfoType simdTargetBaseJitType, @@ -3235,6 +3241,8 @@ class Compiler CorInfoType simdSourceBaseJitType, unsigned simdSize); + GenTree* gtNewSimdCvtVectorToMaskNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize); + GenTree* gtNewSimdCreateBroadcastNode( var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize); @@ -3516,12 +3524,6 @@ class Compiler GenTreeIndir* gtNewMethodTableLookup(GenTree* obj); -#if defined(TARGET_ARM64) - GenTree* gtNewSimdConvertVectorToMaskNode(var_types type, GenTree* node, CorInfoType simdBaseJitType, unsigned simdSize); - GenTree* gtNewSimdConvertMaskToVectorNode(GenTreeHWIntrinsic* node, var_types type); - GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize); -#endif - //------------------------------------------------------------------------ // Other GenTree functions diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 98d6bb788d450c..33a64dff02b7cf 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21530,6 +21530,35 @@ GenTree* Compiler::gtNewSimdCeilNode(var_types type, GenTree* op1, CorInfoType s return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize); } +//------------------------------------------------------------------------ +// gtNewSimdCvtMaskToVectorNode: Convert a HW instrinsic mask node to a vector +// +// Arguments: +// type -- The type of the node to convert to +// op1 -- The node to convert +// simdBaseJitType -- the base jit type of the converted node +// simdSize -- the simd size of the converted node +// +// Return Value: +// The node converted to the given type +// +GenTree* Compiler::gtNewSimdCvtMaskToVectorNode(var_types type, + GenTree* op1, + CorInfoType simdBaseJitType, + unsigned simdSize) +{ + assert(varTypeIsMask(op1)); + assert(varTypeIsSIMD(type)); + +#if defined(TARGET_XARCH) + return gtNewSimdHWIntrinsicNode(type, op1, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); +#elif defined(TARGET_ARM64) + return gtNewSimdHWIntrinsicNode(type, op1, NI_Sve_ConvertMaskToVector, simdBaseJitType, simdSize); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 +} + GenTree* Compiler::gtNewSimdCvtNode(var_types type, GenTree* op1, CorInfoType simdTargetBaseJitType, @@ -21892,6 +21921,37 @@ GenTree* Compiler::gtNewSimdCvtNativeNode(var_types type, return gtNewSimdHWIntrinsicNode(type, op1, hwIntrinsicID, simdSourceBaseJitType, simdSize); } +//------------------------------------------------------------------------ +// gtNewSimdCvtVectorToMaskNode: Convert a HW instrinsic vector node to a mask +// +// Arguments: +// type -- The type of the mask to produce. +// op1 -- The node to convert +// simdBaseJitType -- the base jit type of the converted node +// simdSize -- the simd size of the converted node +// +// Return Value: +// The node converted to the a mask type +// +GenTree* Compiler::gtNewSimdCvtVectorToMaskNode(var_types type, + GenTree* op1, + CorInfoType simdBaseJitType, + unsigned simdSize) +{ + assert(varTypeIsMask(type)); + assert(varTypeIsSIMD(op1)); + +#if defined(TARGET_XARCH) + return gtNewSimdHWIntrinsicNode(TYP_MASK, op1, NI_EVEX_ConvertVectorToMask, simdBaseJitType, simdSize); +#elif defined(TARGET_ARM64) + // We use cmpne which requires an embedded mask. + GenTree* trueMask = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(TYP_MASK, trueMask, op1, NI_Sve_ConvertVectorToMask, simdBaseJitType, simdSize); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 +} + GenTree* Compiler::gtNewSimdCmpOpNode( genTreeOps op, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) { @@ -22569,19 +22629,15 @@ GenTree* Compiler::gtNewSimdCmpOpNode( assert(intrinsic != NI_Illegal); -#if defined(TARGET_XARCH) if (needsConvertMaskToVector) { GenTree* retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, intrinsic, simdBaseJitType, simdSize); - return gtNewSimdHWIntrinsicNode(type, retNode, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); + return gtNewSimdCvtMaskToVectorNode(type, retNode, simdBaseJitType, simdSize); } else { return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); } -#else - return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize); -#endif } GenTree* Compiler::gtNewSimdCmpOpAllNode( @@ -27157,6 +27213,20 @@ bool GenTreeHWIntrinsic::OperIsCreateScalarUnsafe() const } } +//------------------------------------------------------------------------ +// OperIsBitwiseHWIntrinsic: Is the operation a bitwise logic operation. +// +// Arguments: +// oper -- The operation to check +// +// Return Value: +// Whether oper is a bitwise logic intrinsic node. +// +bool GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(genTreeOps oper) +{ + return (oper == GT_AND) || (oper == GT_AND_NOT) || (oper == GT_OR) || (oper == GT_XOR); +} + //------------------------------------------------------------------------ // OperIsBitwiseHWIntrinsic: Is this HWIntrinsic a bitwise logic intrinsic node. // @@ -27165,8 +27235,8 @@ bool GenTreeHWIntrinsic::OperIsCreateScalarUnsafe() const // bool GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic() const { - genTreeOps Oper = HWOperGet(); - return Oper == GT_AND || Oper == GT_OR || Oper == GT_XOR || Oper == GT_AND_NOT; + genTreeOps oper = HWOperGet(); + return OperIsBitwiseHWIntrinsic(oper); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index cc1c4917604ace..cb4b5584761d94 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -6505,6 +6505,8 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic } #endif + static bool OperIsBitwiseHWIntrinsic(genTreeOps oper); + bool OperIsMemoryLoad(GenTree** pAddr = nullptr) const; bool OperIsMemoryStore(GenTree** pAddr = nullptr) const; bool OperIsMemoryLoadOrStore() const; diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 48dd3cffa10465..1ddd5f04c5a740 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1657,15 +1657,14 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, // HWInstrinsic requires a mask for op2 if (!varTypeIsMask(op2)) { - retNode->AsHWIntrinsic()->Op(2) = - gtNewSimdConvertVectorToMaskNode(retType, op2, simdBaseJitType, simdSize); + retNode->AsHWIntrinsic()->Op(2) = gtNewSimdCvtVectorToMaskNode(retType, op2, simdBaseJitType, simdSize); } } if (!varTypeIsMask(op1)) { // Op1 input is a vector. HWInstrinsic requires a mask. - retNode->AsHWIntrinsic()->Op(1) = gtNewSimdConvertVectorToMaskNode(retType, op1, simdBaseJitType, simdSize); + retNode->AsHWIntrinsic()->Op(1) = gtNewSimdCvtVectorToMaskNode(retType, op1, simdBaseJitType, simdSize); } if (HWIntrinsicInfo::IsMultiReg(intrinsic)) @@ -1682,7 +1681,13 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, // HWInstrinsic returns a mask, but all returns must be vectors, so convert mask to vector. assert(HWIntrinsicInfo::ReturnsPerElementMask(intrinsic)); assert(nodeRetType == TYP_MASK); - retNode = gtNewSimdConvertMaskToVectorNode(retNode->AsHWIntrinsic(), retType); + + GenTreeHWIntrinsic* op = retNode->AsHWIntrinsic(); + + CorInfoType simdBaseJitType = op->GetSimdBaseJitType(); + unsigned simdSize = op->GetSimdSize(); + + retNode = gtNewSimdCvtMaskToVectorNode(retType, op, simdBaseJitType, simdSize); } #endif // defined(TARGET_ARM64) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 095f31246d0c68..5680bfd5fcb0f4 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2582,7 +2582,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, // HWInstrinsic requires a mask for op2 if (!varTypeIsMask(op2)) { - op2 = gtNewSimdConvertVectorToMaskNode(retType, op2, simdBaseJitType, simdSize); + op2 = gtNewSimdCvtVectorToMaskNode(retType, op2, simdBaseJitType, simdSize); } retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); @@ -2603,48 +2603,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, return retNode; } -//------------------------------------------------------------------------ -// gtNewSimdConvertMaskToVectorNode: Convert a HW instrinsic vector node to a mask -// -// Arguments: -// node -- The node to convert -// simdBaseJitType -- the base jit type of the converted node -// simdSize -- the simd size of the converted node -// -// Return Value: -// The node converted to the a mask type -// -GenTree* Compiler::gtNewSimdConvertVectorToMaskNode(var_types type, - GenTree* node, - CorInfoType simdBaseJitType, - unsigned simdSize) -{ - assert(varTypeIsSIMD(node)); - - // ConvertVectorToMask uses cmpne which requires an embedded mask. - GenTree* trueMask = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); - return gtNewSimdHWIntrinsicNode(TYP_MASK, trueMask, node, NI_Sve_ConvertVectorToMask, simdBaseJitType, simdSize); -} - -//------------------------------------------------------------------------ -// gtNewSimdConvertMaskToVectorNode: Convert a HW instrinsic mask node to a vector -// -// Arguments: -// node -- The node to convert -// type -- The type of the node to convert to -// -// Return Value: -// The node converted to the given type -// -GenTree* Compiler::gtNewSimdConvertMaskToVectorNode(GenTreeHWIntrinsic* node, var_types type) -{ - assert(varTypeIsMask(node)); - assert(varTypeIsSIMD(type)); - - return gtNewSimdHWIntrinsicNode(type, node, NI_Sve_ConvertMaskToVector, node->GetSimdBaseJitType(), - node->GetSimdSize()); -} - //------------------------------------------------------------------------ // gtNewSimdEmbeddedMaskNode: Create an embedded mask // diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index f96a0a1a199d42..739f4db2c153ec 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -2043,8 +2043,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (!varTypeIsMask(op1)) { - op1 = - gtNewSimdHWIntrinsicNode(TYP_MASK, op1, NI_EVEX_ConvertVectorToMask, simdBaseJitType, simdSize); + op1 = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseJitType, simdSize); } retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_EVEX_MoveMask, simdBaseJitType, simdSize); } @@ -4141,7 +4140,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (!varTypeIsMask(op3)) { - op3 = gtNewSimdHWIntrinsicNode(TYP_MASK, op3, NI_EVEX_ConvertVectorToMask, simdBaseJitType, simdSize); + op3 = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op3, simdBaseJitType, simdSize); } retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, NI_EVEX_BlendVariableMask, simdBaseJitType, simdSize); @@ -4157,8 +4156,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, op3, NI_EVEX_CompareMask, simdBaseJitType, simdSize); - retNode = - gtNewSimdHWIntrinsicNode(retType, retNode, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); + retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } @@ -4172,9 +4170,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impSIMDPopStack(); retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareEqualMask, simdBaseJitType, simdSize); - retNode = - gtNewSimdHWIntrinsicNode(retType, retNode, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); - + retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } @@ -4191,9 +4187,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareGreaterThanMask, simdBaseJitType, simdSize); - retNode = - gtNewSimdHWIntrinsicNode(retType, retNode, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); - + retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } @@ -4210,9 +4204,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareGreaterThanOrEqualMask, simdBaseJitType, simdSize); - retNode = - gtNewSimdHWIntrinsicNode(retType, retNode, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); - + retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } @@ -4229,9 +4221,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareLessThanMask, simdBaseJitType, simdSize); - retNode = - gtNewSimdHWIntrinsicNode(retType, retNode, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); - + retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } @@ -4248,9 +4238,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareLessThanOrEqualMask, simdBaseJitType, simdSize); - retNode = - gtNewSimdHWIntrinsicNode(retType, retNode, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); - + retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } @@ -4267,9 +4255,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareNotEqualMask, simdBaseJitType, simdSize); - retNode = - gtNewSimdHWIntrinsicNode(retType, retNode, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); - + retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } @@ -4282,9 +4268,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareNotGreaterThanMask, simdBaseJitType, simdSize); - retNode = - gtNewSimdHWIntrinsicNode(retType, retNode, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); - + retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } @@ -4297,9 +4281,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareNotGreaterThanOrEqualMask, simdBaseJitType, simdSize); - retNode = - gtNewSimdHWIntrinsicNode(retType, retNode, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); - + retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } @@ -4312,9 +4294,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareNotLessThanMask, simdBaseJitType, simdSize); - retNode = - gtNewSimdHWIntrinsicNode(retType, retNode, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); - + retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } @@ -4327,9 +4307,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareNotLessThanOrEqualMask, simdBaseJitType, simdSize); - retNode = - gtNewSimdHWIntrinsicNode(retType, retNode, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); - + retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } @@ -4342,9 +4320,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareOrderedMask, simdBaseJitType, simdSize); - retNode = - gtNewSimdHWIntrinsicNode(retType, retNode, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); - + retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } @@ -4357,9 +4333,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, NI_EVEX_CompareUnorderedMask, simdBaseJitType, simdSize); - retNode = - gtNewSimdHWIntrinsicNode(retType, retNode, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); - + retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); break; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 8f0d1009d0f634..1ffe9a8f0be815 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -10676,220 +10676,181 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) INDEBUG(node->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); return node; } -#if defined(TARGET_ARM64) - case NI_Sve_ConvertMaskToVector: + + default: { - GenTree* op1 = node->Op(1); + genTreeOps oper = node->HWOperGet(); - if (!op1->OperIsHWIntrinsic(NI_Sve_ConvertVectorToMask)) + if (GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(oper)) { - break; - } + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); - unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); - GenTreeHWIntrinsic* cvtOp1 = op1->AsHWIntrinsic(); - - if ((genTypeSize(cvtOp1->GetSimdBaseType()) != simdBaseTypeSize)) - { - // We need the operand to be the same kind of mask; otherwise - // the bitwise operation can differ in how it performs - break; - } + if (!op1->OperIsHWIntrinsic() || !op2->OperIsHWIntrinsic()) + { + // We need both operands to be ConvertMaskToVector in + // order to optimize this to a direct mask operation + break; + } - GenTree* vectorNode = op1->AsHWIntrinsic()->Op(1); + GenTreeHWIntrinsic* cvtOp1 = op1->AsHWIntrinsic(); + GenTreeHWIntrinsic* cvtOp2 = op2->AsHWIntrinsic(); - DEBUG_DESTROY_NODE(op1, node); - INDEBUG(vectorNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + if (!cvtOp1->OperIsConvertMaskToVector() || !cvtOp2->OperIsConvertMaskToVector()) + { + break; + } - return vectorNode; - } + unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); - case NI_Sve_ConvertVectorToMask: - { - GenTree* op1 = node->Op(1); - GenTree* op2 = node->Op(2); + if ((genTypeSize(cvtOp1->GetSimdBaseType()) != simdBaseTypeSize) || + (genTypeSize(cvtOp2->GetSimdBaseType()) != simdBaseTypeSize)) + { + // We need both operands to be the same kind of mask; otherwise + // the bitwise operation can differ in how it performs + break; + } - if (!op1->OperIsHWIntrinsic(NI_Sve_CreateTrueMaskAll) || - !op2->OperIsHWIntrinsic(NI_Sve_ConvertMaskToVector)) - { - break; - } + var_types simdType = node->TypeGet(); + CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); + unsigned simdSize = node->GetSimdSize(); - unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); + NamedIntrinsic maskIntrinsicId = NI_Illegal; - if (!op2->OperIsHWIntrinsic() || (genTypeSize(op2->AsHWIntrinsic()->GetSimdBaseType()) != simdBaseTypeSize)) - { - // We need the operand to be the same kind of mask; otherwise - // the bitwise operation can differ in how it performs - break; - } +#if defined(TARGET_XARCH) + switch (oper) + { + case GT_AND: + { + maskIntrinsicId = NI_EVEX_AndMask; + break; + } - GenTree* maskNode = op2->AsHWIntrinsic()->Op(1); + case GT_AND_NOT: + { + maskIntrinsicId = NI_EVEX_AndNotMask; + break; + } - DEBUG_DESTROY_NODE(op2, node); - INDEBUG(maskNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + case GT_OR: + { + maskIntrinsicId = NI_EVEX_OrMask; + break; + } - return maskNode; - } -#endif -#if defined(TARGET_XARCH) - case NI_AVX512F_And: - case NI_AVX512DQ_And: - case NI_AVX512F_AndNot: - case NI_AVX512DQ_AndNot: - case NI_AVX512F_Or: - case NI_AVX512DQ_Or: - case NI_AVX512F_Xor: - case NI_AVX512DQ_Xor: - case NI_AVX10v1_V512_And: - case NI_AVX10v1_V512_AndNot: - case NI_AVX10v1_V512_Or: - case NI_AVX10v1_V512_Xor: - { - GenTree* op1 = node->Op(1); - GenTree* op2 = node->Op(2); + case GT_XOR: + { + maskIntrinsicId = NI_EVEX_XorMask; + break; + } - if (!op1->OperIsHWIntrinsic(NI_EVEX_ConvertMaskToVector) || - !op2->OperIsHWIntrinsic(NI_EVEX_ConvertMaskToVector)) - { - // We need both operands to be ConvertMaskToVector in - // order to optimize this to a direct mask operation + default: + { + unreached(); + } + } +#elif defined(TARGET_ARM64) + // TODO-ARM64-CQ: Support transforming bitwise operations on masks break; - } +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 - unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); + if (maskIntrinsicId == NI_Illegal) + { + break; + } - GenTreeHWIntrinsic* cvtOp1 = op1->AsHWIntrinsic(); - GenTreeHWIntrinsic* cvtOp2 = op2->AsHWIntrinsic(); + node->ChangeHWIntrinsicId(maskIntrinsicId); + node->gtType = TYP_MASK; - if ((genTypeSize(cvtOp1->GetSimdBaseType()) != simdBaseTypeSize) || - (genTypeSize(cvtOp2->GetSimdBaseType()) != simdBaseTypeSize)) - { - // We need both operands to be the same kind of mask; otherwise - // the bitwise operation can differ in how it performs - break; - } + node->Op(1) = cvtOp1->Op(1); + DEBUG_DESTROY_NODE(op1); + + node->Op(2) = cvtOp2->Op(1); + DEBUG_DESTROY_NODE(op2); - var_types simdType = node->TypeGet(); - CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); - unsigned simdSize = node->GetSimdSize(); + node = gtNewSimdCvtMaskToVectorNode(simdType, node, simdBaseJitType, simdSize)->AsHWIntrinsic(); - NamedIntrinsic maskIntrinsicId = NI_Illegal; + INDEBUG(node->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return node; + } - switch (intrinsicId) + if (node->OperIsConvertMaskToVector()) { - case NI_AVX512F_And: - case NI_AVX512DQ_And: - case NI_AVX10v1_V512_And: - { - maskIntrinsicId = NI_EVEX_AndMask; - break; - } + GenTree* op = node->Op(1); - case NI_AVX512F_AndNot: - case NI_AVX512DQ_AndNot: - case NI_AVX10v1_V512_AndNot: + if (!op->OperIsHWIntrinsic()) { - maskIntrinsicId = NI_EVEX_AndNotMask; break; } - case NI_AVX512F_Or: - case NI_AVX512DQ_Or: - case NI_AVX10v1_V512_Or: - { - maskIntrinsicId = NI_EVEX_OrMask; - break; - } + unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); + GenTreeHWIntrinsic* cvtOp = op->AsHWIntrinsic(); - case NI_AVX512F_Xor: - case NI_AVX512DQ_Xor: - case NI_AVX10v1_V512_Xor: + if (!cvtOp->OperIsConvertVectorToMask()) { - maskIntrinsicId = NI_EVEX_XorMask; break; } - default: + if ((genTypeSize(cvtOp->GetSimdBaseType()) != simdBaseTypeSize)) { - unreached(); + // We need the operand to be the same kind of mask; otherwise + // the bitwise operation can differ in how it performs + break; } - } - - assert(maskIntrinsicId != NI_Illegal); - - node->ChangeHWIntrinsicId(maskIntrinsicId); - node->gtType = TYP_MASK; - - node->Op(1) = cvtOp1->Op(1); - DEBUG_DESTROY_NODE(op1); - - node->Op(2) = cvtOp2->Op(1); - DEBUG_DESTROY_NODE(op2); - - node = gtNewSimdHWIntrinsicNode(simdType, node, NI_EVEX_ConvertMaskToVector, simdBaseJitType, simdSize); - INDEBUG(node->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - break; - } + GenTree* vectorNode = cvtOp->Op(1); - case NI_EVEX_ConvertMaskToVector: - { - GenTree* op1 = node->Op(1); + DEBUG_DESTROY_NODE(op, node); + INDEBUG(vectorNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - if (!op1->OperIsHWIntrinsic(NI_EVEX_ConvertVectorToMask)) - { - break; + return vectorNode; } - unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); - GenTreeHWIntrinsic* cvtOp1 = op1->AsHWIntrinsic(); - - if ((genTypeSize(cvtOp1->GetSimdBaseType()) != simdBaseTypeSize)) + if (node->OperIsConvertVectorToMask()) { - // We need the operand to be the same kind of mask; otherwise - // the bitwise operation can differ in how it performs - break; - } - - GenTree* vectorNode = op1->AsHWIntrinsic()->Op(1); - - DEBUG_DESTROY_NODE(op1, node); - INDEBUG(vectorNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + GenTree* op = node->Op(1); - return vectorNode; - } +#if defined(TARGET_ARM64) + if (!op->OperIsHWIntrinsic(NI_Sve_CreateTrueMaskAll)) + { + break; + } + op = node->Op(2); +#endif // TARGET_ARM64 - case NI_EVEX_ConvertVectorToMask: - { - GenTree* op1 = node->Op(1); + if (!op->OperIsHWIntrinsic()) + { + break; + } - if (!op1->OperIsHWIntrinsic(NI_EVEX_ConvertMaskToVector)) - { - break; - } + unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); + GenTreeHWIntrinsic* cvtOp = op->AsHWIntrinsic(); - unsigned simdBaseTypeSize = genTypeSize(node->GetSimdBaseType()); - GenTreeHWIntrinsic* cvtOp1 = op1->AsHWIntrinsic(); + if (!cvtOp->OperIsConvertMaskToVector()) + { + break; + } - if ((genTypeSize(cvtOp1->GetSimdBaseType()) != simdBaseTypeSize)) - { - // We need the operand to be the same kind of mask; otherwise - // the bitwise operation can differ in how it performs - break; - } + if ((genTypeSize(cvtOp->GetSimdBaseType()) != simdBaseTypeSize)) + { + // We need the operand to be the same kind of mask; otherwise + // the bitwise operation can differ in how it performs + break; + } - GenTree* maskNode = op1->AsHWIntrinsic()->Op(1); + GenTree* maskNode = cvtOp->Op(1); - DEBUG_DESTROY_NODE(op1, node); - INDEBUG(maskNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); +#if defined(TARGET_ARM64) + DEBUG_DESTROY_NODE(node->Op(1)); +#endif // TARGET_ARM64 - return maskNode; - } -#endif // TARGET_XARCH + DEBUG_DESTROY_NODE(op, node); + INDEBUG(maskNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - default: - { + return maskNode; + } break; } } @@ -10989,6 +10950,7 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) return andnNode; } + default: { break; diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index f2da4a40e755f3..03623d9b3d69c4 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -38,7 +38,7 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use, BlockRange().Remove(treeFirstNode, tree); // Create the call node - GenTreeCall* call = comp->gtNewCallNode(CT_USER_FUNC, callHnd, tree->gtType); + GenTreeCall* call = comp->gtNewCallNode(CT_USER_FUNC, callHnd, tree->TypeGet()); assert(sig != nullptr); var_types retType = JITtype2varType(sig->retType); @@ -99,6 +99,23 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use, // for intrinsics that get rewritten back to user calls assert(!operand->OperIsFieldList()); + if (varTypeIsMask(operand->TypeGet())) + { +#if defined(FEATURE_HW_INTRINSICS) + // No managed call takes TYP_MASK, so convert it back to a TYP_SIMD + + unsigned simdSize; + CorInfoType simdBaseJitType = comp->getBaseJitTypeAndSizeOfSIMDType(call->gtRetClsHnd, &simdSize); + assert(simdSize != 0); + + GenTree* cvtNode = comp->gtNewSimdCvtMaskToVectorNode(sigTyp, operand, simdBaseJitType, simdSize); + BlockRange().InsertAfter(operand, LIR::Range(comp->fgSetTreeSeq(cvtNode), cvtNode)); + operand = cvtNode; +#else + unreached(); +#endif // FEATURE_HW_INTRINSICS + } + sigTyp = comp->impNormStructType(clsHnd); arg = NewCallArg::Struct(operand, sigTyp, clsHnd); } @@ -140,17 +157,41 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use, // Replace "tree" with "call" if (parents.Height() > 1) { + GenTree* tmpInsertionPoint = insertionPoint; + if (tmpNum != BAD_VAR_NUM) { result = comp->gtNewLclvNode(tmpNum, retType); } + if (varTypeIsMask(tree->TypeGet())) + { +#if defined(FEATURE_HW_INTRINSICS) + // No managed call returns TYP_MASK, so convert it from a TYP_SIMD + + var_types simdType = tree->TypeGet(); + assert(varTypeIsSIMD(simdType)); + + unsigned simdSize; + CorInfoType simdBaseJitType = comp->getBaseJitTypeAndSizeOfSIMDType(call->gtRetClsHnd, &simdSize); + assert(simdSize != 0); + + GenTree* cvtNode = comp->gtNewSimdCvtVectorToMaskNode(simdType, result, simdBaseJitType, simdSize); + BlockRange().InsertAfter(insertionPoint, LIR::Range(comp->fgSetTreeSeq(cvtNode), cvtNode)); + result = cvtNode; + + tmpInsertionPoint = result; +#else + unreached(); +#endif // FEATURE_HW_INTRINSICS + } + parents.Top(1)->ReplaceOperand(use, result); if (tmpNum != BAD_VAR_NUM) { comp->gtSetEvalOrder(result); - BlockRange().InsertAfter(insertionPoint, LIR::Range(comp->fgSetTreeSeq(result), result)); + BlockRange().InsertAfter(tmpInsertionPoint, LIR::Range(comp->fgSetTreeSeq(result), result)); } } else diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 0856a61dc42c52..0c715dd6d54df3 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -12187,24 +12187,13 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree) // Some intrinsics should always be unique bool makeUnique = false; -#if defined(TARGET_XARCH) - switch (intrinsicId) + if (tree->OperIsConvertMaskToVector()) { - case NI_EVEX_ConvertMaskToVector: - { - // We want to ensure that we get a TYP_MASK local to - // ensure the relevant optimizations can kick in - - makeUnique = true; - break; - } + // We want to ensure that we get a TYP_MASK local to + // ensure the relevant optimizations can kick in - default: - { - break; - } + makeUnique = true; } -#endif // TARGET_XARCH if (makeUnique) { From e56c756e48a5761746dbbe6159b488d61144c5c6 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 11 Jun 2024 12:14:03 -0700 Subject: [PATCH 2/6] Ensure we pass in TYP_MASK, not the simdType --- src/coreclr/jit/hwintrinsic.cpp | 4 ++-- src/coreclr/jit/hwintrinsicarm64.cpp | 2 +- src/coreclr/jit/rationalize.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 1ddd5f04c5a740..17783803003e20 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1657,14 +1657,14 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, // HWInstrinsic requires a mask for op2 if (!varTypeIsMask(op2)) { - retNode->AsHWIntrinsic()->Op(2) = gtNewSimdCvtVectorToMaskNode(retType, op2, simdBaseJitType, simdSize); + retNode->AsHWIntrinsic()->Op(2) = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op2, simdBaseJitType, simdSize); } } if (!varTypeIsMask(op1)) { // Op1 input is a vector. HWInstrinsic requires a mask. - retNode->AsHWIntrinsic()->Op(1) = gtNewSimdCvtVectorToMaskNode(retType, op1, simdBaseJitType, simdSize); + retNode->AsHWIntrinsic()->Op(1) = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op1, simdBaseJitType, simdSize); } if (HWIntrinsicInfo::IsMultiReg(intrinsic)) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 5680bfd5fcb0f4..e77d89432d368d 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2582,7 +2582,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, // HWInstrinsic requires a mask for op2 if (!varTypeIsMask(op2)) { - op2 = gtNewSimdCvtVectorToMaskNode(retType, op2, simdBaseJitType, simdSize); + op2 = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op2, simdBaseJitType, simdSize); } retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index 03623d9b3d69c4..8babf33c5afa53 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -176,7 +176,7 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use, CorInfoType simdBaseJitType = comp->getBaseJitTypeAndSizeOfSIMDType(call->gtRetClsHnd, &simdSize); assert(simdSize != 0); - GenTree* cvtNode = comp->gtNewSimdCvtVectorToMaskNode(simdType, result, simdBaseJitType, simdSize); + GenTree* cvtNode = comp->gtNewSimdCvtVectorToMaskNode(TYP_MASK, result, simdBaseJitType, simdSize); BlockRange().InsertAfter(insertionPoint, LIR::Range(comp->fgSetTreeSeq(cvtNode), cvtNode)); result = cvtNode; From d9c7c688c22e5cd742754b91cd19f05a83a96924 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 11 Jun 2024 12:45:42 -0700 Subject: [PATCH 3/6] Apply formatting patch --- src/coreclr/jit/hwintrinsic.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 17783803003e20..3a4fa6aba2b711 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1657,7 +1657,8 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, // HWInstrinsic requires a mask for op2 if (!varTypeIsMask(op2)) { - retNode->AsHWIntrinsic()->Op(2) = gtNewSimdCvtVectorToMaskNode(TYP_MASK, op2, simdBaseJitType, simdSize); + retNode->AsHWIntrinsic()->Op(2) = + gtNewSimdCvtVectorToMaskNode(TYP_MASK, op2, simdBaseJitType, simdSize); } } From cbc521385c5713381be951f41955aa47066e8a1c Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 11 Jun 2024 13:33:51 -0700 Subject: [PATCH 4/6] Fix copy/paste error, pass in clsHnd for the argument --- src/coreclr/jit/rationalize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index 8babf33c5afa53..40bb89bd4f6ee1 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -105,7 +105,7 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use, // No managed call takes TYP_MASK, so convert it back to a TYP_SIMD unsigned simdSize; - CorInfoType simdBaseJitType = comp->getBaseJitTypeAndSizeOfSIMDType(call->gtRetClsHnd, &simdSize); + CorInfoType simdBaseJitType = comp->getBaseJitTypeAndSizeOfSIMDType(clsHnd, &simdSize); assert(simdSize != 0); GenTree* cvtNode = comp->gtNewSimdCvtMaskToVectorNode(sigTyp, operand, simdBaseJitType, simdSize); From 84657f952026ce1b17bb63580774ffa410153d28 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 11 Jun 2024 13:39:51 -0700 Subject: [PATCH 5/6] Ensure that we normalize sigType before inserting the CvtMaskToVectorNode --- src/coreclr/jit/rationalize.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index 40bb89bd4f6ee1..cd23df9c2814db 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -99,6 +99,8 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use, // for intrinsics that get rewritten back to user calls assert(!operand->OperIsFieldList()); + sigTyp = comp->impNormStructType(clsHnd); + if (varTypeIsMask(operand->TypeGet())) { #if defined(FEATURE_HW_INTRINSICS) @@ -115,9 +117,7 @@ void Rationalizer::RewriteNodeAsCall(GenTree** use, unreached(); #endif // FEATURE_HW_INTRINSICS } - - sigTyp = comp->impNormStructType(clsHnd); - arg = NewCallArg::Struct(operand, sigTyp, clsHnd); + arg = NewCallArg::Struct(operand, sigTyp, clsHnd); } else { From 7241d3f8d3fb42ec7fa78f771f018e6538db85f5 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 11 Jun 2024 19:22:12 -0700 Subject: [PATCH 6/6] Ensure that we get the vector node on Arm64 (ConvertVectorToMask has 2 ops) --- src/coreclr/jit/morph.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 1ffe9a8f0be815..1dc6cb7af0e881 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -10800,7 +10800,13 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) break; } +#if defined(TARGET_XARCH) GenTree* vectorNode = cvtOp->Op(1); +#elif defined(TARGET_ARM64) + GenTree* vectorNode = cvtOp->Op(2); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 DEBUG_DESTROY_NODE(op, node); INDEBUG(vectorNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);