-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[VPlan] Transform VPFirstOrderRecurrencePHIRecipe into concrete recipes #172009
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-backend-risc-v Author: Mel Chen (Mel-Chen) ChangesCurrently, VPFirstOrderRecurrencePHIRecipe sets the start value from the preheader to [poison, ..., poison, start] during ::execute when generating the PHI node. This patch explicitly converts the start value of VPFirstOrderRecurrencePHIRecipe in the VPlan that will be executed to Instruction::InsertElement and transforms the VPFirstOrderRecurrencePHIRecipe itself into a VPWidenPHIRecipe. This allows redundant start value initializations to be eliminated by VPlanTransform::cse. Patch is 39.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172009.diff 12 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0108351f821f4..e2a7ddbe88966 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7383,7 +7383,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
: TargetTransformInfo::RGK_FixedWidthVector));
VPlanTransforms::removeDeadRecipes(BestVPlan);
- VPlanTransforms::convertToConcreteRecipes(BestVPlan);
+ VPlanTransforms::convertToConcreteRecipes(BestVPlan, BestVF);
// Regions are dissolved after optimizing for VF and UF, which completely
// removes unneeded loop regions first.
VPlanTransforms::dissolveLoopRegions(BestVPlan);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 0f8f6abab7b0a..2afb7edeced16 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2376,7 +2376,11 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
}
- void execute(VPTransformState &State) override;
+ void execute(VPTransformState &State) override {
+ llvm_unreachable(
+ "cannot execute this recipe since VPFirstOrderRecurrencePHIRecipe "
+ "should be transformed to VPWidenPHIRecipe");
+ }
/// Return the cost of this first-order recurrence phi recipe.
InstructionCost computeCost(ElementCount VF,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index a586aafa2855d..c695b5c22cc65 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -75,6 +75,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
switch (Opcode) {
case Instruction::ExtractElement:
case Instruction::Freeze:
+ case Instruction::InsertElement:
case VPInstruction::ReductionStartVector:
case VPInstruction::ResumeForEpilogue:
return inferScalarType(R->getOperand(0));
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index a56b165bc1a5e..696778a18d279 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -459,6 +459,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::WidePtrAdd:
case VPInstruction::WideIVStep:
return 2;
+ case Instruction::InsertElement:
case Instruction::Select:
case VPInstruction::ActiveLaneMask:
case VPInstruction::ComputeAnyOfResult:
@@ -572,6 +573,13 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
return Builder.CreateCmp(getPredicate(), A, B, Name);
}
+ case Instruction::InsertElement: {
+ assert(State.VF.isVector() && "Only insert element into vector");
+ Value *Vec = State.get(getOperand(0));
+ Value *Elt = State.get(getOperand(1), /*IsScalar=*/true);
+ Value *Idx = State.get(getOperand(2), /*IsScalar=*/true);
+ return Builder.CreateInsertElement(Vec, Elt, Idx, Name);
+ }
case Instruction::PHI: {
llvm_unreachable("should be handled by VPPhi::execute");
}
@@ -1179,6 +1187,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case Instruction::Freeze:
case Instruction::FCmp:
case Instruction::ICmp:
+ case Instruction::InsertElement:
case Instruction::Select:
case Instruction::PHI:
case VPInstruction::AnyOf:
@@ -1223,6 +1232,8 @@ bool VPInstruction::usesFirstLaneOnly(const VPValue *Op) const {
return false;
case Instruction::ExtractElement:
return Op == getOperand(1);
+ case Instruction::InsertElement:
+ return Op == getOperand(1) || Op == getOperand(2);
case Instruction::PHI:
return true;
case Instruction::FCmp:
@@ -4299,35 +4310,6 @@ void VPWidenCanonicalIVRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
}
#endif
-void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
- auto &Builder = State.Builder;
- // Create a vector from the initial value.
- auto *VectorInit = getStartValue()->getLiveInIRValue();
-
- Type *VecTy = State.VF.isScalar()
- ? VectorInit->getType()
- : VectorType::get(VectorInit->getType(), State.VF);
-
- BasicBlock *VectorPH =
- State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
- if (State.VF.isVector()) {
- auto *IdxTy = Builder.getInt32Ty();
- auto *One = ConstantInt::get(IdxTy, 1);
- IRBuilder<>::InsertPointGuard Guard(Builder);
- Builder.SetInsertPoint(VectorPH->getTerminator());
- auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
- auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
- VectorInit = Builder.CreateInsertElement(
- PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
- }
-
- // Create a phi node for the new recurrence.
- PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
- Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
- Phi->addIncoming(VectorInit, VectorPH);
- State.set(this, Phi);
-}
-
InstructionCost
VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6627133878fdb..b4682cb27330f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3667,7 +3667,7 @@ void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) {
R->dissolveToCFGLoop();
}
-void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
+void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan, ElementCount VF) {
VPTypeAnalysis TypeInfo(Plan);
SmallVector<VPRecipeBase *> ToRemove;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
@@ -3695,6 +3695,34 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
continue;
}
+ if (auto *FORPhiR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) {
+ VPValue *InitVec = FORPhiR->getStartValue();
+ DebugLoc DL = FORPhiR->getDebugLoc();
+ if (VF.isVector()) {
+ VPBuilder PHBuilder(Plan.getVectorPreheader());
+ VPValue *Poison = Plan.getOrAddLiveIn(
+ PoisonValue::get(TypeInfo.inferScalarType(InitVec)));
+ Type *IdxTy = Type::getInt32Ty(Plan.getContext());
+ VPValue *RuntimeVF = PHBuilder.createScalarZExtOrTrunc(
+ &Plan.getVF(), IdxTy, TypeInfo.inferScalarType(&Plan.getVF()),
+ DL);
+ VPValue *LastIdx = PHBuilder.createOverflowingOp(
+ Instruction::Sub, {RuntimeVF, Plan.getConstantInt(IdxTy, 1)},
+ {false, false}, DL);
+ InitVec = PHBuilder.createNaryOp(Instruction::InsertElement,
+ {Poison, InitVec, LastIdx}, DL,
+ "vector.recur.init");
+ }
+ auto *WidenPhi =
+ new VPWidenPHIRecipe(cast<PHINode>(FORPhiR->getUnderlyingInstr()),
+ InitVec, DL, "vector.recur");
+ WidenPhi->addOperand(FORPhiR->getBackedgeValue());
+ WidenPhi->insertBefore(FORPhiR);
+ FORPhiR->replaceAllUsesWith(WidenPhi);
+ ToRemove.push_back(FORPhiR);
+ continue;
+ }
+
// Expand VPBlendRecipe into VPInstruction::Select.
VPBuilder Builder(&R);
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index afdf1655b4622..067b71793cf90 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -284,7 +284,7 @@ struct VPlanTransforms {
static void canonicalizeEVLLoops(VPlan &Plan);
/// Lower abstract recipes to concrete ones, that can be codegen'd.
- static void convertToConcreteRecipes(VPlan &Plan);
+ static void convertToConcreteRecipes(VPlan &Plan, ElementCount VF);
/// This function converts initial recipes to the abstract recipes and clamps
/// \p Range based on cost model for following optimizations and cost
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
index f2c0ca30a6c18..6a1618a8caa0e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
@@ -56,7 +56,8 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; VSCALEFORTUNING2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VSCALEFORTUNING2: [[VECTOR_PH]]:
; VSCALEFORTUNING2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
+; VSCALEFORTUNING2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP3]], 4
+; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP5]], 2
; VSCALEFORTUNING2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; VSCALEFORTUNING2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
@@ -73,18 +74,13 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; VSCALEFORTUNING2-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i32> [[TMP13]], splat (i32 1)
; VSCALEFORTUNING2-NEXT: [[TMP15:%.*]] = zext <vscale x 4 x i32> [[TMP14]] to <vscale x 4 x i64>
; VSCALEFORTUNING2-NEXT: [[DOTSPLAT:%.*]] = getelementptr i32, ptr [[SRC_2]], <vscale x 4 x i64> [[TMP15]]
-; VSCALEFORTUNING2-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
-; VSCALEFORTUNING2-NEXT: [[TMP19:%.*]] = mul nuw i32 [[TMP18]], 4
-; VSCALEFORTUNING2-NEXT: [[TMP20:%.*]] = sub i32 [[TMP19]], 1
-; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP20]]
-; VSCALEFORTUNING2-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32()
-; VSCALEFORTUNING2-NEXT: [[TMP22:%.*]] = mul nuw i32 [[TMP21]], 4
+; VSCALEFORTUNING2-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP5]] to i32
; VSCALEFORTUNING2-NEXT: [[TMP23:%.*]] = sub i32 [[TMP22]], 1
; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR_INIT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP23]]
; VSCALEFORTUNING2-NEXT: br label %[[VECTOR_BODY:.*]]
; VSCALEFORTUNING2: [[VECTOR_BODY]]:
; VSCALEFORTUNING2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT7:%.*]], %[[VECTOR_BODY]] ]
+; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT7:%.*]], %[[VECTOR_BODY]] ]
; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR4:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ]
; VSCALEFORTUNING2-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP47:%.*]], %[[VECTOR_BODY]] ]
; VSCALEFORTUNING2-NEXT: [[VEC_PHI5:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP48:%.*]], %[[VECTOR_BODY]] ]
@@ -133,13 +129,13 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; VSCALEFORTUNING2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; VSCALEFORTUNING2: [[SCALAR_PH]]:
; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP24]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
-; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT11:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; VSCALEFORTUNING2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; VSCALEFORTUNING2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP50]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; VSCALEFORTUNING2-NEXT: br label %[[LOOP:.*]]
; VSCALEFORTUNING2: [[LOOP]]:
; VSCALEFORTUNING2-NEXT: [[TMP54:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP57:%.*]], %[[LOOP]] ]
-; VSCALEFORTUNING2-NEXT: [[TMP55:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT11]], %[[SCALAR_PH]] ], [ [[TMP54]], %[[LOOP]] ]
+; VSCALEFORTUNING2-NEXT: [[TMP55:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT10]], %[[SCALAR_PH]] ], [ [[TMP54]], %[[LOOP]] ]
; VSCALEFORTUNING2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; VSCALEFORTUNING2-NEXT: [[SUM_RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_2:%.*]], %[[LOOP]] ]
; VSCALEFORTUNING2-NEXT: [[TMP56:%.*]] = add i64 [[Y]], 1
@@ -200,19 +196,14 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; PRED-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i32> [[TMP17]], splat (i32 1)
; PRED-NEXT: [[TMP19:%.*]] = zext <vscale x 4 x i32> [[TMP18]] to <vscale x 4 x i64>
; PRED-NEXT: [[DOTSPLAT:%.*]] = getelementptr i32, ptr [[SRC_2]], <vscale x 4 x i64> [[TMP19]]
-; PRED-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
-; PRED-NEXT: [[TMP23:%.*]] = mul nuw i32 [[TMP22]], 4
-; PRED-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1
-; PRED-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP24]]
-; PRED-NEXT: [[TMP25:%.*]] = call i32 @llvm.vscale.i32()
-; PRED-NEXT: [[TMP26:%.*]] = mul nuw i32 [[TMP25]], 4
+; PRED-NEXT: [[TMP26:%.*]] = trunc i64 [[TMP2]] to i32
; PRED-NEXT: [[TMP27:%.*]] = sub i32 [[TMP26]], 1
; PRED-NEXT: [[VECTOR_RECUR_INIT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP27]]
; PRED-NEXT: br label %[[VECTOR_BODY:.*]]
; PRED: [[VECTOR_BODY]]:
; PRED-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; PRED-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT6:%.*]], %[[VECTOR_BODY]] ]
+; PRED-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT6:%.*]], %[[VECTOR_BODY]] ]
; PRED-NEXT: [[VECTOR_RECUR4:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ]
; PRED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP41:%.*]], %[[VECTOR_BODY]] ]
; PRED-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP12]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index 8935010e71676..8825362bb875f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -1272,15 +1272,14 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP10]]
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], [[DOTNOT]]
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = trunc nuw nsw i64 [[TMP10]] to i32
+; CHECK-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP11]], -1
+; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[DOTPRE]], i32 [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw <vscale x 4 x i64> [[TMP14]], splat (i64 1)
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i32 [[TMP33]], 2
-; CHECK-NEXT: [[TMP34:%.*]] = add nsw i32 [[TMP16]], -1
-; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[DOTPRE]], i32 [[TMP34]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
index e35db479dc963..6d29f693c18c1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/first-order-recurrence-scalable-vf1.ll
@@ -13,9 +13,7 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 {
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 2
-; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[TMP2]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 0, i32 [[TMP5]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
index b95691f6e7c04..17968251ff4c1 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-fixed-order-recurrence.ll
@@ -18,9 +18,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: [[TMP25:%.*]] = trunc i64 [[TMP8]] to i32
-; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
-; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4
-; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]], 1
+; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP25]], 1
; IF-EVL-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 33, i32 [[TMP11]]
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
@@ -57,8 +55,7 @@ define void @first_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; NO-VP-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], [[TMP3]]
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]]
-; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
-; NO-VP-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 4
+; NO-VP-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP3]] to i32
; NO-VP-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1
; NO-VP-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 33, i32 [[TMP8]]
; NO-VP-NEXT: br label %[[VECTOR_BODY:.*]]
@@ -127,14 +124,9 @@ define void @second_order_recurrence(ptr noalias %A, ptr noalias %B, i64 %TC) {
; IF-EVL-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 4
; IF-EVL-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP8]] to i32
-; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
-; IF-EVL-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP9]], 4
-; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[TMP10]],...
[truncated]
|
Currently, VPFirstOrderRecurrencePHIRecipe sets the start value from the preheader to [poison, ..., poison, start] during ::execute when generating the PHI node. This patch explicitly converts the start value of VPFirstOrderRecurrencePHIRecipe in the VPlan that will be executed to Instruction::InsertElement and transforms the VPFirstOrderRecurrencePHIRecipe itself into a VPWidenPHIRecipe. This allows redundant start value initializations to be eliminated by VPlanTransform::cse.