Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7383,7 +7383,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
: TargetTransformInfo::RGK_FixedWidthVector));
VPlanTransforms::removeDeadRecipes(BestVPlan);

VPlanTransforms::convertToConcreteRecipes(BestVPlan);
VPlanTransforms::convertToConcreteRecipes(BestVPlan, BestVF);
// Regions are dissolved after optimizing for VF and UF, which completely
// removes unneeded loop regions first.
VPlanTransforms::dissolveLoopRegions(BestVPlan);
Expand Down
6 changes: 5 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -2376,7 +2376,11 @@ struct VPFirstOrderRecurrencePHIRecipe : public VPHeaderPHIRecipe {
cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
}

void execute(VPTransformState &State) override;
/// This override never produces IR: it unconditionally hits
/// llvm_unreachable. Per the message below, the recipe is expected to have
/// been transformed to a VPWidenPHIRecipe before execution is attempted.
void execute(VPTransformState &State) override {
llvm_unreachable(
"cannot execute this recipe since VPFirstOrderRecurrencePHIRecipe "
"should be transformed to VPWidenPHIRecipe");
}

/// Return the cost of this first-order recurrence phi recipe.
InstructionCost computeCost(ElementCount VF,
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
switch (Opcode) {
case Instruction::ExtractElement:
case Instruction::Freeze:
case Instruction::InsertElement:
case VPInstruction::ReductionStartVector:
case VPInstruction::ResumeForEpilogue:
return inferScalarType(R->getOperand(0));
Expand Down
40 changes: 11 additions & 29 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::WidePtrAdd:
case VPInstruction::WideIVStep:
return 2;
case Instruction::InsertElement:
case Instruction::Select:
case VPInstruction::ActiveLaneMask:
case VPInstruction::ComputeAnyOfResult:
Expand Down Expand Up @@ -572,6 +573,13 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);
return Builder.CreateCmp(getPredicate(), A, B, Name);
}
case Instruction::InsertElement: {
assert(State.VF.isVector() && "Only insert element into vector");
Value *Vec = State.get(getOperand(0));
Value *Elt = State.get(getOperand(1), /*IsScalar=*/true);
Value *Idx = State.get(getOperand(2), /*IsScalar=*/true);
return Builder.CreateInsertElement(Vec, Elt, Idx, Name);
}
Comment on lines +576 to +582
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this be moved directly after the ExtractElement case?

case Instruction::PHI: {
llvm_unreachable("should be handled by VPPhi::execute");
}
Expand Down Expand Up @@ -1179,6 +1187,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case Instruction::Freeze:
case Instruction::FCmp:
case Instruction::ICmp:
case Instruction::InsertElement:
case Instruction::Select:
case Instruction::PHI:
case VPInstruction::AnyOf:
Expand Down Expand Up @@ -1223,6 +1232,8 @@ bool VPInstruction::usesFirstLaneOnly(const VPValue *Op) const {
return false;
case Instruction::ExtractElement:
return Op == getOperand(1);
case Instruction::InsertElement:
return Op == getOperand(1) || Op == getOperand(2);
case Instruction::PHI:
return true;
case Instruction::FCmp:
Expand Down Expand Up @@ -4299,35 +4310,6 @@ void VPWidenCanonicalIVRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
}
#endif

/// Emit the IR phi for a first-order recurrence and record it in \p State
/// as the value generated for this recipe.
///
/// For a vector VF, the start value is first inserted into the last lane
/// (index RuntimeVF - 1) of a poison vector; that insertelement is emitted
/// before the terminator of the IR block mapped from this recipe's parent's
/// CFG predecessor 0. For a scalar VF the start value is used as-is.
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
auto &Builder = State.Builder;
// Create a vector from the initial value.
auto *VectorInit = getStartValue()->getLiveInIRValue();

// The phi has the start value's own type for a scalar VF, otherwise a
// vector of that element type with State.VF elements.
Type *VecTy = State.VF.isScalar()
? VectorInit->getType()
: VectorType::get(VectorInit->getType(), State.VF);

// IR block corresponding to the first CFG predecessor of the parent block;
// it is used both as the insertion block for the init vector and as the
// phi's incoming block.
BasicBlock *VectorPH =
State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
if (State.VF.isVector()) {
// The lane index is computed as an i32.
auto *IdxTy = Builder.getInt32Ty();
auto *One = ConstantInt::get(IdxTy, 1);
// The guard restores the builder's insertion point when this scope ends.
IRBuilder<>::InsertPointGuard Guard(Builder);
Builder.SetInsertPoint(VectorPH->getTerminator());
auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
// Index of the last lane: RuntimeVF - 1.
auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
// Only the last lane is written; the remaining lanes stay poison.
VectorInit = Builder.CreateInsertElement(
PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
}

// Create a phi node for the new recurrence.
// Space is reserved for 2 incoming values, but only the one from VectorPH is
// added here.
// NOTE(review): presumably the second (backedge) incoming value is added
// elsewhere -- confirm against the caller.
PHINode *Phi = PHINode::Create(VecTy, 2, "vector.recur");
Phi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
Phi->addIncoming(VectorInit, VectorPH);
State.set(this, Phi);
}

InstructionCost
VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
Expand Down
30 changes: 29 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3667,7 +3667,7 @@ void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) {
R->dissolveToCFGLoop();
}

void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan, ElementCount VF) {
VPTypeAnalysis TypeInfo(Plan);
SmallVector<VPRecipeBase *> ToRemove;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
Expand Down Expand Up @@ -3695,6 +3695,34 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
continue;
}

if (auto *FORPhiR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) {
VPValue *InitVec = FORPhiR->getStartValue();
DebugLoc DL = FORPhiR->getDebugLoc();
if (VF.isVector()) {
VPBuilder PHBuilder(Plan.getVectorPreheader());
VPValue *Poison = Plan.getOrAddLiveIn(
PoisonValue::get(TypeInfo.inferScalarType(InitVec)));
Type *IdxTy = Type::getInt32Ty(Plan.getContext());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, insertelement must take an i32 index, so is this hard-coding the right thing to do?

VPValue *RuntimeVF = PHBuilder.createScalarZExtOrTrunc(
&Plan.getVF(), IdxTy, TypeInfo.inferScalarType(&Plan.getVF()),
DL);
VPValue *LastIdx = PHBuilder.createOverflowingOp(
Instruction::Sub, {RuntimeVF, Plan.getConstantInt(IdxTy, 1)},
{false, false}, DL);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, should this set nsw?

InitVec = PHBuilder.createNaryOp(Instruction::InsertElement,
{Poison, InitVec, LastIdx}, DL,
"vector.recur.init");
}
auto *WidenPhi =
new VPWidenPHIRecipe(cast<PHINode>(FORPhiR->getUnderlyingInstr()),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be good to introduce VPFirstOrderRecurrencePHIRecipe::getPHINode(), similar to VPWidenInductionRecipe::getPHINode()?

InitVec, DL, "vector.recur");
WidenPhi->addOperand(FORPhiR->getBackedgeValue());
WidenPhi->insertBefore(FORPhiR);
FORPhiR->replaceAllUsesWith(WidenPhi);
ToRemove.push_back(FORPhiR);
continue;
}

// Expand VPBlendRecipe into VPInstruction::Select.
VPBuilder Builder(&R);
if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ struct VPlanTransforms {
static void canonicalizeEVLLoops(VPlan &Plan);

/// Lower abstract recipes to concrete ones, that can be codegen'd.
static void convertToConcreteRecipes(VPlan &Plan);
static void convertToConcreteRecipes(VPlan &Plan, ElementCount VF);

/// This function converts initial recipes to the abstract recipes and clamps
/// \p Range based on cost model for following optimizations and cost
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; VSCALEFORTUNING2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; VSCALEFORTUNING2: [[VECTOR_PH]]:
; VSCALEFORTUNING2-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
; VSCALEFORTUNING2-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP3]], 4
; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP5]], 2
; VSCALEFORTUNING2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; VSCALEFORTUNING2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
Expand All @@ -73,18 +74,13 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; VSCALEFORTUNING2-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i32> [[TMP13]], splat (i32 1)
; VSCALEFORTUNING2-NEXT: [[TMP15:%.*]] = zext <vscale x 4 x i32> [[TMP14]] to <vscale x 4 x i64>
; VSCALEFORTUNING2-NEXT: [[DOTSPLAT:%.*]] = getelementptr i32, ptr [[SRC_2]], <vscale x 4 x i64> [[TMP15]]
; VSCALEFORTUNING2-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
; VSCALEFORTUNING2-NEXT: [[TMP19:%.*]] = mul nuw i32 [[TMP18]], 4
; VSCALEFORTUNING2-NEXT: [[TMP20:%.*]] = sub i32 [[TMP19]], 1
; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP20]]
; VSCALEFORTUNING2-NEXT: [[TMP21:%.*]] = call i32 @llvm.vscale.i32()
; VSCALEFORTUNING2-NEXT: [[TMP22:%.*]] = mul nuw i32 [[TMP21]], 4
; VSCALEFORTUNING2-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP5]] to i32
; VSCALEFORTUNING2-NEXT: [[TMP23:%.*]] = sub i32 [[TMP22]], 1
; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR_INIT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP23]]
; VSCALEFORTUNING2-NEXT: br label %[[VECTOR_BODY:.*]]
; VSCALEFORTUNING2: [[VECTOR_BODY]]:
; VSCALEFORTUNING2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT7:%.*]], %[[VECTOR_BODY]] ]
; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT7:%.*]], %[[VECTOR_BODY]] ]
; VSCALEFORTUNING2-NEXT: [[VECTOR_RECUR4:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ]
; VSCALEFORTUNING2-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP47:%.*]], %[[VECTOR_BODY]] ]
; VSCALEFORTUNING2-NEXT: [[VEC_PHI5:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP48:%.*]], %[[VECTOR_BODY]] ]
Expand Down Expand Up @@ -133,13 +129,13 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; VSCALEFORTUNING2-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
; VSCALEFORTUNING2: [[SCALAR_PH]]:
; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[TMP24]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT11:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; VSCALEFORTUNING2-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; VSCALEFORTUNING2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; VSCALEFORTUNING2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP50]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; VSCALEFORTUNING2-NEXT: br label %[[LOOP:.*]]
; VSCALEFORTUNING2: [[LOOP]]:
; VSCALEFORTUNING2-NEXT: [[TMP54:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[TMP57:%.*]], %[[LOOP]] ]
; VSCALEFORTUNING2-NEXT: [[TMP55:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT11]], %[[SCALAR_PH]] ], [ [[TMP54]], %[[LOOP]] ]
; VSCALEFORTUNING2-NEXT: [[TMP55:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT10]], %[[SCALAR_PH]] ], [ [[TMP54]], %[[LOOP]] ]
; VSCALEFORTUNING2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
; VSCALEFORTUNING2-NEXT: [[SUM_RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_2:%.*]], %[[LOOP]] ]
; VSCALEFORTUNING2-NEXT: [[TMP56:%.*]] = add i64 [[Y]], 1
Expand Down Expand Up @@ -200,19 +196,14 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; PRED-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i32> [[TMP17]], splat (i32 1)
; PRED-NEXT: [[TMP19:%.*]] = zext <vscale x 4 x i32> [[TMP18]] to <vscale x 4 x i64>
; PRED-NEXT: [[DOTSPLAT:%.*]] = getelementptr i32, ptr [[SRC_2]], <vscale x 4 x i64> [[TMP19]]
; PRED-NEXT: [[TMP22:%.*]] = call i32 @llvm.vscale.i32()
; PRED-NEXT: [[TMP23:%.*]] = mul nuw i32 [[TMP22]], 4
; PRED-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], 1
; PRED-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP24]]
; PRED-NEXT: [[TMP25:%.*]] = call i32 @llvm.vscale.i32()
; PRED-NEXT: [[TMP26:%.*]] = mul nuw i32 [[TMP25]], 4
; PRED-NEXT: [[TMP26:%.*]] = trunc i64 [[TMP2]] to i32
; PRED-NEXT: [[TMP27:%.*]] = sub i32 [[TMP26]], 1
; PRED-NEXT: [[VECTOR_RECUR_INIT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 0, i32 [[TMP27]]
; PRED-NEXT: br label %[[VECTOR_BODY:.*]]
; PRED: [[VECTOR_BODY]]:
; PRED-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
; PRED-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT6:%.*]], %[[VECTOR_BODY]] ]
; PRED-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[BROADCAST_SPLAT6:%.*]], %[[VECTOR_BODY]] ]
; PRED-NEXT: [[VECTOR_RECUR4:%.*]] = phi <vscale x 4 x i32> [ [[VECTOR_RECUR_INIT3]], %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ]
; PRED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP41:%.*]], %[[VECTOR_BODY]] ]
; PRED-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP12]], align 4
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1272,15 +1272,14 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP10]]
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], [[DOTNOT]]
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
; CHECK-NEXT: [[TMP11:%.*]] = trunc nuw nsw i64 [[TMP10]] to i32
; CHECK-NEXT: [[TMP12:%.*]] = add nsw i32 [[TMP11]], -1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[DOTPRE]], i32 [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw <vscale x 4 x i64> [[TMP14]], splat (i64 1)
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP16:%.*]] = shl nuw nsw i32 [[TMP33]], 2
; CHECK-NEXT: [[TMP34:%.*]] = add nsw i32 [[TMP16]], -1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[DOTPRE]], i32 [[TMP34]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@ define i64 @pr97452_scalable_vf1_for(ptr %src, ptr noalias %dst) #0 {
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i32 [[TMP3]], 2
; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], 1
; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[TMP2]], 1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 0, i32 [[TMP5]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
Expand Down
Loading