Skip to content

Commit 99ef35d

Browse files
authored
Cranelift: s390x: fix patchable call ABI to not clobber outside clobber-save area. (#12148)
It turns out that the s390x ABI is special wrt our others: the s390x System-V ABI provides an area to all callees to save clobbered GPRs. However this is only large enough for r6-r15, the callee-saves in the standard ABI. The implementation implicitly assumed this, and when I adjusted the definition of clobbered registers for the `patchable` ABI, stating that r0-r15 are clobbered instead, the code happily generated a store-multiple (`stmg`) instruction that saved too much data in too little space, overwriting other bits of the stack. Also, the clobber-save/restore sequence code only saved the bottom 64 bits of clobbered vector/float registers (which are 128 bits), implicitly encoding the fact that the SysV ABI specifies only the bottom half as callee-saved. This PR implements `patchable` properly on s390x by open-coding the sequences to save all vector registers and r0-r5 in the explicit clobber-save area (still using the SysV-defined one for r6-r15).
1 parent 2874777 commit 99ef35d

File tree

2 files changed

+261
-167
lines changed

2 files changed

+261
-167
lines changed

cranelift/codegen/src/isa/s390x/abi.rs

Lines changed: 121 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -794,28 +794,65 @@ impl ABIMachineSpec for S390xMachineDeps {
794794
});
795795
}
796796

797-
// Save FPRs.
798-
for (i, reg) in get_clobbered_fprs(frame_layout).iter().enumerate() {
799-
insts.push(Inst::VecStoreLane {
800-
size: 64,
801-
rd: reg.to_reg().into(),
802-
mem: MemArg::reg_plus_off(
803-
stack_reg(),
804-
(i * 8) as i64
805-
+ frame_layout.outgoing_args_size as i64
806-
+ frame_layout.fixed_frame_storage_size as i64,
807-
MemFlags::trusted(),
808-
),
809-
lane_imm: 0,
810-
});
811-
if flags.unwind_info() {
812-
insts.push(Inst::Unwind {
813-
inst: UnwindInst::SaveReg {
814-
clobber_offset: (i * 8) as u32,
815-
reg: reg.to_reg(),
816-
},
797+
// Write the dedicated clobber region.
798+
match call_conv {
799+
isa::CallConv::Patchable => {
800+
// Explicitly save full v0-v31. Save r0-r5; r6-r15 are
801+
// in the ABI-defined register save area.
802+
//
803+
// N.B.: unwind metadata doesn't support a full vector
804+
// reg clobber-save, and we don't expect an unwinder
805+
// to try to unwind the patchable ABI anyway, so we
806+
// omit unwind insts.
807+
for i in 0..32 {
808+
insts.push(Inst::VecStore {
809+
rd: regs::vr(i),
810+
mem: MemArg::reg_plus_off(
811+
stack_reg(),
812+
6 * 8
813+
+ (i as i64) * 16
814+
+ frame_layout.outgoing_args_size as i64
815+
+ frame_layout.fixed_frame_storage_size as i64,
816+
MemFlags::trusted(),
817+
),
818+
});
819+
}
820+
insts.push(Inst::StoreMultiple64 {
821+
rt: regs::gpr(0),
822+
rt2: regs::gpr(5),
823+
mem: MemArg::reg_plus_off(
824+
stack_reg(),
825+
frame_layout.outgoing_args_size as i64
826+
+ frame_layout.fixed_frame_storage_size as i64,
827+
MemFlags::trusted(),
828+
),
817829
});
818830
}
831+
_ => {
832+
// Save FPRs.
833+
for (i, reg) in get_clobbered_fprs(frame_layout).iter().enumerate() {
834+
insts.push(Inst::VecStoreLane {
835+
size: 64,
836+
rd: reg.to_reg().into(),
837+
mem: MemArg::reg_plus_off(
838+
stack_reg(),
839+
(i * 8) as i64
840+
+ frame_layout.outgoing_args_size as i64
841+
+ frame_layout.fixed_frame_storage_size as i64,
842+
MemFlags::trusted(),
843+
),
844+
lane_imm: 0,
845+
});
846+
if flags.unwind_info() {
847+
insts.push(Inst::Unwind {
848+
inst: UnwindInst::SaveReg {
849+
clobber_offset: (i * 8) as u32,
850+
reg: reg.to_reg(),
851+
},
852+
});
853+
}
854+
}
855+
}
819856
}
820857

821858
insts
@@ -828,8 +865,15 @@ impl ABIMachineSpec for S390xMachineDeps {
828865
) -> SmallVec<[Inst; 16]> {
829866
let mut insts = SmallVec::new();
830867

831-
// Restore FPRs.
832-
insts.extend(gen_restore_fprs(frame_layout));
868+
match call_conv {
869+
isa::CallConv::Patchable => {
870+
insts.extend(gen_restore_patchable(frame_layout));
871+
}
872+
_ => {
873+
// Restore FPRs.
874+
insts.extend(gen_restore_fprs(frame_layout));
875+
}
876+
}
833877

834878
// Restore GPRs (including SP).
835879
insts.extend(gen_restore_gprs(call_conv, frame_layout, 0));
@@ -942,17 +986,28 @@ impl ABIMachineSpec for S390xMachineDeps {
942986
// sort because the registers will be unique (there are no dups).
943987
regs.sort_unstable();
944988

945-
// Compute clobber size. We only need to count FPR save slots.
946-
let mut clobber_size = 0;
947-
for reg in &regs {
948-
match reg.to_reg().class() {
949-
RegClass::Int => {}
950-
RegClass::Float => {
951-
clobber_size += 8;
989+
// Compute clobber size. If we are in the patchable ABI, we
990+
// know we save r0-r5 and v0-v31 in the separate clobber area,
991+
// and use the register-save area for r6-r15; otherwise we
992+
// don't count GPRs and only save the FPR part of vector regs.
993+
let mut clobber_size = match call_conv {
994+
isa::CallConv::Patchable => 8 * 6 + 16 * 32,
995+
_ => {
996+
let mut clobber_size = 0;
997+
for reg in &regs {
998+
match reg.to_reg().class() {
999+
RegClass::Int => {}
1000+
RegClass::Float => {
1001+
// We only need to count FPR save slots in the
1002+
// ordinary (SysV and tail-call) ABIs.
1003+
clobber_size += 8;
1004+
}
1005+
RegClass::Vector => unreachable!(),
1006+
}
9521007
}
953-
RegClass::Vector => unreachable!(),
1008+
clobber_size
9541009
}
955-
}
1010+
};
9561011

9571012
// Common code assumes that tail-call arguments are part of the caller's
9581013
// frame. This is not correct for our tail-call convention. To ensure
@@ -1131,6 +1186,12 @@ fn get_clobbered_gprs(frame_layout: &FrameLayout) -> Option<(u8, u8)> {
11311186
let last = clobbered_gpr.last().unwrap().to_reg().hw_enc();
11321187
debug_assert!(clobbered_gpr.iter().all(|r| r.to_reg().hw_enc() >= first));
11331188
debug_assert!(clobbered_gpr.iter().all(|r| r.to_reg().hw_enc() <= last));
1189+
1190+
// Explicitly clamp `first` at r6 at a minimum; that is what the
1191+
// reg-save area has space for. If we're in the patchable ABI with
1192+
// all regs clobber-saved, we save r0-r5 separately.
1193+
let first = core::cmp::max(first, 6);
1194+
11341195
Some((first, last))
11351196
}
11361197

@@ -1227,6 +1288,35 @@ fn gen_restore_fprs(frame_layout: &FrameLayout) -> SmallVec<[Inst; 16]> {
12271288
insts
12281289
}
12291290

1291+
fn gen_restore_patchable(frame_layout: &FrameLayout) -> SmallVec<[Inst; 16]> {
1292+
let mut insts = SmallVec::new();
1293+
1294+
for i in 0..32 {
1295+
insts.push(Inst::VecLoad {
1296+
rd: regs::writable_vr(i),
1297+
mem: MemArg::reg_plus_off(
1298+
stack_reg(),
1299+
6 * 8
1300+
+ (i as i64) * 16
1301+
+ frame_layout.outgoing_args_size as i64
1302+
+ frame_layout.fixed_frame_storage_size as i64,
1303+
MemFlags::trusted(),
1304+
),
1305+
});
1306+
}
1307+
insts.push(Inst::LoadMultiple64 {
1308+
rt: regs::writable_gpr(0),
1309+
rt2: regs::writable_gpr(5),
1310+
mem: MemArg::reg_plus_off(
1311+
stack_reg(),
1312+
frame_layout.outgoing_args_size as i64 + frame_layout.fixed_frame_storage_size as i64,
1313+
MemFlags::trusted(),
1314+
),
1315+
});
1316+
1317+
insts
1318+
}
1319+
12301320
const fn sysv_clobbers() -> PRegSet {
12311321
PRegSet::empty()
12321322
.with(gpr_preg(0))

0 commit comments

Comments
 (0)