Rollup merge of #144549 - folkertdev:va-arg-arm, r=saethlin
match clang's `va_arg` assembly on arm targets
tracking issue: https://github.com/rust-lang/rust/issues/44930
For this example:
```rust
#![feature(c_variadic)]
#[unsafe(no_mangle)]
unsafe extern "C" fn variadic(a: f64, mut args: ...) -> f64 {
let b = args.arg::<f64>();
let c = args.arg::<f64>();
a + b + c
}
```
We currently generate (via llvm):
```asm
variadic:
sub sp, sp, #12
stmib sp, {r2, r3}
vmov d0, r0, r1
add r0, sp, #4
vldr d1, [sp, #4]
add r0, r0, #15
bic r0, r0, #7
vadd.f64 d0, d0, d1
add r1, r0, #8
str r1, [sp]
vldr d1, [r0]
vadd.f64 d0, d0, d1
vmov r0, r1, d0
add sp, sp, #12
bx lr
```
LLVM is not doing a good job. In fact, it's well-known that LLVM's implementation of `va_arg` is kind of bad, and we implement it ourselves (based on clang) for many targets already. For arm, our own `emit_ptr_va_arg` saves 3 instructions.
Next, it turns out that LLVM generates better code when the lifetime of the `va_list` is explicitly started and ended. In https://github.com/rust-lang/rust/pull/146059 I already end the lifetime, but when looking at this again, I noticed that it is also important to start it (see https://godbolt.org/z/EGqvKTTsK): failing to explicitly start the lifetime costs an extra register.
So, the combination of `emit_ptr_va_arg` with starting/ending the lifetime makes rustc emit exactly the instructions that clang generates:
```asm
variadic:
sub sp, sp, #12
stmib sp, {r2, r3}
vmov d16, r0, r1
vldr d17, [sp, #4]
vadd.f64 d16, d16, d17
vldr d17, [sp, #12]
vadd.f64 d16, d16, d17
vmov r0, r1, d16
add sp, sp, #12
bx lr
```
The arguments to `emit_ptr_va_arg` are based on [the clang implementation](https://github.com/llvm/llvm-project/blob/03dc2a41f3/clang/lib/CodeGen/Targets/ARM.cpp#L798-L844).
r? ``@workingjubilee`` (I can re-roll if your queue is too full, but you do seem like the right person here)
try-job: armhf-gnu
This commit is contained in:
commit
48d684111e
5 changed files with 67 additions and 1 deletions
|
|
@ -908,6 +908,21 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
|
|||
)
|
||||
}
|
||||
"aarch64" => emit_aapcs_va_arg(bx, addr, target_ty),
|
||||
"arm" => {
|
||||
// Types wider than 16 bytes are not currently supported. Clang has special logic for
|
||||
// such types, but `VaArgSafe` is not implemented for any type that is this large.
|
||||
assert!(bx.cx.size_of(target_ty).bytes() <= 16);
|
||||
|
||||
emit_ptr_va_arg(
|
||||
bx,
|
||||
addr,
|
||||
target_ty,
|
||||
PassMode::Direct,
|
||||
SlotSize::Bytes4,
|
||||
AllowHigherAlign::Yes,
|
||||
ForceRightAdjust::No,
|
||||
)
|
||||
}
|
||||
"s390x" => emit_s390x_va_arg(bx, addr, target_ty),
|
||||
"powerpc" => emit_powerpc_va_arg(bx, addr, target_ty),
|
||||
"powerpc64" | "powerpc64le" => emit_ptr_va_arg(
|
||||
|
|
|
|||
|
|
@ -520,7 +520,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
|
|||
LocalRef::Place(va_list) => {
|
||||
bx.va_end(va_list.val.llval);
|
||||
|
||||
// Explicitly end the lifetime of the `va_list`, this matters for LLVM.
|
||||
// Explicitly end the lifetime of the `va_list`, improves LLVM codegen.
|
||||
bx.lifetime_end(va_list.val.llval, va_list.layout.size);
|
||||
}
|
||||
_ => bug!("C-variadic function must have a `VaList` place"),
|
||||
|
|
|
|||
|
|
@ -438,6 +438,10 @@ fn arg_local_refs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
|
|||
|
||||
if fx.fn_abi.c_variadic && arg_index == fx.fn_abi.args.len() {
|
||||
let va_list = PlaceRef::alloca(bx, bx.layout_of(arg_ty));
|
||||
|
||||
// Explicitly start the lifetime of the `va_list`, improves LLVM codegen.
|
||||
bx.lifetime_start(va_list.val.llval, va_list.layout.size);
|
||||
|
||||
bx.va_start(va_list.val.llval);
|
||||
|
||||
return LocalRef::Place(va_list);
|
||||
|
|
|
|||
26
tests/assembly-llvm/c-variadic-arm.rs
Normal file
26
tests/assembly-llvm/c-variadic-arm.rs
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
//@ assembly-output: emit-asm
|
||||
//@ compile-flags: -Copt-level=3
|
||||
//@ only-arm
|
||||
//@ ignore-thumb
|
||||
//@ ignore-android
|
||||
#![no_std]
|
||||
#![crate_type = "lib"]
|
||||
#![feature(c_variadic)]
|
||||
|
||||
// Check that the assembly that rustc generates matches what clang emits.
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
unsafe extern "C" fn variadic(a: f64, mut args: ...) -> f64 {
|
||||
// CHECK-LABEL: variadic
|
||||
// CHECK: sub sp, sp
|
||||
|
||||
// CHECK: vldr
|
||||
// CHECK: vadd.f64
|
||||
// CHECK: vldr
|
||||
// CHECK: vadd.f64
|
||||
let b = args.arg::<f64>();
|
||||
let c = args.arg::<f64>();
|
||||
a + b + c
|
||||
|
||||
// CHECK: add sp, sp
|
||||
}
|
||||
21
tests/codegen-llvm/c-variadic-lifetime.rs
Normal file
21
tests/codegen-llvm/c-variadic-lifetime.rs
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
//@ add-core-stubs
|
||||
//@ compile-flags: -Copt-level=3
|
||||
#![feature(c_variadic)]
|
||||
#![crate_type = "lib"]
|
||||
|
||||
// Check that `%args` explicitly has its lifetime start and end. Being explicit can improve
|
||||
// instruction and register selection, see e.g. https://github.com/rust-lang/rust/pull/144549
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
unsafe extern "C" fn variadic(a: f64, mut args: ...) -> f64 {
|
||||
// CHECK: call void @llvm.lifetime.start.p0(i64 {{[0-9]+}}, ptr nonnull %args)
|
||||
// CHECK: call void @llvm.va_start.p0(ptr nonnull %args)
|
||||
|
||||
let b = args.arg::<f64>();
|
||||
let c = args.arg::<f64>();
|
||||
|
||||
a + b + c
|
||||
|
||||
// CHECK: call void @llvm.va_end.p0(ptr nonnull %args)
|
||||
// CHECK: call void @llvm.lifetime.end.p0(i64 {{[0-9]+}}, ptr nonnull %args)
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue