From f4e92e379c010913b8adda7273b41198e31d4c9a Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 7 Jul 2017 10:16:03 -0700 Subject: [PATCH 1/2] Tweak definition of probestack functions It looks like the old `__rust_probestack` routine is incompatible with newer linux kernels. My best guess for this is that the kernel's auto-growth logic is failing to trigger, causing what looks like a legitimate segfault to get delivered. My best guess for why *that's* happening is that the faulting address is below `%rsp`, whereas previously all faulting stack addresses were above `%rsp`. The probestack routine does not modify `%rsp` as it's probing the stack, and presumably newer kernels are interpreting this as a legitimate violation. This commit tweaks the probestack routine to instead update `%rsp` incrementally as probing happens. The ABI of the function, however, requires that `%rsp` isn't changed as part of the function so it's restored at the end to the previous value. --- library/compiler-builtins/src/probestack.rs | 46 +++++++++++---------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/library/compiler-builtins/src/probestack.rs b/library/compiler-builtins/src/probestack.rs index e59fe861a7e7..e1224fe7ace7 100644 --- a/library/compiler-builtins/src/probestack.rs +++ b/library/compiler-builtins/src/probestack.rs @@ -53,28 +53,33 @@ pub unsafe extern fn __rust_probestack() { // The ABI here is that the stack frame size is located in `%eax`. Upon // return we're not supposed to modify `%esp` or `%eax`. asm!(" - lea 8(%rsp),%r11 // rsp before calling this routine -> r11 + mov %rax,%r11 // duplicate %rax as we're clobbering %r11 - // Main loop, taken in one page increments. We're decrementing r11 by + // Main loop, taken in one page increments. We're decrementing rsp by // a page each time until there's less than a page remaining. We're // guaranteed that this function isn't called unless there's more than a - // page needed + // page needed. 
+ // + // Note that we're also testing against `8(%rsp)` to account for the 8 + // bytes pushed on the stack originally with our return address. Using + // `8(%rsp)` simulates us testing the stack pointer in the caller's + // context. 2: + sub $$0x1000,%rsp + test %rsp,8(%rsp) sub $$0x1000,%r11 - test %r11,(%r11) - sub $$0x1000,%rax - cmp $$0x1000,%rax + cmp $$0x1000,%r11 ja 2b // Finish up the last remaining stack space requested, getting the last - // bits out of rax - sub %rax,%r11 - test %r11,(%r11) + // bits out of r11 + sub %r11,%rsp + test %rsp,8(%rsp) - // We now know that %r11 is (%rsp + 8 - %rax) so to recover rax - // we calculate (%rsp + 8) - %r11 which will give us %rax - lea 8(%rsp),%rax - sub %r11,%rax + // Restore the stack pointer to what it previously was when entering + // this function. The caller will readjust the stack pointer after we + // return. + add %rax,%rsp ret "); @@ -92,19 +97,18 @@ pub unsafe extern fn __rust_probestack() { // The ABI here is the same as x86_64, except everything is 32-bits large.
asm!(" push %ecx - lea 8(%esp),%ecx + mov %eax,%ecx 2: + sub $$0x1000,%esp + test %esp,8(%esp) sub $$0x1000,%ecx - test %ecx,(%ecx) - sub $$0x1000,%eax - cmp $$0x1000,%eax + cmp $$0x1000,%ecx ja 2b - sub %eax,%ecx - test %ecx,(%ecx) + sub %ecx,%esp + test %esp,8(%esp) - lea 8(%esp),%eax - sub %ecx,%eax + add %eax,%esp pop %ecx ret "); From 2fa53c4b67702063258f6464fea6ed50a6b949a7 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 7 Jul 2017 11:20:04 -0700 Subject: [PATCH 2/2] Don't mangle probes all the time --- library/compiler-builtins/build.rs | 19 +++++++++++-------- library/compiler-builtins/ci/run.sh | 1 + .../compiler-builtins/examples/intrinsics.rs | 6 ++++++ library/compiler-builtins/src/float/conv.rs | 5 +++-- library/compiler-builtins/src/probestack.rs | 4 ++-- library/compiler-builtins/src/x86_64.rs | 6 +++--- 6 files changed, 26 insertions(+), 15 deletions(-) diff --git a/library/compiler-builtins/build.rs b/library/compiler-builtins/build.rs index d4c4ad247de7..25cc5204d2e7 100644 --- a/library/compiler-builtins/build.rs +++ b/library/compiler-builtins/build.rs @@ -21,9 +21,14 @@ fn main() { #[cfg(feature = "gen-tests")] tests::generate(); - // Build missing intrinsics from compiler-rt C source code - #[cfg(feature = "c")] - c::compile(&llvm_target); + // Build missing intrinsics from compiler-rt C source code. If we're + // mangling names though we assume that we're also in test mode so we don't + // build anything and we rely on the upstream implementation of compiler-rt + // functions + if !cfg!(feature = "mangled-names") { + #[cfg(feature = "c")] + c::compile(&llvm_target); + } // To compile intrinsics.rs for thumb targets, where there is no libc if llvm_target[0].starts_with("thumb") { @@ -4099,11 +4104,9 @@ mod c { // also needs to satisfy intrinsics that jemalloc or C in general may // need, so include a few more that aren't typically needed by // LLVM/Rust. 
- if env::var_os("CARGO_FEATURE_RUSTBUILD").is_some() { - sources.extend(&[ - "ffsdi2.c", - ]); - } + sources.extend(&[ + "ffsdi2.c", + ]); if target_os != "ios" { sources.extend( diff --git a/library/compiler-builtins/ci/run.sh b/library/compiler-builtins/ci/run.sh index c37ca489f93c..9a32ee9ea64c 100755 --- a/library/compiler-builtins/ci/run.sh +++ b/library/compiler-builtins/ci/run.sh @@ -93,6 +93,7 @@ for rlib in $(echo $path); do uniq -d | \ grep -v __x86.get_pc_thunk | \ grep -v __builtin_cl | \ + grep -v __builtin_ctz | \ grep 'T __' if test $? = 0; then diff --git a/library/compiler-builtins/examples/intrinsics.rs b/library/compiler-builtins/examples/intrinsics.rs index e7c55e835ed3..3820887895de 100644 --- a/library/compiler-builtins/examples/intrinsics.rs +++ b/library/compiler-builtins/examples/intrinsics.rs @@ -13,6 +13,8 @@ #![feature(lang_items)] #![feature(start)] #![feature(i128_type)] +#![feature(global_allocator)] +#![feature(allocator_api)] #![cfg_attr(windows, feature(panic_unwind))] #![no_std] @@ -22,6 +24,10 @@ extern crate compiler_builtins; #[cfg(windows)] extern crate panic_unwind; +#[cfg(not(thumb))] +#[global_allocator] +static A: alloc_system::System = alloc_system::System; + // NOTE cfg(not(thumbv6m)) means that the operation is not supported on ARMv6-M at all. Not even // compiler-rt provides a C/assembly implementation. diff --git a/library/compiler-builtins/src/float/conv.rs b/library/compiler-builtins/src/float/conv.rs index e12349cfff99..f2fd01d37b05 100644 --- a/library/compiler-builtins/src/float/conv.rs +++ b/library/compiler-builtins/src/float/conv.rs @@ -112,8 +112,9 @@ intrinsics! 
{ int_to_float!(i, u32, f64) } - #[use_c_shim_if(all(any(target_arch = "x86", target_arch = "x86_64"), - not(windows)))] + #[use_c_shim_if(all(not(target_env = "msvc"), + any(target_arch = "x86", + all(not(windows), target_arch = "x86_64"))))] #[arm_aeabi_alias = __aeabi_ul2d] pub extern "C" fn __floatundidf(i: u64) -> f64 { int_to_float!(i, u64, f64) diff --git a/library/compiler-builtins/src/probestack.rs b/library/compiler-builtins/src/probestack.rs index e1224fe7ace7..329e2726511c 100644 --- a/library/compiler-builtins/src/probestack.rs +++ b/library/compiler-builtins/src/probestack.rs @@ -44,7 +44,7 @@ #![cfg(not(windows))] // Windows already has builtins to do this #[naked] -#[no_mangle] +#[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg(target_arch = "x86_64")] pub unsafe extern fn __rust_probestack() { // Our goal here is to touch each page between %rsp+8 and %rsp+8-%rax, @@ -87,7 +87,7 @@ pub unsafe extern fn __rust_probestack() { } #[naked] -#[no_mangle] +#[cfg_attr(not(feature = "mangled-names"), no_mangle)] #[cfg(target_arch = "x86")] pub unsafe extern fn __rust_probestack() { // This is the same as x86_64 above, only translated for 32-bit sizes. 
Note diff --git a/library/compiler-builtins/src/x86_64.rs b/library/compiler-builtins/src/x86_64.rs index 072f964dd70c..64cc06c98345 100644 --- a/library/compiler-builtins/src/x86_64.rs +++ b/library/compiler-builtins/src/x86_64.rs @@ -10,7 +10,7 @@ use core::intrinsics; #[cfg(windows)] #[naked] -#[no_mangle] +#[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe fn ___chkstk_ms() { asm!("push %rcx push %rax @@ -34,7 +34,7 @@ pub unsafe fn ___chkstk_ms() { #[cfg(windows)] #[naked] -#[no_mangle] +#[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe fn __alloca() { asm!("mov %rcx,%rax // x64 _alloca is a normal function with parameter in rcx jmp ___chkstk // Jump to ___chkstk since fallthrough may be unreliable"); @@ -43,7 +43,7 @@ pub unsafe fn __alloca() { #[cfg(windows)] #[naked] -#[no_mangle] +#[cfg_attr(not(feature = "mangled-names"), no_mangle)] pub unsafe fn ___chkstk() { asm!("push %rcx cmp $$0x1000,%rax