compiler-builtins: plumb LSE support for aarch64 on linux
Add dynamic support for aarch64 LSE atomic ops on linux targets when optimized-compiler-builtins is not enabled. A hook, __enable_rust_lse, is provided for the runtime to enable them if available. A future patch will use this to enable them if available. The resulting asm should exactly match that of LLVM's compiler-rt builtins, though the symbol naming for the support function and global does not.
This commit is contained in:
parent
e1b9081e69
commit
3b50253b57
1 changed files with 66 additions and 10 deletions
|
|
@ -6,9 +6,6 @@
|
|||
//! which is supported on the current CPU.
|
||||
//! See <https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics> for more discussion.
|
||||
//!
|
||||
//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
|
||||
//! Use the `compiler-rt` intrinsics if you want LSE support.
|
||||
//!
|
||||
//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
|
||||
//!
|
||||
//! Generate functions for each of the following symbols:
|
||||
|
|
@ -24,7 +21,18 @@
|
|||
//! We do something similar, but with macro arguments.
|
||||
#![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule
|
||||
|
||||
// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
|
||||
use core::sync::atomic::{AtomicU8, Ordering};
|
||||
|
||||
/// non-zero if the host supports LSE atomics.
|
||||
static HAVE_LSE_ATOMICS: AtomicU8 = AtomicU8::new(0);
|
||||
|
||||
intrinsics! {
|
||||
/// Call to enable LSE in outline atomic operations. The caller must verify
|
||||
/// LSE operations are supported.
|
||||
pub extern "C" fn __rust_enable_lse() {
|
||||
HAVE_LSE_ATOMICS.store(1, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
/// Translate a byte size to a Rust type.
|
||||
#[rustfmt::skip]
|
||||
|
|
@ -45,6 +53,7 @@ macro_rules! reg {
|
|||
(2, $num:literal) => { concat!("w", $num) };
|
||||
(4, $num:literal) => { concat!("w", $num) };
|
||||
(8, $num:literal) => { concat!("x", $num) };
|
||||
(16, $num:literal) => { concat!("x", $num) };
|
||||
}
|
||||
|
||||
/// Given an atomic ordering, translate it to the acquire suffix for the lxdr aarch64 ASM instruction.
|
||||
|
|
@ -126,6 +135,41 @@ macro_rules! stxp {
|
|||
};
|
||||
}
|
||||
|
||||
// If supported, perform the requested LSE op and return, or fallthrough.
|
||||
macro_rules! try_lse_op {
|
||||
($op: literal, $ordering:ident, $bytes:tt, $($reg:literal,)* [ $mem:ident ] ) => {
|
||||
concat!(
|
||||
".arch_extension lse; ",
|
||||
"adrp x16, {have_lse}; ",
|
||||
"ldrb w16, [x16, :lo12:{have_lse}]; ",
|
||||
"cbz w16, 8f; ",
|
||||
// LSE_OP s(reg),* [$mem]
|
||||
concat!(lse!($op, $ordering, $bytes), $( " ", reg!($bytes, $reg), ", " ,)* "[", stringify!($mem), "]; ",),
|
||||
"ret; ",
|
||||
"8:"
|
||||
)
|
||||
};
|
||||
}
|
||||
|
||||
// Translate memory ordering to the LSE suffix
|
||||
#[rustfmt::skip]
|
||||
macro_rules! lse_mem_sfx {
|
||||
(Relaxed) => { "" };
|
||||
(Acquire) => { "a" };
|
||||
(Release) => { "l" };
|
||||
(AcqRel) => { "al" };
|
||||
}
|
||||
|
||||
// Generate the aarch64 LSE operation for memory ordering and width
|
||||
macro_rules! lse {
|
||||
($op:literal, $order:ident, 16) => {
|
||||
concat!($op, "p", lse_mem_sfx!($order))
|
||||
};
|
||||
($op:literal, $order:ident, $bytes:tt) => {
|
||||
concat!($op, lse_mem_sfx!($order), size!($bytes))
|
||||
};
|
||||
}
|
||||
|
||||
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
|
||||
macro_rules! compare_and_swap {
|
||||
($ordering:ident, $bytes:tt, $name:ident) => {
|
||||
|
|
@ -137,7 +181,9 @@ macro_rules! compare_and_swap {
|
|||
) -> int_ty!($bytes) {
|
||||
// We can't use `AtomicI8::compare_and_swap`; we *are* compare_and_swap.
|
||||
core::arch::naked_asm! {
|
||||
// UXT s(tmp0), s(0)
|
||||
// CAS s(0), s(1), [x2]; if LSE supported.
|
||||
try_lse_op!("cas", $ordering, $bytes, 0, 1, [x2]),
|
||||
// UXT s(tmp0), s(0)
|
||||
concat!(uxt!($bytes), " ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
|
||||
"0:",
|
||||
// LDXR s(0), [x2]
|
||||
|
|
@ -150,6 +196,7 @@ macro_rules! compare_and_swap {
|
|||
"cbnz w17, 0b",
|
||||
"1:",
|
||||
"ret",
|
||||
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -166,6 +213,8 @@ macro_rules! compare_and_swap_i128 {
|
|||
expected: i128, desired: i128, ptr: *mut i128
|
||||
) -> i128 {
|
||||
core::arch::naked_asm! {
|
||||
// CASP x0, x1, x2, x3, [x4]; if LSE supported.
|
||||
try_lse_op!("cas", $ordering, 16, 0, 1, 2, 3, [x4]),
|
||||
"mov x16, x0",
|
||||
"mov x17, x1",
|
||||
"0:",
|
||||
|
|
@ -179,6 +228,7 @@ macro_rules! compare_and_swap_i128 {
|
|||
"cbnz w15, 0b",
|
||||
"1:",
|
||||
"ret",
|
||||
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -195,6 +245,8 @@ macro_rules! swap {
|
|||
left: int_ty!($bytes), right_ptr: *mut int_ty!($bytes)
|
||||
) -> int_ty!($bytes) {
|
||||
core::arch::naked_asm! {
|
||||
// SWP s(0), s(0), [x1]; if LSE supported.
|
||||
try_lse_op!("swp", $ordering, $bytes, 0, 0, [x1]),
|
||||
// mov s(tmp0), s(0)
|
||||
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
|
||||
"0:",
|
||||
|
|
@ -204,6 +256,7 @@ macro_rules! swap {
|
|||
concat!(stxr!($ordering, $bytes), " w17, ", reg!($bytes, 16), ", [x1]"),
|
||||
"cbnz w17, 0b",
|
||||
"ret",
|
||||
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -212,7 +265,7 @@ macro_rules! swap {
|
|||
|
||||
/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
|
||||
macro_rules! fetch_op {
|
||||
($ordering:ident, $bytes:tt, $name:ident, $op:literal) => {
|
||||
($ordering:ident, $bytes:tt, $name:ident, $op:literal, $lse_op:literal) => {
|
||||
intrinsics! {
|
||||
#[maybe_use_optimized_c_shim]
|
||||
#[unsafe(naked)]
|
||||
|
|
@ -220,6 +273,8 @@ macro_rules! fetch_op {
|
|||
val: int_ty!($bytes), ptr: *mut int_ty!($bytes)
|
||||
) -> int_ty!($bytes) {
|
||||
core::arch::naked_asm! {
|
||||
// LSEOP s(0), s(0), [x1]; if LSE supported.
|
||||
try_lse_op!($lse_op, $ordering, $bytes, 0, 0, [x1]),
|
||||
// mov s(tmp0), s(0)
|
||||
concat!("mov ", reg!($bytes, 16), ", ", reg!($bytes, 0)),
|
||||
"0:",
|
||||
|
|
@ -231,6 +286,7 @@ macro_rules! fetch_op {
|
|||
concat!(stxr!($ordering, $bytes), " w15, ", reg!($bytes, 17), ", [x1]"),
|
||||
"cbnz w15, 0b",
|
||||
"ret",
|
||||
have_lse = sym crate::aarch64_linux::HAVE_LSE_ATOMICS,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -240,25 +296,25 @@ macro_rules! fetch_op {
|
|||
// We need a single macro to pass to `foreach_ldadd`.
|
||||
macro_rules! add {
|
||||
($ordering:ident, $bytes:tt, $name:ident) => {
|
||||
fetch_op! { $ordering, $bytes, $name, "add" }
|
||||
fetch_op! { $ordering, $bytes, $name, "add", "ldadd" }
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! and {
|
||||
($ordering:ident, $bytes:tt, $name:ident) => {
|
||||
fetch_op! { $ordering, $bytes, $name, "bic" }
|
||||
fetch_op! { $ordering, $bytes, $name, "bic", "ldclr" }
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! xor {
|
||||
($ordering:ident, $bytes:tt, $name:ident) => {
|
||||
fetch_op! { $ordering, $bytes, $name, "eor" }
|
||||
fetch_op! { $ordering, $bytes, $name, "eor", "ldeor" }
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! or {
|
||||
($ordering:ident, $bytes:tt, $name:ident) => {
|
||||
fetch_op! { $ordering, $bytes, $name, "orr" }
|
||||
fetch_op! { $ordering, $bytes, $name, "orr", "ldset" }
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue