Merge pull request #532 from jyn514/lse.rs
This commit is contained in:
commit
bb0d107feb
5 changed files with 436 additions and 4 deletions
|
|
@ -1,4 +1,4 @@
|
|||
use std::env;
|
||||
use std::{collections::HashMap, env, sync::atomic::Ordering};
|
||||
|
||||
fn main() {
|
||||
println!("cargo:rerun-if-changed=build.rs");
|
||||
|
|
@ -90,6 +90,65 @@ fn main() {
|
|||
{
|
||||
println!("cargo:rustc-cfg=kernel_user_helpers")
|
||||
}
|
||||
|
||||
if llvm_target[0] == "aarch64" {
|
||||
generate_aarch64_outlined_atomics();
|
||||
}
|
||||
}
|
||||
|
||||
/// Maps an atomic ordering to the suffix used in the `__aarch64_*` outlined-atomics symbol names.
///
/// # Panics
///
/// Panics for any ordering other than `Relaxed`, `Acquire`, `Release` or `AcqRel`.
fn aarch64_symbol(ordering: Ordering) -> &'static str {
    let suffix = match ordering {
        Ordering::Relaxed => Some("relax"),
        Ordering::Acquire => Some("acq"),
        Ordering::Release => Some("rel"),
        Ordering::AcqRel => Some("acq_rel"),
        // `Ordering` is non-exhaustive; anything else has no outlined-atomics symbol here.
        _ => None,
    };
    suffix.unwrap_or_else(|| panic!("unknown symbol for {:?}", ordering))
}
|
||||
|
||||
/// The `concat_idents` macro is extremely annoying and doesn't allow us to define new items.
|
||||
/// Define them from the build script instead.
|
||||
/// Note that the majority of the code is still defined in `aarch64.rs` through inline macros.
|
||||
fn generate_aarch64_outlined_atomics() {
|
||||
use std::fmt::Write;
|
||||
// #[macro_export] so that we can use this in tests
|
||||
let gen_macro =
|
||||
|name| format!("#[macro_export] macro_rules! foreach_{name} {{ ($macro:path) => {{\n");
|
||||
|
||||
// Generate different macros for add/clr/eor/set so that we can test them separately.
|
||||
let sym_names = ["cas", "ldadd", "ldclr", "ldeor", "ldset", "swp"];
|
||||
let mut macros = HashMap::new();
|
||||
for sym in sym_names {
|
||||
macros.insert(sym, gen_macro(sym));
|
||||
}
|
||||
|
||||
// Only CAS supports 16 bytes, and it has a different implementation that uses a different macro.
|
||||
let mut cas16 = gen_macro("cas16");
|
||||
|
||||
for ordering in [
|
||||
Ordering::Relaxed,
|
||||
Ordering::Acquire,
|
||||
Ordering::Release,
|
||||
Ordering::AcqRel,
|
||||
] {
|
||||
let sym_ordering = aarch64_symbol(ordering);
|
||||
for size in [1, 2, 4, 8] {
|
||||
for (sym, macro_) in &mut macros {
|
||||
let name = format!("__aarch64_{sym}{size}_{sym_ordering}");
|
||||
writeln!(macro_, "$macro!( {ordering:?}, {size}, {name} );").unwrap();
|
||||
}
|
||||
}
|
||||
let name = format!("__aarch64_cas16_{sym_ordering}");
|
||||
writeln!(cas16, "$macro!( {ordering:?}, {name} );").unwrap();
|
||||
}
|
||||
|
||||
let mut buf = String::new();
|
||||
for macro_def in macros.values().chain(std::iter::once(&cas16)) {
|
||||
buf += macro_def;
|
||||
buf += "}; }";
|
||||
}
|
||||
let dst = std::env::var("OUT_DIR").unwrap() + "/outlined_atomics.rs";
|
||||
std::fs::write(dst, buf).unwrap();
|
||||
}
|
||||
|
||||
#[cfg(feature = "c")]
|
||||
|
|
|
|||
277
library/compiler-builtins/src/aarch64.rs
Normal file
277
library/compiler-builtins/src/aarch64.rs
Normal file
|
|
@ -0,0 +1,277 @@
|
|||
//! Aarch64 targets have two possible implementations for atomics:
|
||||
//! 1. Load-Locked, Store-Conditional (LL/SC), older and slower.
|
||||
//! 2. Large System Extensions (LSE), newer and faster.
|
||||
//! To avoid breaking backwards compat, C toolchains introduced a concept of "outlined atomics",
|
||||
//! where atomic operations call into the compiler runtime to dispatch between the two depending on
|
||||
//! which is supported on the current CPU.
|
||||
//! See https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10#:~:text=out%20of%20line%20atomics for more discussion.
|
||||
//!
|
||||
//! Currently we only support LL/SC, because LSE requires `getauxval` from libc in order to do runtime detection.
|
||||
//! Use the `compiler-rt` intrinsics if you want LSE support.
|
||||
//!
|
||||
//! Ported from `aarch64/lse.S` in LLVM's compiler-rt.
|
||||
//!
|
||||
//! Generate functions for each of the following symbols:
|
||||
//! __aarch64_casM_ORDER
|
||||
//! __aarch64_swpN_ORDER
|
||||
//! __aarch64_ldaddN_ORDER
|
||||
//! __aarch64_ldclrN_ORDER
|
||||
//! __aarch64_ldeorN_ORDER
|
||||
//! __aarch64_ldsetN_ORDER
|
||||
//! for N = {1, 2, 4, 8}, M = {1, 2, 4, 8, 16}, ORDER = { relax, acq, rel, acq_rel }
|
||||
//!
|
||||
//! The original `lse.S` has some truly horrifying code that expects to be compiled multiple times with different constants.
|
||||
//! We do something similar, but with macro arguments.
|
||||
#![cfg_attr(feature = "c", allow(unused_macros))] // avoid putting the macros into a submodule
|
||||
|
||||
// We don't do runtime dispatch so we don't have to worry about the `__aarch64_have_lse_atomics` global ctor.
|
||||
|
||||
/// Maps an operand width in bytes to the signed integer type of that width.
#[rustfmt::skip]
macro_rules! int_ty {
    (16) => { i128 };
    (8)  => { i64 };
    (4)  => { i32 };
    (2)  => { i16 };
    (1)  => { i8 };
}
|
||||
|
||||
/// Builds the name of general-purpose register `$idx` at the width used for an operand of the
/// given byte size: the `w` view for 1-, 2- and 4-byte operands, the `x` view for 8-byte ones.
///
/// See <https://developer.arm.com/documentation/102374/0101/Registers-in-AArch64---general-purpose-registers>.
#[rustfmt::skip]
macro_rules! reg {
    (8, $idx:literal) => { concat!("x", $idx) };
    (4, $idx:literal) => { concat!("w", $idx) };
    (2, $idx:literal) => { concat!("w", $idx) };
    (1, $idx:literal) => { concat!("w", $idx) };
}
|
||||
|
||||
/// Selects the acquire marker for the given atomic ordering: `"a"` for the acquiring orderings,
/// the empty string otherwise. It is spliced into the `ldxr`/`ldxp` aarch64 ASM mnemonics.
#[rustfmt::skip]
macro_rules! acquire {
    (Acquire) => { "a" };
    (AcqRel)  => { "a" };
    (Relaxed) => { "" };
    (Release) => { "" };
}
|
||||
|
||||
/// Selects the release marker for the given atomic ordering: `"l"` for the releasing orderings,
/// the empty string otherwise. It is spliced into the `stxr`/`stxp` aarch64 ASM mnemonics.
#[rustfmt::skip]
macro_rules! release {
    (Release) => { "l" };
    (AcqRel)  => { "l" };
    (Relaxed) => { "" };
    (Acquire) => { "" };
}
|
||||
|
||||
/// Selects the operand-size suffix of an aarch64 ASM instruction for a size in bytes:
/// `"b"` for 1 byte, `"h"` for 2 bytes, and no suffix for 4, 8 and 16 bytes.
#[rustfmt::skip]
macro_rules! size {
    (16) => { "" };
    (8)  => { "" };
    (4)  => { "" };
    (2)  => { "h" };
    (1)  => { "b" };
}
|
||||
|
||||
/// Selects the instruction used to copy an operand of the given byte size into a scratch
/// register: an Unsigned eXTend (`uxtb`/`uxth`) for 1- and 2-byte operands, and a plain `mov`
/// for every other size.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/UXTB--Unsigned-Extend-Byte--an-alias-of-UBFM->
#[rustfmt::skip]
macro_rules! uxt {
    (2) => { "uxth" };
    (1) => { "uxtb" };
    // Catch-all: must stay last so the sized arms above take precedence.
    ($other:tt) => { "mov" };
}
|
||||
|
||||
/// Builds the LoaD eXclusive Register mnemonic for an atomic ordering and operand byte size:
/// `ld`, the acquire marker from `acquire!`, `xr`, then the size suffix from `size!`.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDXR--Load-Exclusive-Register->.
macro_rules! ldxr {
    ($order:ident, $width:tt) => {
        concat!("ld", acquire!($order), "xr", size!($width))
    };
}
|
||||
|
||||
/// Builds the STore eXclusive Register mnemonic for an atomic ordering and operand byte size:
/// `st`, the release marker from `release!`, `xr`, then the size suffix from `size!`.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STXR--Store-Exclusive-Register->.
macro_rules! stxr {
    ($order:ident, $width:tt) => {
        concat!("st", release!($order), "xr", size!($width))
    };
}
|
||||
|
||||
/// Builds the LoaD eXclusive Pair of registers mnemonic for an atomic ordering:
/// `ld`, the acquire marker from `acquire!`, then `xp`.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/LDXP--Load-Exclusive-Pair-of-Registers->
macro_rules! ldxp {
    ($order:ident) => {
        concat!("ld", acquire!($order), "xp")
    };
}
|
||||
|
||||
/// Builds the STore eXclusive Pair of registers mnemonic for an atomic ordering:
/// `st`, the release marker from `release!`, then `xp`.
///
/// See <https://developer.arm.com/documentation/ddi0596/2020-12/Base-Instructions/STXP--Store-Exclusive-Pair-of-registers->.
macro_rules! stxp {
    ($order:ident) => {
        concat!("st", release!($order), "xp")
    };
}
|
||||
|
||||
/// Defines the compare-and-swap intrinsic `$sym` for a `$width`-byte operand with the given
/// ordering, implemented as a load-exclusive / store-exclusive retry loop.
///
/// The generated function stores `desired` to `*ptr` only if the value loaded from `*ptr`
/// compares equal to `expected`, and returns the loaded value either way.
///
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.compare_and_swap>.
macro_rules! compare_and_swap {
    ($order:ident, $width:tt, $sym:ident) => {
        intrinsics! {
            #[maybe_use_optimized_c_shim]
            #[naked]
            pub unsafe extern "C" fn $sym (
                expected: int_ty!($width), desired: int_ty!($width), ptr: *mut int_ty!($width)
            ) -> int_ty!($width) {
                // This function *is* compare_and_swap, so `AtomicI8::compare_and_swap` can't be used.
                unsafe { core::arch::asm! {
                    // Keep a copy of `expected` in tmp0 (register 16), because register 0 is
                    // overwritten by the load below:
                    //   UXT s(tmp0), s(0)
                    concat!(uxt!($width), " ", reg!($width, 16), ", ", reg!($width, 0)),
                    // Retry point for a failed exclusive store.
                    "0:",
                    //   LDXR s(0), [x2]
                    concat!(ldxr!($order, $width), " ", reg!($width, 0), ", [x2]"),
                    //   cmp s(0), s(tmp0)
                    concat!("cmp ", reg!($width, 0), ", ", reg!($width, 16)),
                    // Mismatch: skip the store and return the loaded value.
                    "bne    1f",
                    //   STXR w(tmp1), s(1), [x2]
                    concat!(stxr!($order, $width), " w17, ", reg!($width, 1), ", [x2]"),
                    // Non-zero status in w17 means the exclusive store failed; try again.
                    "cbnz   w17, 0b",
                    "1:",
                    "ret",
                    options(noreturn)
                } }
            }
        }
    };
}
|
||||
|
||||
// The 16-byte variant works on register pairs, so it cannot share `compare_and_swap`.
/// Defines the 16-byte compare-and-swap intrinsic `$sym` with the given ordering, implemented
/// as a load-exclusive-pair / store-exclusive-pair retry loop.
macro_rules! compare_and_swap_i128 {
    ($order:ident, $sym:ident) => {
        intrinsics! {
            #[maybe_use_optimized_c_shim]
            #[naked]
            pub unsafe extern "C" fn $sym (
                expected: i128, desired: i128, ptr: *mut i128
            ) -> i128 {
                unsafe { core::arch::asm! {
                    // Keep a copy of the x0/x1 pair in tmp0/tmp1 (x16/x17), because x0/x1 are
                    // overwritten by the load below.
                    "mov    x16, x0",
                    "mov    x17, x1",
                    // Retry point for a failed exclusive store.
                    "0:",
                    //   LDXP x0, x1, [x4]
                    concat!(ldxp!($order), " x0, x1, [x4]"),
                    // Compare both halves; the second compare only runs if the first matched.
                    "cmp    x0, x16",
                    "ccmp   x1, x17, #0, eq",
                    // Mismatch: skip the store and return the loaded pair.
                    "bne    1f",
                    //   STXP w(tmp2), x2, x3, [x4]
                    concat!(stxp!($order), " w15, x2, x3, [x4]"),
                    // Non-zero status in w15 means the exclusive store failed; try again.
                    "cbnz   w15, 0b",
                    "1:",
                    "ret",
                    options(noreturn)
                } }
            }
        }
    };
}
|
||||
|
||||
/// Defines the atomic swap intrinsic `$sym` for a `$width`-byte operand with the given ordering,
/// implemented as a load-exclusive / store-exclusive retry loop.
///
/// The generated function stores `left` to `*right_ptr` and returns the value loaded from it.
///
/// See <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.swap>.
macro_rules! swap {
    ($order:ident, $width:tt, $sym:ident) => {
        intrinsics! {
            #[maybe_use_optimized_c_shim]
            #[naked]
            pub unsafe extern "C" fn $sym (
                left: int_ty!($width), right_ptr: *mut int_ty!($width)
            ) -> int_ty!($width) {
                unsafe { core::arch::asm! {
                    // Keep the new value in tmp0 (register 16), because register 0 is
                    // overwritten by the load below:
                    //   mov s(tmp0), s(0)
                    concat!("mov ", reg!($width, 16), ", ", reg!($width, 0)),
                    // Retry point for a failed exclusive store.
                    "0:",
                    //   LDXR s(0), [x1]
                    concat!(ldxr!($order, $width), " ", reg!($width, 0), ", [x1]"),
                    //   STXR w(tmp1), s(tmp0), [x1]
                    concat!(stxr!($order, $width), " w17, ", reg!($width, 16), ", [x1]"),
                    // Non-zero status in w17 means the exclusive store failed; try again.
                    "cbnz   w17, 0b",
                    "ret",
                    options(noreturn)
                } }
            }
        }
    };
}
|
||||
|
||||
/// Defines the fetch-and-modify intrinsic `$sym` for a `$width`-byte operand with the given
/// ordering, implemented as a load-exclusive / store-exclusive retry loop.
///
/// `$insn` is the three-operand instruction that combines the loaded value (first source) with
/// `val` (second source). The generated function stores the result to `*ptr` and returns the
/// value that was loaded.
///
/// See (e.g.) <https://doc.rust-lang.org/stable/std/sync/atomic/struct.AtomicI8.html#method.fetch_add>.
macro_rules! fetch_op {
    ($order:ident, $width:tt, $sym:ident, $insn:literal) => {
        intrinsics! {
            #[maybe_use_optimized_c_shim]
            #[naked]
            pub unsafe extern "C" fn $sym (
                val: int_ty!($width), ptr: *mut int_ty!($width)
            ) -> int_ty!($width) {
                unsafe { core::arch::asm! {
                    // Keep the operand in tmp0 (register 16), because register 0 is
                    // overwritten by the load below:
                    //   mov s(tmp0), s(0)
                    concat!("mov ", reg!($width, 16), ", ", reg!($width, 0)),
                    // Retry point for a failed exclusive store.
                    "0:",
                    //   LDXR s(0), [x1]
                    concat!(ldxr!($order, $width), " ", reg!($width, 0), ", [x1]"),
                    //   OP s(tmp1), s(0), s(tmp0)
                    concat!($insn, " ", reg!($width, 17), ", ", reg!($width, 0), ", ", reg!($width, 16)),
                    //   STXR w(tmp2), s(tmp1), [x1]
                    concat!(stxr!($order, $width), " w15, ", reg!($width, 17), ", [x1]"),
                    // Non-zero status in w15 means the exclusive store failed; try again.
                    "cbnz  w15, 0b",
                    "ret",
                    options(noreturn)
                } }
            }
        }
    };
}
|
||||
|
||||
// `foreach_ldadd!` takes a single macro path, so the instruction is fixed here.
/// Defines a fetch-and-add intrinsic by forwarding to `fetch_op!` with the `add` instruction.
macro_rules! add {
    ($order:ident, $width:tt, $sym:ident) => {
        fetch_op! { $order, $width, $sym, "add" }
    };
}
|
||||
|
||||
/// Defines a fetch-and-clear intrinsic by forwarding to `fetch_op!` with the `bic` instruction.
macro_rules! and {
    ($order:ident, $width:tt, $sym:ident) => {
        fetch_op! { $order, $width, $sym, "bic" }
    };
}
|
||||
|
||||
/// Defines a fetch-and-xor intrinsic by forwarding to `fetch_op!` with the `eor` instruction.
macro_rules! xor {
    ($order:ident, $width:tt, $sym:ident) => {
        fetch_op! { $order, $width, $sym, "eor" }
    };
}
|
||||
|
||||
/// Defines a fetch-and-or intrinsic by forwarding to `fetch_op!` with the `orr` instruction.
macro_rules! or {
    ($order:ident, $width:tt, $sym:ident) => {
        fetch_op! { $order, $width, $sym, "orr" }
    };
}
|
||||
|
||||
// See `generate_aarch64_outlined_atomics` in build.rs.
// The included file defines the `foreach_*` macros used below. Each one invokes the macro it is
// given once per (ordering, size, symbol name) combination; `foreach_cas16` passes no size.
|
||||
include!(concat!(env!("OUT_DIR"), "/outlined_atomics.rs"));
|
||||
// Instantiate one intrinsic per generated symbol name, pairing each symbol family with the
// macro above that implements it.
foreach_cas!(compare_and_swap);
|
||||
foreach_cas16!(compare_and_swap_i128);
|
||||
foreach_swp!(swap);
|
||||
foreach_ldadd!(add);
|
||||
// `ldclr` is implemented by the `and` macro, which uses the `bic` instruction.
foreach_ldclr!(and);
|
||||
foreach_ldeor!(xor);
|
||||
foreach_ldset!(or);
|
||||
|
|
@ -57,6 +57,9 @@ pub mod mem;
|
|||
#[cfg(target_arch = "arm")]
|
||||
pub mod arm;
|
||||
|
||||
#[cfg(all(target_arch = "aarch64", not(feature = "no-asm"),))]
|
||||
pub mod aarch64;
|
||||
|
||||
#[cfg(all(
|
||||
kernel_user_helpers,
|
||||
any(target_os = "linux", target_os = "android"),
|
||||
|
|
|
|||
|
|
@ -204,7 +204,7 @@ macro_rules! intrinsics {
|
|||
(
|
||||
#[maybe_use_optimized_c_shim]
|
||||
$(#[$($attr:tt)*])*
|
||||
pub extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? {
|
||||
pub $(unsafe $(@ $empty:tt)? )? extern $abi:tt fn $name:ident( $($argname:ident: $ty:ty),* ) $(-> $ret:ty)? {
|
||||
$($body:tt)*
|
||||
}
|
||||
|
||||
|
|
@ -212,7 +212,7 @@ macro_rules! intrinsics {
|
|||
) => (
|
||||
#[cfg($name = "optimized-c")]
|
||||
#[cfg_attr(feature = "weak-intrinsics", linkage = "weak")]
|
||||
pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? {
|
||||
pub $(unsafe $($empty)? )? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? {
|
||||
extern $abi {
|
||||
fn $name($($argname: $ty),*) $(-> $ret)?;
|
||||
}
|
||||
|
|
@ -224,7 +224,7 @@ macro_rules! intrinsics {
|
|||
#[cfg(not($name = "optimized-c"))]
|
||||
intrinsics! {
|
||||
$(#[$($attr)*])*
|
||||
pub extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? {
|
||||
pub $(unsafe $($empty)? )? extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? {
|
||||
$($body)*
|
||||
}
|
||||
}
|
||||
|
|
|
|||
93
library/compiler-builtins/testcrate/tests/lse.rs
Normal file
93
library/compiler-builtins/testcrate/tests/lse.rs
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
#![feature(decl_macro)] // so we can use pub(super)
|
||||
#![cfg(all(target_arch = "aarch64", not(feature = "no-asm")))]
|
||||
|
||||
/// Translate a byte size to a Rust type.
|
||||
macro int_ty {
|
||||
(1) => { i8 },
|
||||
(2) => { i16 },
|
||||
(4) => { i32 },
|
||||
(8) => { i64 },
|
||||
(16) => { i128 }
|
||||
}
|
||||
|
||||
mod cas {
|
||||
pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) {
|
||||
#[test]
|
||||
fn $name() {
|
||||
testcrate::fuzz_2(10000, |expected: super::int_ty!($bytes), new| {
|
||||
let mut target = expected.wrapping_add(10);
|
||||
assert_eq!(
|
||||
unsafe { compiler_builtins::aarch64::$name::$name(expected, new, &mut target) },
|
||||
expected.wrapping_add(10),
|
||||
"return value should always be the previous value",
|
||||
);
|
||||
assert_eq!(
|
||||
target,
|
||||
expected.wrapping_add(10),
|
||||
"shouldn't have changed target"
|
||||
);
|
||||
|
||||
target = expected;
|
||||
assert_eq!(
|
||||
unsafe { compiler_builtins::aarch64::$name::$name(expected, new, &mut target) },
|
||||
expected
|
||||
);
|
||||
assert_eq!(target, new, "should have updated target");
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Adapter that reuses `cas::test!` for the 16-byte symbols by supplying the size, since this
/// macro is invoked with only an ordering and a symbol name.
macro test_cas16($ordering:ident, $symbol:ident) {
    cas::test!($ordering, 16, $symbol);
}
|
||||
|
||||
mod swap {
|
||||
pub(super) macro test($_ordering:ident, $bytes:tt, $name:ident) {
|
||||
#[test]
|
||||
fn $name() {
|
||||
testcrate::fuzz_2(10000, |left: super::int_ty!($bytes), mut right| {
|
||||
let orig_right = right;
|
||||
assert_eq!(
|
||||
unsafe { compiler_builtins::aarch64::$name::$name(left, &mut right) },
|
||||
orig_right
|
||||
);
|
||||
assert_eq!(left, right);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Declares module `$module` containing a `test!` macro for one fetch-and-modify family.
///
/// `$func` is the reference implementation (a closure or function path taking the old value and
/// the operand). Each generated test fuzzes the intrinsic and checks that it returns the
/// original value and stores the reference result to the target.
macro_rules! test_op {
    ($module:ident, $( $func:tt )* ) => {
        mod $module {
            pub(super) macro test {
                ($_ordering:ident, $bytes:tt, $name:ident) => {
                    #[test]
                    fn $name() {
                        testcrate::fuzz_2(10000, |old, val| {
                            // Coercing to a `fn` pointer pins the operand type to the tested width.
                            let reference: fn(super::int_ty!($bytes), super::int_ty!($bytes)) -> _ =
                                $($func)*;
                            let expected = reference(old, val);
                            let mut target = old;
                            let returned = unsafe {
                                compiler_builtins::aarch64::$name::$name(val, &mut target)
                            };
                            assert_eq!(
                                old, returned,
                                "{} should return original value",
                                stringify!($name)
                            );
                            assert_eq!(
                                expected, target,
                                "{} should store to target",
                                stringify!($name)
                            );
                        });
                    }
                }
            }
        }
    };
}
|
||||
|
||||
// Reference implementations for each fetch-and-modify family; every `test_op!` call declares a
// module of that name exposing a `test!` macro.
test_op!(add, |left, right| left.wrapping_add(right));
|
||||
// `clr` clears, in `left`, the bits that are set in `right`.
test_op!(clr, |left, right| left & !right);
|
||||
test_op!(xor, std::ops::BitXor::bitxor);
|
||||
test_op!(or, std::ops::BitOr::bitor);
|
||||
|
||||
// Generate one `#[test]` per exported symbol by handing each test macro to the matching
// `foreach_*` macro exported from `compiler_builtins`.
compiler_builtins::foreach_cas!(cas::test);
|
||||
compiler_builtins::foreach_cas16!(test_cas16);
|
||||
compiler_builtins::foreach_swp!(swap::test);
|
||||
compiler_builtins::foreach_ldadd!(add::test);
|
||||
compiler_builtins::foreach_ldclr!(clr::test);
|
||||
compiler_builtins::foreach_ldeor!(xor::test);
|
||||
compiler_builtins::foreach_ldset!(or::test);
|
||||
Loading…
Add table
Add a link
Reference in a new issue