Implement __sync builtins for thumbv6-none-eabi (#1050)
This is a PR for thumbv6-none-eabi (bare-metal Armv6k in Thumb mode) which is proposed to be added by https://github.com/rust-lang/rust/pull/150138. Armv6k supports atomic instructions, but they are unavailable in Thumb mode unless Thumb-2 instructions are available (v6t2). Using Thumb interworking (can be used via `#[instruction_set]`) allows us to use these instructions even from Thumb mode without Thumb-2 instructions, but LLVM does not implement that processing (as of LLVM 21), so this PR implements it in compiler-builtins. The code around `__sync` builtins is basically copied from `arm_linux.rs`, which uses kernel_user_helpers for its atomic implementation. The atomic implementation is a port of my [atomic-maybe-uninit inline assembly code]. This PR has been tested on QEMU 10.2.0 using a patched compiler-builtins and core that applied the changes in this PR and https://github.com/rust-lang/rust/pull/150138, and the [portable-atomic no-std test suite] (can be run with `./tools/no-std.sh thumbv6-none-eabi` on that repo), which tests wrappers around `core::sync::atomic`. (Note that the target spec used in the test sets max-atomic-width to 32 and atomic_cas to true, unlike the current https://github.com/rust-lang/rust/pull/150138.) The original atomic-maybe-uninit implementation has been tested on real Arm hardware. (Note that Armv6k also supports 64-bit atomic instructions, but they are skipped here. This is because there is no corresponding code in `arm_linux.rs` (since the kernel requirements increased in 1.64, it may be possible to implement 64-bit atomics there as well; see also https://github.com/taiki-e/portable-atomic/pull/82), and the code would become more complex than for 32-bit and smaller atomics.) [atomic-maybe-uninit inline assembly code]: https://github.com/taiki-e/atomic-maybe-uninit/blob/HEAD/src/arch/arm.rs [portable-atomic no-std test suite]: https://github.com/taiki-e/portable-atomic/tree/HEAD/tests/no-std-qemu
This commit is contained in:
parent
69bedd10d2
commit
bf1239e78c
7 changed files with 416 additions and 138 deletions
|
|
@ -230,6 +230,24 @@ jobs:
|
|||
--target etc/thumbv7em-none-eabi-renamed.json \
|
||||
-Zbuild-std=core
|
||||
|
||||
# FIXME: move this target to test job once https://github.com/rust-lang/rust/pull/150138 merged.
|
||||
build-thumbv6k:
|
||||
name: Build thumbv6k
|
||||
runs-on: ubuntu-24.04
|
||||
timeout-minutes: 10
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Install Rust
|
||||
run: |
|
||||
rustup update nightly --no-self-update
|
||||
rustup default nightly
|
||||
rustup component add rust-src
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- run: |
|
||||
cargo build -p compiler_builtins -p libm \
|
||||
--target etc/thumbv6-none-eabi.json \
|
||||
-Zbuild-std=core
|
||||
|
||||
benchmarks:
|
||||
name: Benchmarks
|
||||
timeout-minutes: 20
|
||||
|
|
@ -354,6 +372,7 @@ jobs:
|
|||
needs:
|
||||
- benchmarks
|
||||
- build-custom
|
||||
- build-thumbv6k
|
||||
- clippy
|
||||
- extensive
|
||||
- miri
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ pub mod float;
|
|||
pub mod int;
|
||||
pub mod math;
|
||||
pub mod mem;
|
||||
pub mod sync;
|
||||
|
||||
// `libm` expects its `support` module to be available in the crate root.
|
||||
use math::libm_math::support;
|
||||
|
|
@ -58,13 +59,6 @@ pub mod aarch64;
|
|||
#[cfg(all(target_arch = "aarch64", target_feature = "outline-atomics"))]
|
||||
pub mod aarch64_outline_atomics;
|
||||
|
||||
#[cfg(all(
|
||||
kernel_user_helpers,
|
||||
any(target_os = "linux", target_os = "android"),
|
||||
target_arch = "arm"
|
||||
))]
|
||||
pub mod arm_linux;
|
||||
|
||||
#[cfg(target_arch = "avr")]
|
||||
pub mod avr;
|
||||
|
||||
|
|
|
|||
|
|
@ -125,14 +125,16 @@ unsafe fn atomic_cmpxchg<T>(ptr: *mut T, oldval: u32, newval: u32) -> u32 {
|
|||
let (shift, mask) = get_shift_mask(ptr);
|
||||
|
||||
loop {
|
||||
// FIXME(safety): preconditions review needed
|
||||
// SAFETY: the caller must guarantee that the pointer is valid for read and write
|
||||
// and aligned to the element size.
|
||||
let curval_aligned = unsafe { atomic_load_aligned::<T>(aligned_ptr) };
|
||||
let curval = extract_aligned(curval_aligned, shift, mask);
|
||||
if curval != oldval {
|
||||
return curval;
|
||||
}
|
||||
let newval_aligned = insert_aligned(curval_aligned, newval, shift, mask);
|
||||
// FIXME(safety): preconditions review needed
|
||||
// SAFETY: the caller must guarantee that the pointer is valid for read and write
|
||||
// and aligned to the element size.
|
||||
if unsafe { __kuser_cmpxchg(curval_aligned, newval_aligned, aligned_ptr) } {
|
||||
return oldval;
|
||||
}
|
||||
|
|
@ -143,7 +145,8 @@ macro_rules! atomic_rmw {
|
|||
($name:ident, $ty:ty, $op:expr, $fetch:expr) => {
|
||||
intrinsics! {
|
||||
pub unsafe extern "C" fn $name(ptr: *mut $ty, val: $ty) -> $ty {
|
||||
// FIXME(safety): preconditions review needed
|
||||
// SAFETY: the caller must guarantee that the pointer is valid for read and write
|
||||
// and aligned to the element size.
|
||||
unsafe {
|
||||
atomic_rmw(
|
||||
ptr,
|
||||
|
|
@ -167,140 +170,15 @@ macro_rules! atomic_cmpxchg {
|
|||
($name:ident, $ty:ty) => {
|
||||
intrinsics! {
|
||||
pub unsafe extern "C" fn $name(ptr: *mut $ty, oldval: $ty, newval: $ty) -> $ty {
|
||||
// FIXME(safety): preconditions review needed
|
||||
// SAFETY: the caller must guarantee that the pointer is valid for read and write
|
||||
// and aligned to the element size.
|
||||
unsafe { atomic_cmpxchg(ptr, oldval as u32, newval as u32) as $ty }
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_add_1, u8, |a: u8, b: u8| a.wrapping_add(b));
|
||||
atomic_rmw!(@old __sync_fetch_and_add_2, u16, |a: u16, b: u16| a
|
||||
.wrapping_add(b));
|
||||
atomic_rmw!(@old __sync_fetch_and_add_4, u32, |a: u32, b: u32| a
|
||||
.wrapping_add(b));
|
||||
|
||||
atomic_rmw!(@new __sync_add_and_fetch_1, u8, |a: u8, b: u8| a.wrapping_add(b));
|
||||
atomic_rmw!(@new __sync_add_and_fetch_2, u16, |a: u16, b: u16| a
|
||||
.wrapping_add(b));
|
||||
atomic_rmw!(@new __sync_add_and_fetch_4, u32, |a: u32, b: u32| a
|
||||
.wrapping_add(b));
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_sub_1, u8, |a: u8, b: u8| a.wrapping_sub(b));
|
||||
atomic_rmw!(@old __sync_fetch_and_sub_2, u16, |a: u16, b: u16| a
|
||||
.wrapping_sub(b));
|
||||
atomic_rmw!(@old __sync_fetch_and_sub_4, u32, |a: u32, b: u32| a
|
||||
.wrapping_sub(b));
|
||||
|
||||
atomic_rmw!(@new __sync_sub_and_fetch_1, u8, |a: u8, b: u8| a.wrapping_sub(b));
|
||||
atomic_rmw!(@new __sync_sub_and_fetch_2, u16, |a: u16, b: u16| a
|
||||
.wrapping_sub(b));
|
||||
atomic_rmw!(@new __sync_sub_and_fetch_4, u32, |a: u32, b: u32| a
|
||||
.wrapping_sub(b));
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_and_1, u8, |a: u8, b: u8| a & b);
|
||||
atomic_rmw!(@old __sync_fetch_and_and_2, u16, |a: u16, b: u16| a & b);
|
||||
atomic_rmw!(@old __sync_fetch_and_and_4, u32, |a: u32, b: u32| a & b);
|
||||
|
||||
atomic_rmw!(@new __sync_and_and_fetch_1, u8, |a: u8, b: u8| a & b);
|
||||
atomic_rmw!(@new __sync_and_and_fetch_2, u16, |a: u16, b: u16| a & b);
|
||||
atomic_rmw!(@new __sync_and_and_fetch_4, u32, |a: u32, b: u32| a & b);
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_or_1, u8, |a: u8, b: u8| a | b);
|
||||
atomic_rmw!(@old __sync_fetch_and_or_2, u16, |a: u16, b: u16| a | b);
|
||||
atomic_rmw!(@old __sync_fetch_and_or_4, u32, |a: u32, b: u32| a | b);
|
||||
|
||||
atomic_rmw!(@new __sync_or_and_fetch_1, u8, |a: u8, b: u8| a | b);
|
||||
atomic_rmw!(@new __sync_or_and_fetch_2, u16, |a: u16, b: u16| a | b);
|
||||
atomic_rmw!(@new __sync_or_and_fetch_4, u32, |a: u32, b: u32| a | b);
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_xor_1, u8, |a: u8, b: u8| a ^ b);
|
||||
atomic_rmw!(@old __sync_fetch_and_xor_2, u16, |a: u16, b: u16| a ^ b);
|
||||
atomic_rmw!(@old __sync_fetch_and_xor_4, u32, |a: u32, b: u32| a ^ b);
|
||||
|
||||
atomic_rmw!(@new __sync_xor_and_fetch_1, u8, |a: u8, b: u8| a ^ b);
|
||||
atomic_rmw!(@new __sync_xor_and_fetch_2, u16, |a: u16, b: u16| a ^ b);
|
||||
atomic_rmw!(@new __sync_xor_and_fetch_4, u32, |a: u32, b: u32| a ^ b);
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_nand_1, u8, |a: u8, b: u8| !(a & b));
|
||||
atomic_rmw!(@old __sync_fetch_and_nand_2, u16, |a: u16, b: u16| !(a & b));
|
||||
atomic_rmw!(@old __sync_fetch_and_nand_4, u32, |a: u32, b: u32| !(a & b));
|
||||
|
||||
atomic_rmw!(@new __sync_nand_and_fetch_1, u8, |a: u8, b: u8| !(a & b));
|
||||
atomic_rmw!(@new __sync_nand_and_fetch_2, u16, |a: u16, b: u16| !(a & b));
|
||||
atomic_rmw!(@new __sync_nand_and_fetch_4, u32, |a: u32, b: u32| !(a & b));
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_max_1, i8, |a: i8, b: i8| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_max_2, i16, |a: i16, b: i16| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_max_4, i32, |a: i32, b: i32| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_umax_1, u8, |a: u8, b: u8| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_umax_2, u16, |a: u16, b: u16| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_umax_4, u32, |a: u32, b: u32| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_min_1, i8, |a: i8, b: i8| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_min_2, i16, |a: i16, b: i16| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_min_4, i32, |a: i32, b: i32| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_umin_1, u8, |a: u8, b: u8| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_umin_2, u16, |a: u16, b: u16| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_umin_4, u32, |a: u32, b: u32| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
|
||||
atomic_rmw!(@old __sync_lock_test_and_set_1, u8, |_: u8, b: u8| b);
|
||||
atomic_rmw!(@old __sync_lock_test_and_set_2, u16, |_: u16, b: u16| b);
|
||||
atomic_rmw!(@old __sync_lock_test_and_set_4, u32, |_: u32, b: u32| b);
|
||||
|
||||
atomic_cmpxchg!(__sync_val_compare_and_swap_1, u8);
|
||||
atomic_cmpxchg!(__sync_val_compare_and_swap_2, u16);
|
||||
atomic_cmpxchg!(__sync_val_compare_and_swap_4, u32);
|
||||
include!("arm_thumb_shared.rs");
|
||||
|
||||
intrinsics! {
|
||||
pub unsafe extern "C" fn __sync_synchronize() {
|
||||
|
|
@ -0,0 +1,134 @@
|
|||
// Used by both arm_linux.rs and thumbv6k.rs.
|
||||
|
||||
// References:
|
||||
// - https://llvm.org/docs/Atomics.html#libcalls-sync
|
||||
// - https://gcc.gnu.org/onlinedocs/gcc/_005f_005fsync-Builtins.html
|
||||
// - https://refspecs.linuxfoundation.org/elf/IA64-SysV-psABI.pdf#page=58
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_add_1, u8, |a: u8, b: u8| a.wrapping_add(b));
|
||||
atomic_rmw!(@old __sync_fetch_and_add_2, u16, |a: u16, b: u16| a
|
||||
.wrapping_add(b));
|
||||
atomic_rmw!(@old __sync_fetch_and_add_4, u32, |a: u32, b: u32| a
|
||||
.wrapping_add(b));
|
||||
|
||||
atomic_rmw!(@new __sync_add_and_fetch_1, u8, |a: u8, b: u8| a.wrapping_add(b));
|
||||
atomic_rmw!(@new __sync_add_and_fetch_2, u16, |a: u16, b: u16| a
|
||||
.wrapping_add(b));
|
||||
atomic_rmw!(@new __sync_add_and_fetch_4, u32, |a: u32, b: u32| a
|
||||
.wrapping_add(b));
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_sub_1, u8, |a: u8, b: u8| a.wrapping_sub(b));
|
||||
atomic_rmw!(@old __sync_fetch_and_sub_2, u16, |a: u16, b: u16| a
|
||||
.wrapping_sub(b));
|
||||
atomic_rmw!(@old __sync_fetch_and_sub_4, u32, |a: u32, b: u32| a
|
||||
.wrapping_sub(b));
|
||||
|
||||
atomic_rmw!(@new __sync_sub_and_fetch_1, u8, |a: u8, b: u8| a.wrapping_sub(b));
|
||||
atomic_rmw!(@new __sync_sub_and_fetch_2, u16, |a: u16, b: u16| a
|
||||
.wrapping_sub(b));
|
||||
atomic_rmw!(@new __sync_sub_and_fetch_4, u32, |a: u32, b: u32| a
|
||||
.wrapping_sub(b));
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_and_1, u8, |a: u8, b: u8| a & b);
|
||||
atomic_rmw!(@old __sync_fetch_and_and_2, u16, |a: u16, b: u16| a & b);
|
||||
atomic_rmw!(@old __sync_fetch_and_and_4, u32, |a: u32, b: u32| a & b);
|
||||
|
||||
atomic_rmw!(@new __sync_and_and_fetch_1, u8, |a: u8, b: u8| a & b);
|
||||
atomic_rmw!(@new __sync_and_and_fetch_2, u16, |a: u16, b: u16| a & b);
|
||||
atomic_rmw!(@new __sync_and_and_fetch_4, u32, |a: u32, b: u32| a & b);
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_or_1, u8, |a: u8, b: u8| a | b);
|
||||
atomic_rmw!(@old __sync_fetch_and_or_2, u16, |a: u16, b: u16| a | b);
|
||||
atomic_rmw!(@old __sync_fetch_and_or_4, u32, |a: u32, b: u32| a | b);
|
||||
|
||||
atomic_rmw!(@new __sync_or_and_fetch_1, u8, |a: u8, b: u8| a | b);
|
||||
atomic_rmw!(@new __sync_or_and_fetch_2, u16, |a: u16, b: u16| a | b);
|
||||
atomic_rmw!(@new __sync_or_and_fetch_4, u32, |a: u32, b: u32| a | b);
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_xor_1, u8, |a: u8, b: u8| a ^ b);
|
||||
atomic_rmw!(@old __sync_fetch_and_xor_2, u16, |a: u16, b: u16| a ^ b);
|
||||
atomic_rmw!(@old __sync_fetch_and_xor_4, u32, |a: u32, b: u32| a ^ b);
|
||||
|
||||
atomic_rmw!(@new __sync_xor_and_fetch_1, u8, |a: u8, b: u8| a ^ b);
|
||||
atomic_rmw!(@new __sync_xor_and_fetch_2, u16, |a: u16, b: u16| a ^ b);
|
||||
atomic_rmw!(@new __sync_xor_and_fetch_4, u32, |a: u32, b: u32| a ^ b);
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_nand_1, u8, |a: u8, b: u8| !(a & b));
|
||||
atomic_rmw!(@old __sync_fetch_and_nand_2, u16, |a: u16, b: u16| !(a & b));
|
||||
atomic_rmw!(@old __sync_fetch_and_nand_4, u32, |a: u32, b: u32| !(a & b));
|
||||
|
||||
atomic_rmw!(@new __sync_nand_and_fetch_1, u8, |a: u8, b: u8| !(a & b));
|
||||
atomic_rmw!(@new __sync_nand_and_fetch_2, u16, |a: u16, b: u16| !(a & b));
|
||||
atomic_rmw!(@new __sync_nand_and_fetch_4, u32, |a: u32, b: u32| !(a & b));
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_max_1, i8, |a: i8, b: i8| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_max_2, i16, |a: i16, b: i16| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_max_4, i32, |a: i32, b: i32| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_umax_1, u8, |a: u8, b: u8| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_umax_2, u16, |a: u16, b: u16| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_umax_4, u32, |a: u32, b: u32| if a > b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_min_1, i8, |a: i8, b: i8| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_min_2, i16, |a: i16, b: i16| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_min_4, i32, |a: i32, b: i32| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
|
||||
atomic_rmw!(@old __sync_fetch_and_umin_1, u8, |a: u8, b: u8| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_umin_2, u16, |a: u16, b: u16| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
atomic_rmw!(@old __sync_fetch_and_umin_4, u32, |a: u32, b: u32| if a < b {
|
||||
a
|
||||
} else {
|
||||
b
|
||||
});
|
||||
|
||||
atomic_rmw!(@old __sync_lock_test_and_set_1, u8, |_: u8, b: u8| b);
|
||||
atomic_rmw!(@old __sync_lock_test_and_set_2, u16, |_: u16, b: u16| b);
|
||||
atomic_rmw!(@old __sync_lock_test_and_set_4, u32, |_: u32, b: u32| b);
|
||||
|
||||
atomic_cmpxchg!(__sync_val_compare_and_swap_1, u8);
|
||||
atomic_cmpxchg!(__sync_val_compare_and_swap_2, u16);
|
||||
atomic_cmpxchg!(__sync_val_compare_and_swap_4, u32);
|
||||
20
library/compiler-builtins/compiler-builtins/src/sync/mod.rs
Normal file
20
library/compiler-builtins/compiler-builtins/src/sync/mod.rs
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
#[cfg(all(
|
||||
kernel_user_helpers,
|
||||
any(target_os = "linux", target_os = "android"),
|
||||
target_arch = "arm"
|
||||
))]
|
||||
pub mod arm_linux;
|
||||
|
||||
// Armv6k supports atomic instructions, but they are unavailable in Thumb mode
|
||||
// unless Thumb-2 instructions are available (v6t2).
|
||||
// Using Thumb interworking allows us to use these instructions even from Thumb mode
|
||||
// without Thumb-2 instructions, but LLVM does not implement that processing (as of LLVM 21),
|
||||
// so we implement it here at this time.
|
||||
// (`not(target_feature = "mclass")` is unneeded because v6k is not set on thumbv6m.)
|
||||
#[cfg(all(
|
||||
target_arch = "arm",
|
||||
target_feature = "thumb-mode",
|
||||
target_feature = "v6k",
|
||||
not(target_feature = "v6t2"),
|
||||
))]
|
||||
pub mod thumbv6k;
|
||||
213
library/compiler-builtins/compiler-builtins/src/sync/thumbv6k.rs
Normal file
213
library/compiler-builtins/compiler-builtins/src/sync/thumbv6k.rs
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
// Armv6k supports atomic instructions, but they are unavailable in Thumb mode
|
||||
// unless Thumb-2 instructions are available (v6t2).
|
||||
// Using Thumb interworking allows us to use these instructions even from Thumb mode
|
||||
// without Thumb-2 instructions, but LLVM does not implement that processing (as of LLVM 21),
|
||||
// so we implement it here at this time.
|
||||
|
||||
use core::arch::asm;
|
||||
use core::mem;
|
||||
|
||||
// Data Memory Barrier (DMB) operation.
|
||||
//
|
||||
// Armv6 does not support the DMB instruction, so we use a special instruction equivalent to it.
|
||||
//
|
||||
// Refs: https://developer.arm.com/documentation/ddi0360/f/control-coprocessor-cp15/register-descriptions/c7--cache-operations-register
|
||||
macro_rules! cp15_barrier {
|
||||
() => {
|
||||
"mcr p15, #0, {zero}, c7, c10, #5"
|
||||
};
|
||||
}
|
||||
|
||||
#[instruction_set(arm::a32)]
|
||||
unsafe fn fence() {
|
||||
unsafe {
|
||||
asm!(
|
||||
cp15_barrier!(),
|
||||
// cp15_barrier! calls `mcr p15, 0, {zero}, c7, c10, 5`, and
|
||||
// the value in the {zero} register should be zero (SBZ).
|
||||
zero = inout(reg) 0_u32 => _,
|
||||
options(nostack, preserves_flags),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
trait Atomic: Copy + Eq {
|
||||
unsafe fn load_relaxed(src: *const Self) -> Self;
|
||||
unsafe fn cmpxchg(dst: *mut Self, current: Self, new: Self) -> Self;
|
||||
}
|
||||
|
||||
macro_rules! atomic {
|
||||
($ty:ident, $suffix:tt) => {
|
||||
impl Atomic for $ty {
|
||||
// #[instruction_set(arm::a32)] is unneeded for ldr.
|
||||
#[inline]
|
||||
unsafe fn load_relaxed(
|
||||
src: *const Self,
|
||||
) -> Self {
|
||||
let out: Self;
|
||||
// SAFETY: the caller must guarantee that the pointer is valid for read and write
|
||||
// and aligned to the element size.
|
||||
unsafe {
|
||||
asm!(
|
||||
concat!("ldr", $suffix, " {out}, [{src}]"), // atomic { out = *src }
|
||||
src = in(reg) src,
|
||||
out = lateout(reg) out,
|
||||
options(nostack, preserves_flags),
|
||||
);
|
||||
}
|
||||
out
|
||||
}
|
||||
#[inline]
|
||||
#[instruction_set(arm::a32)]
|
||||
unsafe fn cmpxchg(
|
||||
dst: *mut Self,
|
||||
old: Self,
|
||||
new: Self,
|
||||
) -> Self {
|
||||
let mut out: Self;
|
||||
// SAFETY: the caller must guarantee that the pointer is valid for read and write
|
||||
// and aligned to the element size.
|
||||
//
|
||||
// Instead of the common `fence; ll/sc loop; fence` form, we use the form used by
|
||||
// LLVM, which omits the preceding fence if no write operation is performed.
|
||||
unsafe {
|
||||
asm!(
|
||||
concat!("ldrex", $suffix, " {out}, [{dst}]"), // atomic { out = *dst; EXCLUSIVE = dst }
|
||||
"cmp {out}, {old}", // if out == old { Z = 1 } else { Z = 0 }
|
||||
"bne 3f", // if Z == 0 { jump 'cmp-fail }
|
||||
cp15_barrier!(), // fence
|
||||
"2:", // 'retry:
|
||||
concat!("strex", $suffix, " {r}, {new}, [{dst}]"), // atomic { if EXCLUSIVE == dst { *dst = new; r = 0 } else { r = 1 }; EXCLUSIVE = None }
|
||||
"cmp {r}, #0", // if r == 0 { Z = 1 } else { Z = 0 }
|
||||
"beq 3f", // if Z == 1 { jump 'success }
|
||||
concat!("ldrex", $suffix, " {out}, [{dst}]"), // atomic { out = *dst; EXCLUSIVE = dst }
|
||||
"cmp {out}, {old}", // if out == old { Z = 1 } else { Z = 0 }
|
||||
"beq 2b", // if Z == 1 { jump 'retry }
|
||||
"3:", // 'cmp-fail | 'success:
|
||||
cp15_barrier!(), // fence
|
||||
dst = in(reg) dst,
|
||||
// Note: this cast must be a zero-extend since loaded value
|
||||
// that it is compared against is zero-extended.
|
||||
old = in(reg) u32::from(old),
|
||||
new = in(reg) new,
|
||||
out = out(reg) out,
|
||||
r = out(reg) _,
|
||||
// cp15_barrier! calls `mcr p15, 0, {zero}, c7, c10, 5`, and
|
||||
// the value in the {zero} register should be zero (SBZ).
|
||||
zero = inout(reg) 0_u32 => _,
|
||||
// Do not use `preserves_flags` because CMP modifies the condition flags.
|
||||
options(nostack),
|
||||
);
|
||||
out
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
atomic!(u8, "b");
|
||||
atomic!(u16, "h");
|
||||
atomic!(u32, "");
|
||||
|
||||
// To avoid the annoyance of sign extension, we implement signed CAS using
|
||||
// unsigned CAS. (See note in cmpxchg impl in atomic! macro)
|
||||
macro_rules! delegate_signed {
|
||||
($ty:ident, $base:ident) => {
|
||||
const _: () = {
|
||||
assert!(mem::size_of::<$ty>() == mem::size_of::<$base>());
|
||||
assert!(mem::align_of::<$ty>() == mem::align_of::<$base>());
|
||||
};
|
||||
impl Atomic for $ty {
|
||||
#[inline]
|
||||
unsafe fn load_relaxed(src: *const Self) -> Self {
|
||||
// SAFETY: the caller must uphold the safety contract.
|
||||
// casts are okay because $ty and $base implement the same layout.
|
||||
unsafe { <$base as Atomic>::load_relaxed(src.cast::<$base>()).cast_signed() }
|
||||
}
|
||||
#[inline]
|
||||
unsafe fn cmpxchg(dst: *mut Self, old: Self, new: Self) -> Self {
|
||||
// SAFETY: the caller must uphold the safety contract.
|
||||
// casts are okay because $ty and $base implement the same layout.
|
||||
unsafe {
|
||||
<$base as Atomic>::cmpxchg(
|
||||
dst.cast::<$base>(),
|
||||
old.cast_unsigned(),
|
||||
new.cast_unsigned(),
|
||||
)
|
||||
.cast_signed()
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
delegate_signed!(i8, u8);
|
||||
delegate_signed!(i16, u16);
|
||||
delegate_signed!(i32, u32);
|
||||
|
||||
// Generic atomic read-modify-write operation
|
||||
//
|
||||
// We could implement RMW more efficiently as an assembly LL/SC loop per operation,
|
||||
// but we won't do that for now because it would make the implementation more complex.
|
||||
//
|
||||
// We also do not implement LL and SC as separate functions. This is because it
|
||||
// is theoretically possible for the compiler to insert operations that might
|
||||
// clear the reservation between LL and SC. See https://github.com/taiki-e/portable-atomic/blob/58ef7f27c9e20da4cc1ef0abf8b8ce9ac5219ec3/src/imp/atomic128/aarch64.rs#L44-L55
|
||||
// for more details.
|
||||
unsafe fn atomic_rmw<T: Atomic, F: Fn(T) -> T, G: Fn(T, T) -> T>(ptr: *mut T, f: F, g: G) -> T {
|
||||
loop {
|
||||
// SAFETY: the caller must guarantee that the pointer is valid for read and write
|
||||
// and aligned to the element size.
|
||||
let curval = unsafe { T::load_relaxed(ptr) };
|
||||
let newval = f(curval);
|
||||
// SAFETY: the caller must guarantee that the pointer is valid for read and write
|
||||
// and aligned to the element size.
|
||||
if unsafe { T::cmpxchg(ptr, curval, newval) } == curval {
|
||||
return g(curval, newval);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! atomic_rmw {
|
||||
($name:ident, $ty:ty, $op:expr, $fetch:expr) => {
|
||||
intrinsics! {
|
||||
pub unsafe extern "C" fn $name(ptr: *mut $ty, val: $ty) -> $ty {
|
||||
// SAFETY: the caller must guarantee that the pointer is valid for read and write
|
||||
// and aligned to the element size.
|
||||
unsafe {
|
||||
atomic_rmw(
|
||||
ptr,
|
||||
|x| $op(x as $ty, val),
|
||||
|old, new| $fetch(old, new)
|
||||
) as $ty
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
(@old $name:ident, $ty:ty, $op:expr) => {
|
||||
atomic_rmw!($name, $ty, $op, |old, _| old);
|
||||
};
|
||||
|
||||
(@new $name:ident, $ty:ty, $op:expr) => {
|
||||
atomic_rmw!($name, $ty, $op, |_, new| new);
|
||||
};
|
||||
}
|
||||
macro_rules! atomic_cmpxchg {
|
||||
($name:ident, $ty:ty) => {
|
||||
intrinsics! {
|
||||
pub unsafe extern "C" fn $name(ptr: *mut $ty, oldval: $ty, newval: $ty) -> $ty {
|
||||
// SAFETY: the caller must guarantee that the pointer is valid for read and write
|
||||
// and aligned to the element size.
|
||||
unsafe { <$ty as Atomic>::cmpxchg(ptr, oldval, newval) }
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
include!("arm_thumb_shared.rs");
|
||||
|
||||
intrinsics! {
|
||||
pub unsafe extern "C" fn __sync_synchronize() {
|
||||
// SAFETY: preconditions are the same as the calling function.
|
||||
unsafe { fence() };
|
||||
}
|
||||
}
|
||||
20
library/compiler-builtins/etc/thumbv6-none-eabi.json
Normal file
20
library/compiler-builtins/etc/thumbv6-none-eabi.json
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
{
|
||||
"abi": "eabi",
|
||||
"arch": "arm",
|
||||
"asm-args": ["-mthumb-interwork", "-march=armv6", "-mlittle-endian"],
|
||||
"c-enum-min-bits": 8,
|
||||
"crt-objects-fallback": "false",
|
||||
"data-layout": "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64",
|
||||
"emit-debug-gdb-scripts": false,
|
||||
"features": "+soft-float,+strict-align,+v6k",
|
||||
"frame-pointer": "always",
|
||||
"has-thumb-interworking": true,
|
||||
"linker": "rust-lld",
|
||||
"linker-flavor": "gnu-lld",
|
||||
"llvm-floatabi": "soft",
|
||||
"llvm-target": "thumbv6-none-eabi",
|
||||
"max-atomic-width": 32,
|
||||
"panic-strategy": "abort",
|
||||
"relocation-model": "static",
|
||||
"target-pointer-width": 32
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue