Allow non-racy mixed size accesses

This commit is contained in:
Andy Wang 2022-05-25 20:46:08 +01:00
parent 226ed41cca
commit 613d60db0b
No known key found for this signature in database
GPG key ID: 181B49F9F38F3374
5 changed files with 262 additions and 26 deletions

View file

@ -287,6 +287,20 @@ impl MemoryCellClocks {
Ok(())
}
/// Checks if the memory cell write races with any prior atomic read or write.
///
/// Returns `true` when this cell was never used atomically, or when both the
/// atomic read and write vector clocks are ordered before the current thread's
/// clock (i.e. the write is not racy).
fn write_race_free_with_atomic(&mut self, clocks: &ThreadClockSet) -> bool {
    match self.atomic() {
        Some(atomic) =>
            atomic.read_vector <= clocks.clock && atomic.write_vector <= clocks.clock,
        None => true,
    }
}
/// Checks if the memory cell read races with any prior atomic write.
///
/// A read only needs to be ordered after earlier atomic *writes*; racing with
/// prior atomic reads is fine, hence only `write_vector` is consulted.
fn read_race_free_with_atomic(&self, clocks: &ThreadClockSet) -> bool {
    self.atomic().map_or(true, |atomic| atomic.write_vector <= clocks.clock)
}
/// Update memory cell data-race tracking for atomic
/// load relaxed semantics, is a no-op if this memory was
/// not used previously as atomic memory.
@ -514,6 +528,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
// the *value* (including the associated provenance if this is an AtomicPtr) at this location.
// Only metadata on the location itself is used.
let scalar = this.allow_data_races_ref(move |this| this.read_scalar(&place.into()))?;
this.validate_overlapping_atomic_read(place)?;
this.buffered_atomic_read(place, atomic, scalar, || {
this.validate_atomic_load(place, atomic)
})
@ -527,6 +542,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
atomic: AtomicWriteOp,
) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
this.validate_overlapping_atomic_write(dest)?;
this.allow_data_races_mut(move |this| this.write_scalar(val, &(*dest).into()))?;
this.validate_atomic_store(dest, atomic)?;
// FIXME: it's not possible to get the value before write_scalar. A read_scalar will cause
@ -547,6 +563,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> {
let this = self.eval_context_mut();
this.validate_overlapping_atomic_write(place)?;
let old = this.allow_data_races_mut(|this| this.read_immediate(&place.into()))?;
// Atomics wrap around on overflow.
@ -575,6 +592,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
) -> InterpResult<'tcx, ScalarMaybeUninit<Tag>> {
let this = self.eval_context_mut();
this.validate_overlapping_atomic_write(place)?;
let old = this.allow_data_races_mut(|this| this.read_scalar(&place.into()))?;
this.allow_data_races_mut(|this| this.write_scalar(new, &(*place).into()))?;
@ -595,6 +613,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> {
let this = self.eval_context_mut();
this.validate_overlapping_atomic_write(place)?;
let old = this.allow_data_races_mut(|this| this.read_immediate(&place.into()))?;
let lt = this.binary_op(mir::BinOp::Lt, &old, &rhs)?.to_scalar()?.to_bool()?;
@ -637,6 +656,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
use rand::Rng as _;
let this = self.eval_context_mut();
this.validate_overlapping_atomic_write(place)?;
// Failure ordering cannot be stronger than success ordering, therefore first attempt
// to read with the failure ordering and if successful then try again with the success
// read ordering and write in the success case.
@ -686,6 +706,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
atomic: AtomicReadOp,
) -> InterpResult<'tcx> {
let this = self.eval_context_ref();
this.validate_overlapping_atomic_read(place)?;
this.validate_atomic_op(
place,
atomic,
@ -708,6 +729,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
atomic: AtomicWriteOp,
) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
this.validate_overlapping_atomic_write(place)?;
this.validate_atomic_op(
place,
atomic,
@ -733,6 +755,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
let acquire = matches!(atomic, Acquire | AcqRel | SeqCst);
let release = matches!(atomic, Release | AcqRel | SeqCst);
let this = self.eval_context_mut();
this.validate_overlapping_atomic_write(place)?;
this.validate_atomic_op(place, atomic, "Atomic RMW", move |memory, clocks, index, _| {
if acquire {
memory.load_acquire(clocks, index)?;
@ -918,6 +941,44 @@ impl VClockAlloc {
)
}
/// Detect racing atomic writes (not data races)
/// on every byte of the current access range.
///
/// Returns `true` (no race) when race detection is disabled, or when every
/// per-byte cell in `range` is ordered after all prior atomic writes.
// Fix: removed the unused `'tcx` lifetime parameter and renamed the loop
// binding so it no longer shadows the `range` parameter.
pub(super) fn read_race_free_with_atomic(
    &self,
    range: AllocRange,
    global: &GlobalState,
) -> bool {
    if global.race_detecting() {
        let (_, clocks) = global.current_thread_state();
        let alloc_ranges = self.alloc_ranges.borrow();
        // Every byte of the access must be race-free with prior atomic writes.
        for (_, cell) in alloc_ranges.iter(range.start, range.size) {
            if !cell.read_race_free_with_atomic(&clocks) {
                return false;
            }
        }
    }
    true
}
/// Detect racing atomic reads and writes (not data races)
/// on every byte of the current access range.
///
/// Returns `true` (no race) when race detection is disabled, or when every
/// per-byte cell in `range` is ordered after all prior atomic reads and writes.
// Fix: removed the unused `'tcx` lifetime parameter and renamed the loop
// binding so it no longer shadows the `range` parameter.
pub(super) fn write_race_free_with_atomic(
    &mut self,
    range: AllocRange,
    global: &GlobalState,
) -> bool {
    if global.race_detecting() {
        let (_, clocks) = global.current_thread_state();
        // `&mut self` lets us use `get_mut`, avoiding RefCell runtime checks.
        let alloc_ranges = self.alloc_ranges.get_mut();
        for (_, cell) in alloc_ranges.iter_mut(range.start, range.size) {
            if !cell.write_race_free_with_atomic(&clocks) {
                return false;
            }
        }
    }
    true
}
/// Detect data-races for an unsynchronized read operation, will not perform
/// data-race detection if `race_detecting()` is false, either due to no threads
/// being created or if it is temporarily disabled during a racy read or write
@ -1027,7 +1088,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
let (alloc_id, base_offset, _tag) = this.ptr_get_alloc_id(place.ptr)?;
// Load and log the atomic operation.
// Note that atomic loads are possible even from read-only allocations, so `get_alloc_extra_mut` is not an option.
let alloc_meta = &this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap();
let alloc_meta = this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap();
log::trace!(
"Atomic op({}) with ordering {:?} on {:?} (size={})",
description,

View file

@ -29,6 +29,13 @@
//! Additionally, writes in our implementation do not have globally unique timestamps attached. In the other two models this timestamp is
//! used to make sure a value in a thread's view is not overwritten by a write that occurred earlier than the one in the existing view.
//! In our implementation, this is detected using read information attached to store elements, as there is no data structure representing reads.
//!
//! Safe/sound Rust allows for more operations on atomic locations than the C++20 atomic API was intended to allow, such as non-atomically accessing
//! a previously atomically accessed location, or accessing previously atomically accessed locations with a differently sized operation
//! (such as accessing the top 16 bits of an AtomicU32). These scenarios are generally not discussed in formalisations of the C++ memory model.
//! In Rust, these operations can only be done through a `&mut AtomicFoo` reference or one derived from it, therefore these operations
//! can only happen after all previous accesses on the same locations. This implementation is adapted to allow these operations.
//! A mixed size/atomicity read that races with writes, or a write that races with reads or writes, will still be reported as undefined behaviour.
// Our and the author's own implementation (tsan11) of the paper have some deviations from the provided operational semantics in §5.3:
// 1. In the operational semantics, store elements keep a copy of the atomic object's vector clock (AtomicCellClocks::sync_vector in miri),
@ -117,6 +124,14 @@ impl StoreBufferAlloc {
Self { store_buffers: RefCell::new(AllocationMap::new()) }
}
/// Checks if the range imperfectly overlaps with existing store buffers,
/// which is how a mixed-size atomic access manifests.
fn is_overlapping(&self, range: AllocRange) -> bool {
    matches!(
        self.store_buffers.borrow().access_type(range),
        AccessType::ImperfectlyOverlapping(_)
    )
}
/// When a non-atomic access happens on a location that has been atomically accessed
/// before without data race, we can determine that the non-atomic access fully happens
/// before all the prior atomic accesses so the location no longer needs to exhibit
@ -148,21 +163,16 @@ impl StoreBufferAlloc {
let pos = match access_type {
AccessType::PerfectlyOverlapping(pos) => pos,
AccessType::Empty(pos) => {
let new_buffer = StoreBuffer::new(init);
let mut buffers = self.store_buffers.borrow_mut();
buffers.insert_at_pos(pos, range, new_buffer);
buffers.insert_at_pos(pos, range, StoreBuffer::new(init));
pos
}
AccessType::ImperfectlyOverlapping(pos_range) => {
// Accesses that imperfectly overlaps with existing atomic objects
// do not have well-defined behaviours.
// FIXME: if this access happens before all previous accesses on every object it overlaps
// with, then we would like to tolerate it. However this is not easy to check.
if pos_range.start + 1 == pos_range.end {
throw_ub_format!("mixed-size access on an existing atomic object");
} else {
throw_ub_format!("access overlaps with multiple existing atomic objects");
}
// Once we reach here we would've already checked that this access is not racy
let mut buffers = self.store_buffers.borrow_mut();
buffers.remove_pos_range(pos_range.clone());
buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init));
pos_range.start
}
};
Ok(Ref::map(self.store_buffers.borrow(), |buffer| &buffer[pos]))
@ -179,16 +189,13 @@ impl StoreBufferAlloc {
let pos = match access_type {
AccessType::PerfectlyOverlapping(pos) => pos,
AccessType::Empty(pos) => {
let new_buffer = StoreBuffer::new(init);
buffers.insert_at_pos(pos, range, new_buffer);
buffers.insert_at_pos(pos, range, StoreBuffer::new(init));
pos
}
AccessType::ImperfectlyOverlapping(pos_range) => {
if pos_range.start + 1 == pos_range.end {
throw_ub_format!("mixed-size access on an existing atomic object");
} else {
throw_ub_format!("access overlaps with multiple existing atomic objects");
}
buffers.remove_pos_range(pos_range.clone());
buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init));
pos_range.start
}
};
Ok(&mut buffers[pos])
@ -392,6 +399,55 @@ impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mi
pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
crate::MiriEvalContextExt<'mir, 'tcx>
{
// If weak memory emulation is enabled, check if this atomic op imperfectly overlaps with a previous
// atomic write. If it does, then we require it to be ordered (non-racy) with all previous atomic
// writes on all the bytes in range
fn validate_overlapping_atomic_read(&self, place: &MPlaceTy<'tcx, Tag>) -> InterpResult<'tcx> {
    let this = self.eval_context_ref();
    let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?;
    let alloc_extra = this.get_alloc_extra(alloc_id)?;
    // Both the weak-memory buffers and the per-allocation clocks must exist
    // for this check to apply.
    if let (Some(alloc_buffers), Some(alloc_clocks)) =
        (alloc_extra.weak_memory.as_ref(), alloc_extra.data_race.as_ref())
    {
        let access_range = alloc_range(base_offset, place.layout.size);
        // Only a *racy* imperfectly overlapping read is UB; an ordered one is fine.
        if alloc_buffers.is_overlapping(access_range)
            && !alloc_clocks
                .read_race_free_with_atomic(access_range, this.machine.data_race.as_ref().unwrap())
        {
            throw_ub_format!("racy imperfectly overlapping atomic access");
        }
    }
    Ok(())
}
// Same as above but needs to be ordered with all previous atomic read or writes
//
// For a mixed-size atomic *write*: if the accessed range imperfectly overlaps
// an existing store buffer, the write must be race-free with every prior
// atomic read AND write on every byte of the range, otherwise it is UB.
fn validate_overlapping_atomic_write(
    &mut self,
    place: &MPlaceTy<'tcx, Tag>,
) -> InterpResult<'tcx> {
    let this = self.eval_context_mut();
    // Resolve the place to its allocation and the offset within it.
    let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?;
    // Destructure both the per-allocation extra state (weak-memory buffers +
    // data-race clocks) and the global data-race state in one mutable borrow.
    if let (
        crate::AllocExtra {
            weak_memory: Some(alloc_buffers),
            data_race: Some(alloc_clocks),
            ..
        },
        crate::Evaluator { data_race: Some(global), .. },
    ) = this.get_alloc_extra_mut(alloc_id)?
    {
        let range = alloc_range(base_offset, place.layout.size);
        // Only a *racy* imperfectly overlapping write is rejected; an access
        // ordered after all prior atomic accesses is tolerated.
        if alloc_buffers.is_overlapping(range)
            && !alloc_clocks.write_race_free_with_atomic(range, global)
        {
            throw_ub_format!("racy imperfectly overlapping atomic access");
        }
    }
    Ok(())
}
fn buffered_atomic_rmw(
&mut self,
new_val: ScalarMaybeUninit<Tag>,

View file

@ -9,8 +9,8 @@
// so we have to stick to C++11 emulation from existing research.
use std::sync::atomic::Ordering::*;
use std::thread::spawn;
use std::sync::atomic::{fence, AtomicUsize};
use std::thread::spawn;
// Spins until it reads the given value
fn reads_value(loc: &AtomicUsize, val: usize) -> usize {
@ -25,7 +25,7 @@ fn reads_value(loc: &AtomicUsize, val: usize) -> usize {
fn static_atomic(val: usize) -> &'static AtomicUsize {
let ret = Box::leak(Box::new(AtomicUsize::new(val)));
// A workaround to put the initialization value in the store buffer.
ret.store(val, Relaxed);
ret.load(Relaxed);
ret
}
@ -82,4 +82,4 @@ pub fn main() {
for _ in 0..500 {
test_cpp20_rwc_syncs();
}
}
}

View file

@ -0,0 +1,38 @@
// compile-flags: -Zmiri-ignore-leaks
#![feature(core_intrinsics)]
use std::sync::atomic::AtomicU32;
use std::sync::atomic::Ordering::*;
use std::thread::spawn;
/// Leak a heap allocation to obtain an `AtomicU32` with a `'static` lifetime.
fn static_atomic_u32(val: u32) -> &'static AtomicU32 {
    Box::leak(Box::new(AtomicU32::new(val)))
}
/// Reinterpret a `*const u32` as a pointer to its two 16-bit halves.
///
/// A plain raw-pointer `as` cast is sufficient here — unlike `transmute` it
/// requires no `unsafe` block (only the eventual dereference is unsafe).
fn split_u32_ptr(dword: *const u32) -> *const [u16; 2] {
    dword as *const [u16; 2]
}
// Wine's SRWLock implementation does this, which is definitely undefined in C++ memory model
// https://github.com/wine-mirror/wine/blob/303f8042f9db508adaca02ef21f8de4992cb9c03/dlls/ntdll/sync.c#L543-L566
// Though it probably works just fine on x86
pub fn main() {
    let x = static_atomic_u32(0);
    // One thread performs a full-width (32-bit) atomic store...
    let j1 = spawn(move || {
        x.store(1, Relaxed);
    });
    // ...while the other concurrently performs a 16-bit atomic load of one
    // half of the same location — a racy, imperfectly overlapping access.
    let j2 = spawn(move || {
        let x_ptr = x as *const AtomicU32 as *const u32;
        let x_split = split_u32_ptr(x_ptr);
        unsafe {
            // NOTE(review): `[0]` is the low-address half; whether that is the
            // "high" 16 bits depends on endianness — the name mirrors the Wine
            // code being mimicked. Confirm against the linked source.
            let hi = &(*x_split)[0] as *const u16;
            std::intrinsics::atomic_load_relaxed(hi); //~ ERROR: imperfectly overlapping
        }
    });
    j1.join().unwrap();
    j2.join().unwrap();
}

View file

@ -4,13 +4,19 @@
// but doable in safe (at least sound) Rust.
#![feature(atomic_from_mut)]
#![feature(core_intrinsics)]
use std::sync::atomic::Ordering::*;
use std::sync::atomic::{AtomicU16, AtomicU32, AtomicUsize};
use std::sync::atomic::{AtomicU16, AtomicU32};
use std::thread::spawn;
fn static_atomic_mut(val: usize) -> &'static mut AtomicUsize {
let ret = Box::leak(Box::new(AtomicUsize::new(val)));
fn static_atomic_mut(val: u32) -> &'static mut AtomicU32 {
let ret = Box::leak(Box::new(AtomicU32::new(val)));
ret
}
/// Leak a heap allocation to obtain an `AtomicU32` with a `'static` lifetime.
fn static_atomic(val: u32) -> &'static AtomicU32 {
    Box::leak(Box::new(AtomicU32::new(val)))
}
@ -18,6 +24,10 @@ fn split_u32(dword: &mut u32) -> &mut [u16; 2] {
unsafe { std::mem::transmute::<&mut u32, &mut [u16; 2]>(dword) }
}
/// Reinterpret a `*const u32` as a pointer to its two 16-bit halves.
///
/// A plain raw-pointer `as` cast is sufficient here — unlike `transmute` it
/// requires no `unsafe` block (only the eventual dereference is unsafe).
fn split_u32_ptr(dword: *const u32) -> *const [u16; 2] {
    dword as *const [u16; 2]
}
fn mem_replace() {
let mut x = AtomicU32::new(0);
@ -31,7 +41,7 @@ fn assign_to_mut() {
let x = static_atomic_mut(0);
x.store(1, Relaxed);
*x = AtomicUsize::new(2);
*x = AtomicU32::new(2);
assert_eq!(x.load(Relaxed), 2);
}
@ -70,10 +80,81 @@ fn from_mut_split() {
assert_eq!(x_lo_atomic.load(Relaxed), u16::from_be(0xfafa));
}
// Although not possible to do in safe Rust,
// we allow non-atomic and atomic reads to race
// as this should be sound
fn racing_mixed_atomicity_read() {
    let x = static_atomic(0);
    // The store happens before both `spawn`s, so 42 is the only value either
    // reader can observe (there are no subsequent writes).
    x.store(42, Relaxed);
    // Read through the safe `AtomicU32` API...
    let j1 = spawn(move || x.load(Relaxed));
    // ...and through a raw `*const u32` via the relaxed-load intrinsic,
    // bypassing the `AtomicU32` type entirely.
    let j2 = spawn(move || {
        let x_ptr = x as *const AtomicU32 as *const u32;
        unsafe { std::intrinsics::atomic_load_relaxed(x_ptr) }
    });
    let r1 = j1.join().unwrap();
    let r2 = j2.join().unwrap();
    assert_eq!(r1, 42);
    assert_eq!(r2, 42);
}
// Two concurrent atomic loads of different sizes on the same location:
// j1 loads all 32 bits while j2 loads only 16 of them. Miri flags the 16-bit
// load as an imperfectly overlapping atomic access (see annotation below).
fn racing_mixed_size_read() {
    let x = static_atomic(0);
    let j1 = spawn(move || {
        x.load(Relaxed);
    });
    let j2 = spawn(move || {
        let x_ptr = x as *const AtomicU32 as *const u32;
        let x_split = split_u32_ptr(x_ptr);
        unsafe {
            // 16-bit atomic load of the low-address half of the u32.
            let hi = &(*x_split)[0] as *const u16;
            std::intrinsics::atomic_load_relaxed(hi); //~ ERROR: imperfectly overlapping
        }
    });
    j1.join().unwrap();
    j2.join().unwrap();
}
// Mixed sizes *and* mixed atomicity, but every access is a read:
//   j1: 32-bit atomic load, j2: 32-bit non-atomic load, j3: 16-bit atomic load.
fn racing_mixed_atomicity_and_size_read() {
    let x = static_atomic(u32::from_be(0xabbafafa));
    let j1 = spawn(move || {
        x.load(Relaxed);
    });
    let j2 = spawn(move || {
        let x_ptr = x as *const AtomicU32 as *const u32;
        // Plain (non-atomic) read of the same 4 bytes.
        unsafe { *x_ptr };
    });
    let j3 = spawn(move || {
        let x_ptr = x as *const AtomicU32 as *const u32;
        let x_split = split_u32_ptr(x_ptr);
        unsafe {
            // 16-bit atomic load of the low-address half.
            let hi = &(*x_split)[0] as *const u16;
            std::intrinsics::atomic_load_relaxed(hi)
        }
    });
    j1.join().unwrap();
    j2.join().unwrap();
    let r3 = j3.join().unwrap();
    // Using `from_be` on both the stored u32 and the expected u16 makes this
    // check hold regardless of the host's endianness.
    assert_eq!(r3, u16::from_be(0xabba));
}
// Entry point: exercises the mixed-size / mixed-atomicity access scenarios
// defined above, `&mut`-based cases first, then the racing read cases.
pub fn main() {
    get_mut_write();
    from_mut_split();
    assign_to_mut();
    mem_replace();
    racing_mixed_atomicity_read();
    racing_mixed_size_read();
    racing_mixed_atomicity_and_size_read();
}