Merge pull request #4658 from RalfJung/weak-mem-na-reads

weak memory: fix non-atomic read clearing store buffer
Ralf Jung, 2025-11-01 06:28:52 +00:00 (committed by GitHub)
commit 63a18d4a2d
8 changed files with 136 additions and 55 deletions
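For context, the bug pattern in a nutshell: a thread that is the sole writer of an atomic location may also read it non-atomically, and that read must not destroy the location's store buffer, because the buffer carries the synchronization clocks of buffered release stores. Before this fix, Miri's weak-memory model treated any non-atomic access like a non-atomic write and cleared the buffer. A minimal sketch (hypothetical standalone program, distilled from the regression tests below; the names `FLAG` and `DATA` are illustrative, not from the PR):

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;

static mut DATA: u8 = 0;
static FLAG: AtomicUsize = AtomicUsize::new(0);

fn main() {
    let t = thread::spawn(|| {
        // Publish DATA via a release store to FLAG.
        unsafe { DATA = 1 };
        FLAG.store(1, Ordering::Release);
        // We are the only writer of FLAG, so a *non-atomic* read of it is
        // legal. It must leave FLAG's store buffer (and the release store's
        // synchronization clock recorded there) intact.
        unsafe { (&raw const FLAG).cast::<usize>().read() };
    });
    // Acquire-load FLAG; once it is set, the write to DATA happens-before us.
    while FLAG.load(Ordering::Acquire) == 0 {
        thread::yield_now();
    }
    assert_eq!(unsafe { DATA }, 1);
    t.join().unwrap();
}
```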

View file

@@ -749,7 +749,7 @@ trait EvalContextPrivExt<'tcx, 'ecx>: crate::MiriInterpCxExt<'tcx> {
// Make sure the data race model also knows about this.
// FIXME(genmc): Ensure this is still done in GenMC mode. Check for other places where GenMC may need to be informed.
if let Some(data_race) = alloc_extra.data_race.as_vclocks_mut() {
data_race.write(
data_race.write_non_atomic(
alloc_id,
range,
NaWriteType::Retag,
@@ -798,7 +798,7 @@ trait EvalContextPrivExt<'tcx, 'ecx>: crate::MiriInterpCxExt<'tcx> {
assert_eq!(access, AccessKind::Read);
// Make sure the data race model also knows about this.
if let Some(data_race) = alloc_extra.data_race.as_vclocks_ref() {
data_race.read(
data_race.read_non_atomic(
alloc_id,
range,
NaReadType::Retag,

View file

@@ -366,7 +366,7 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
// Also inform the data race model (but only if any bytes are actually affected).
if range_in_alloc.size.bytes() > 0 {
if let Some(data_race) = alloc_extra.data_race.as_vclocks_ref() {
data_race.read(
data_race.read_non_atomic(
alloc_id,
range_in_alloc,
NaReadType::Retag,

View file

@@ -648,18 +648,17 @@ impl MemoryCellClocks {
thread_clocks.clock.index_mut(index).span = current_span;
}
thread_clocks.clock.index_mut(index).set_read_type(read_type);
if self.write_was_before(&thread_clocks.clock) {
// We must be ordered-after all atomic writes.
let race_free = if let Some(atomic) = self.atomic() {
atomic.write_vector <= thread_clocks.clock
} else {
true
};
self.read.set_at_index(&thread_clocks.clock, index);
if race_free { Ok(()) } else { Err(DataRace) }
} else {
Err(DataRace)
// Check synchronization with non-atomic writes.
if !self.write_was_before(&thread_clocks.clock) {
return Err(DataRace);
}
// Check synchronization with atomic writes.
if !self.atomic().is_none_or(|atomic| atomic.write_vector <= thread_clocks.clock) {
return Err(DataRace);
}
// Record this access.
self.read.set_at_index(&thread_clocks.clock, index);
Ok(())
}
/// Detect races for non-atomic write operations at the current memory cell
@@ -675,24 +674,23 @@ impl MemoryCellClocks {
if !current_span.is_dummy() {
thread_clocks.clock.index_mut(index).span = current_span;
}
if self.write_was_before(&thread_clocks.clock) && self.read <= thread_clocks.clock {
let race_free = if let Some(atomic) = self.atomic() {
atomic.write_vector <= thread_clocks.clock
&& atomic.read_vector <= thread_clocks.clock
} else {
true
};
self.write = (index, thread_clocks.clock[index]);
self.write_type = write_type;
if race_free {
self.read.set_zero_vector();
Ok(())
} else {
Err(DataRace)
}
} else {
Err(DataRace)
// Check synchronization with non-atomic accesses.
if !(self.write_was_before(&thread_clocks.clock) && self.read <= thread_clocks.clock) {
return Err(DataRace);
}
// Check synchronization with atomic accesses.
if !self.atomic().is_none_or(|atomic| {
atomic.write_vector <= thread_clocks.clock && atomic.read_vector <= thread_clocks.clock
}) {
return Err(DataRace);
}
// Record this access.
self.write = (index, thread_clocks.clock[index]);
self.write_type = write_type;
self.read.set_zero_vector();
// This is not an atomic location any more.
self.atomic_ops = None;
Ok(())
}
}
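To make the refactored checks easier to follow, here is a self-contained toy version (assumed, simplified types; not Miri's real `VClock`/`MemoryCellClocks`). A non-atomic read is race-free iff the reading thread's clock dominates the last non-atomic write and every atomic write; a non-atomic write must additionally dominate all reads, after which the location's atomic state is cleared, matching the new `self.atomic_ops = None;` above.

```rust
/// Toy vector clock: one counter per thread; missing entries are 0.
#[derive(Clone, Default)]
struct VClock(Vec<u64>);

impl VClock {
    /// Partial order: `self <= other` iff it holds component-wise.
    fn le(&self, other: &VClock) -> bool {
        (0..self.0.len()).all(|i| self.0[i] <= other.0.get(i).copied().unwrap_or(0))
    }
    /// Component-wise maximum, used to accumulate read clocks.
    fn join(&mut self, other: &VClock) {
        self.0.resize(self.0.len().max(other.0.len()), 0);
        for (i, &c) in other.0.iter().enumerate() {
            self.0[i] = self.0[i].max(c);
        }
    }
}

struct AtomicClocks { write_vector: VClock, read_vector: VClock }

struct CellClocks {
    last_write: VClock,           // clock of the last non-atomic write
    reads: VClock,                // join of all non-atomic read clocks
    atomic: Option<AtomicClocks>, // cleared again by a non-atomic write
}

struct DataRace;

impl CellClocks {
    fn non_atomic_read(&mut self, clock: &VClock) -> Result<(), DataRace> {
        // Check synchronization with non-atomic writes.
        if !self.last_write.le(clock) {
            return Err(DataRace);
        }
        // Check synchronization with atomic writes.
        if !self.atomic.as_ref().is_none_or(|a| a.write_vector.le(clock)) {
            return Err(DataRace);
        }
        // Record this access.
        self.reads.join(clock);
        Ok(())
    }

    fn non_atomic_write(&mut self, clock: &VClock) -> Result<(), DataRace> {
        // Check synchronization with non-atomic accesses.
        if !(self.last_write.le(clock) && self.reads.le(clock)) {
            return Err(DataRace);
        }
        // Check synchronization with atomic accesses.
        if !self.atomic.as_ref().is_none_or(|a| {
            a.write_vector.le(clock) && a.read_vector.le(clock)
        }) {
            return Err(DataRace);
        }
        // Record this access; the location is no longer atomic.
        self.last_write = clock.clone();
        self.reads = VClock::default();
        self.atomic = None;
        Ok(())
    }
}

fn main() {
    let mut cell = CellClocks {
        last_write: VClock::default(),
        reads: VClock::default(),
        atomic: None,
    };
    let t1 = VClock(vec![1, 0]);
    let t2 = VClock(vec![0, 1]); // does not dominate t1's clock
    assert!(cell.non_atomic_write(&t1).is_ok());
    assert!(cell.non_atomic_read(&t2).is_err()); // unsynchronized: race
}
```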
@@ -758,14 +756,9 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
let this = self.eval_context_mut();
this.atomic_access_check(dest, AtomicAccessType::Store)?;
// Read the previous value so we can put it in the store buffer later.
// The program didn't actually do a read, so suppress the memory access hooks.
// This is also a very special exception where we just ignore an error -- if this read
// was UB e.g. because the memory is uninitialized, we don't want to know!
let old_val = this.run_for_validation_ref(|this| this.read_scalar(dest)).discard_err();
// Inform GenMC about the atomic store.
if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
let old_val = this.run_for_validation_ref(|this| this.read_scalar(dest)).discard_err();
if genmc_ctx.atomic_store(
this,
dest.ptr().addr(),
@@ -780,6 +773,9 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
}
return interp_ok(());
}
// Read the previous value so we can put it in the store buffer later.
let old_val = this.get_latest_nonatomic_val(dest);
this.allow_data_races_mut(move |this| this.write_scalar(val, dest))?;
this.validate_atomic_store(dest, atomic)?;
this.buffered_atomic_write(val, dest, atomic, old_val)
@@ -1201,7 +1197,7 @@ impl VClockAlloc {
/// operation for which data-race detection is handled separately, for example
/// atomic read operations. The `ty` parameter is used for diagnostics, letting
/// the user know which type was read.
pub fn read<'tcx>(
pub fn read_non_atomic<'tcx>(
&self,
alloc_id: AllocId,
access_range: AllocRange,
@@ -1243,7 +1239,7 @@ impl VClockAlloc {
/// being created or if it is temporarily disabled during a racy read or write
/// operation. The `ty` parameter is used for diagnostics, letting
/// the user know which type was written.
pub fn write<'tcx>(
pub fn write_non_atomic<'tcx>(
&mut self,
alloc_id: AllocId,
access_range: AllocRange,
@@ -1540,6 +1536,35 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
)
}
/// Returns the most recent *non-atomic* value stored in the given place.
/// Errors if we don't need that (because we don't do store buffering) or if
/// the most recent value is in fact atomic.
fn get_latest_nonatomic_val(&self, place: &MPlaceTy<'tcx>) -> Result<Option<Scalar>, ()> {
let this = self.eval_context_ref();
// These cannot fail because `atomic_access_check` was done first.
let (alloc_id, offset, _prov) = this.ptr_get_alloc_id(place.ptr(), 0).unwrap();
let alloc_meta = &this.get_alloc_extra(alloc_id).unwrap().data_race;
if alloc_meta.as_weak_memory_ref().is_none() {
// No reason to read old value if we don't track store buffers.
return Err(());
}
let data_race = alloc_meta.as_vclocks_ref().unwrap();
// Only read old value if this is currently a non-atomic location.
for (_range, clocks) in data_race.alloc_ranges.borrow_mut().iter(offset, place.layout.size)
{
// If this had an atomic write that's not before the non-atomic write, that should
// already be in the store buffer. Initializing the store buffer now would use the
// wrong `sync_clock` so we better make sure that does not happen.
if clocks.atomic().is_some_and(|atomic| !(atomic.write_vector <= clocks.write())) {
return Err(());
}
}
// The program didn't actually do a read, so suppress the memory access hooks.
// This is also a very special exception where we just ignore an error -- if this read
// was UB e.g. because the memory is uninitialized, we don't want to know!
Ok(this.run_for_validation_ref(|this| this.read_scalar(place)).discard_err())
}
/// Generic atomic operation implementation
fn validate_atomic_op<A: Debug + Copy>(
&self,

View file

@@ -177,11 +177,11 @@ impl StoreBufferAlloc {
Self { store_buffers: RefCell::new(RangeObjectMap::new()) }
}
/// When a non-atomic access happens on a location that has been atomically accessed
/// before without data race, we can determine that the non-atomic access fully happens
/// When a non-atomic write happens on a location that has been atomically accessed
/// before without data race, we can determine that the non-atomic write fully happens
/// after all the prior atomic writes so the location no longer needs to exhibit
/// any weak memory behaviours until further atomic accesses.
pub fn memory_accessed(&self, range: AllocRange, global: &DataRaceState) {
/// any weak memory behaviours until further atomic writes.
pub fn non_atomic_write(&self, range: AllocRange, global: &DataRaceState) {
if !global.ongoing_action_data_race_free() {
let mut buffers = self.store_buffers.borrow_mut();
let access_type = buffers.access_type(range);
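The rename from `memory_accessed` to `non_atomic_write` is the heart of the fix: only a write may discard a store buffer. A toy model of the reasoning (assumed shape, not Miri's real data structures): each buffered write carries the synchronization clock that acquire loads rely on, so a race-free non-atomic write, which happens-after all buffered writes, may drop them, while a non-atomic read must leave them alone.

```rust
/// Toy store buffer: each buffered atomic write carries the clock that an
/// acquire-load of it synchronizes with.
struct BufferedWrite {
    val: u64,
    sync_clock: Vec<u64>,
}

struct StoreBuffer {
    writes: Vec<BufferedWrite>,
}

impl StoreBuffer {
    /// A race-free non-atomic write happens-after *all* buffered writes, so
    /// none of them can ever be observed again: the buffer may be discarded
    /// and the location reverts to non-atomic until the next atomic write.
    fn non_atomic_write(&mut self) {
        self.writes.clear();
    }

    /// A non-atomic read supersedes nothing: other threads may still observe
    /// the buffered writes, and discarding them would also lose their
    /// `sync_clock`s. It must leave the buffer intact (the old code cleared
    /// it here too, which is the bug this PR fixes).
    fn non_atomic_read(&self) -> Option<u64> {
        self.writes.last().map(|w| w.val)
    }
}

fn main() {
    let mut buf =
        StoreBuffer { writes: vec![BufferedWrite { val: 1, sync_clock: vec![1, 0] }] };
    assert_eq!(buf.writes[0].sync_clock, vec![1, 0]);
    assert_eq!(buf.non_atomic_read(), Some(1)); // read: buffer stays intact
    buf.non_atomic_write();
    assert!(buf.writes.is_empty()); // write: buffer cleared
}
```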
@@ -223,18 +223,23 @@ impl StoreBufferAlloc {
fn get_or_create_store_buffer_mut<'tcx>(
&mut self,
range: AllocRange,
init: Option<Scalar>,
init: Result<Option<Scalar>, ()>,
) -> InterpResult<'tcx, &mut StoreBuffer> {
let buffers = self.store_buffers.get_mut();
let access_type = buffers.access_type(range);
let pos = match access_type {
AccessType::PerfectlyOverlapping(pos) => pos,
AccessType::Empty(pos) => {
let init =
init.expect("cannot have empty store buffer when previous write was atomic");
buffers.insert_at_pos(pos, range, StoreBuffer::new(init));
pos
}
AccessType::ImperfectlyOverlapping(pos_range) => {
// Once we reach here we would've already checked that this access is not racy.
let init = init.expect(
"cannot have partially overlapping store buffer when previous write was atomic",
);
buffers.remove_pos_range(pos_range.clone());
buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init));
pos_range.start
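The `init` parameter threaded through here is the tri-state produced by `get_latest_nonatomic_val` above; the two `expect`s encode the invariant that `Err(())` can only occur when a store buffer already exists. A hedged illustration of the three cases (toy `Scalar` stand-in, not Miri's API):

```rust
// Toy stand-in for Miri's `Scalar` payload.
type Scalar = u64;

/// Illustrative only: the three states of the `init` value that
/// `get_latest_nonatomic_val` produces and `get_or_create_store_buffer_mut`
/// consumes.
fn interpret(init: Result<Option<Scalar>, ()>) -> &'static str {
    match init {
        // The latest write was atomic (or store buffering is disabled):
        // creating a fresh buffer from this value would be wrong, so the
        // `expect`s above require that a buffer already exists.
        Err(()) => "unavailable: a store buffer must already exist",
        // The location is non-atomic but holds no valid scalar
        // (e.g. uninitialized memory): seed the buffer with "no value".
        Ok(None) => "initialize the buffer; memory holds no valid scalar",
        // Common case: seed the new buffer with the old non-atomic value.
        Ok(Some(_)) => "initialize the buffer with the old value",
    }
}

fn main() {
    assert_eq!(interpret(Err(())), "unavailable: a store buffer must already exist");
    assert_eq!(interpret(Ok(Some(42))), "initialize the buffer with the old value");
}
```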
@@ -490,7 +495,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
}
let range = alloc_range(base_offset, place.layout.size);
let sync_clock = data_race_clocks.sync_clock(range);
let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, Some(init))?;
let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, Ok(Some(init)))?;
// The RMW always reads from the most recent store.
buffer.read_from_last_store(global, threads, atomic == AtomicRwOrd::SeqCst);
buffer.buffered_write(
@@ -556,7 +561,8 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
/// Add the given write to the store buffer. (Does not change machine memory.)
///
/// `init` says with which value to initialize the store buffer in case there wasn't a store
/// buffer for this memory range before.
/// buffer for this memory range before. `Err(())` means the value is not available;
/// `Ok(None)` means the memory does not contain a valid scalar.
///
/// Must be called *after* `validate_atomic_store` to ensure that `sync_clock` is up-to-date.
fn buffered_atomic_write(
@@ -564,7 +570,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
val: Scalar,
dest: &MPlaceTy<'tcx>,
atomic: AtomicWriteOrd,
init: Option<Scalar>,
init: Result<Option<Scalar>, ()>,
) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(dest.ptr(), 0)?;

View file

@@ -1536,14 +1536,11 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
genmc_ctx.memory_load(machine, ptr.addr(), range.size)?,
GlobalDataRaceHandler::Vclocks(_data_race) => {
let _trace = enter_trace_span!(data_race::before_memory_read);
let AllocDataRaceHandler::Vclocks(data_race, weak_memory) = &alloc_extra.data_race
let AllocDataRaceHandler::Vclocks(data_race, _weak_memory) = &alloc_extra.data_race
else {
unreachable!();
};
data_race.read(alloc_id, range, NaReadType::Read, None, machine)?;
if let Some(weak_memory) = weak_memory {
weak_memory.memory_accessed(range, machine.data_race.as_vclocks_ref().unwrap());
}
data_race.read_non_atomic(alloc_id, range, NaReadType::Read, None, machine)?;
}
}
if let Some(borrow_tracker) = &alloc_extra.borrow_tracker {
@@ -1579,9 +1576,10 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
else {
unreachable!()
};
data_race.write(alloc_id, range, NaWriteType::Write, None, machine)?;
data_race.write_non_atomic(alloc_id, range, NaWriteType::Write, None, machine)?;
if let Some(weak_memory) = weak_memory {
weak_memory.memory_accessed(range, machine.data_race.as_vclocks_ref().unwrap());
weak_memory
.non_atomic_write(range, machine.data_race.as_vclocks_ref().unwrap());
}
}
}
@@ -1612,7 +1610,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
GlobalDataRaceHandler::Vclocks(_global_state) => {
let _trace = enter_trace_span!(data_race::before_memory_deallocation);
let data_race = alloc_extra.data_race.as_vclocks_mut().unwrap();
data_race.write(
data_race.write_non_atomic(
alloc_id,
alloc_range(Size::ZERO, size),
NaWriteType::Deallocate,

View file

@@ -88,10 +88,20 @@ fn initialization_write(add_fence: bool) {
check_all_outcomes([11, 22], || {
let x = static_atomic(11);
if add_fence {
// For the fun of it, let's make this location atomic and non-atomic again,
// to ensure Miri updates the state properly for that.
x.store(99, Relaxed);
unsafe { std::ptr::from_ref(x).cast_mut().write(11.into()) };
}
let wait = static_atomic(0);
let j1 = spawn(move || {
x.store(22, Relaxed);
// Since nobody else writes to `x`, we can non-atomically read it.
// (This tests that we do not delete the store buffer here.)
unsafe { std::ptr::from_ref(x).read() };
// Relaxed is intentional. We want to test whether thread 2 reads the initialisation write
// after a relaxed write.
wait.store(1, Relaxed);

View file

@@ -0,0 +1,42 @@
//! This reproduces #4655 every single time
//@ compile-flags: -Zmiri-fixed-schedule -Zmiri-disable-stacked-borrows
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{ptr, thread};
const SIZE: usize = 256;
static mut ARRAY: [u8; SIZE] = [0; _];
// Everything strictly less than this has been initialized by the sender.
static POS: AtomicUsize = AtomicUsize::new(0);
fn main() {
// Sender
let th = std::thread::spawn(|| {
for i in 0..SIZE {
unsafe { ptr::write(&raw mut ARRAY[i], 1) };
POS.store(i + 1, Ordering::Release);
thread::yield_now();
// We are the only writer, so we can do non-atomic reads as well.
unsafe { (&raw const POS).cast::<usize>().read() };
}
});
// Receiver
loop {
let i = POS.load(Ordering::Acquire);
if i > 0 {
unsafe { ptr::read(&raw const ARRAY[i - 1]) };
}
if i == SIZE {
break;
}
thread::yield_now();
}
th.join().unwrap();
}
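Why this test reproduces the bug: the sender's non-atomic read of `POS` used to be reported to the weak-memory model as a plain memory access, which deleted the store buffer for `POS` together with the synchronization clocks of the buffered release stores. A later atomic access then re-created the buffer with the wrong `sync_clock` (the exact hazard called out in `get_latest_nonatomic_val` above), so the receiver's acquire load could miss the happens-before edge to the `ARRAY` writes and Miri could report a data race that does not exist in the program. After this PR, only non-atomic writes clear the buffer, and the test passes under `-Zmiri-fixed-schedule`.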