Merge pull request #4658 from RalfJung/weak-mem-na-reads

weak memory: fix non-atomic read clearing store buffer
Ralf Jung, 2025-11-01 06:28:52 +00:00 (committed by GitHub)
commit 63a18d4a2d
8 changed files with 136 additions and 55 deletions
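For context, the bug pattern in a nutshell: a thread that is the sole writer of an atomic location may also read it non-atomically, and that read must not destroy the location's store buffer, because the buffer carries the synchronization clocks of buffered release stores. Before this fix, Miri's weak-memory model treated any non-atomic access like a non-atomic write and cleared the buffer. A minimal sketch (hypothetical standalone program, distilled from the regression tests below; the names `FLAG` and `DATA` are illustrative, not from the PR):

```rust
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;

static mut DATA: u8 = 0;
static FLAG: AtomicUsize = AtomicUsize::new(0);

fn main() {
    let t = thread::spawn(|| {
        // Publish DATA via a release store to FLAG.
        unsafe { DATA = 1 };
        FLAG.store(1, Ordering::Release);
        // We are the only writer of FLAG, so a *non-atomic* read of it is
        // legal. It must leave FLAG's store buffer (and the release store's
        // synchronization clock recorded there) intact.
        unsafe { (&raw const FLAG).cast::<usize>().read() };
    });
    // Acquire-load FLAG; once it is set, the write to DATA happens-before us.
    while FLAG.load(Ordering::Acquire) == 0 {
        thread::yield_now();
    }
    assert_eq!(unsafe { DATA }, 1);
    t.join().unwrap();
}
```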

View file

@@ -749,7 +749,7 @@ trait EvalContextPrivExt<'tcx, 'ecx>: crate::MiriInterpCxExt<'tcx> {
// Make sure the data race model also knows about this.
// FIXME(genmc): Ensure this is still done in GenMC mode. Check for other places where GenMC may need to be informed.
if let Some(data_race) = alloc_extra.data_race.as_vclocks_mut() {
data_race.write(
data_race.write_non_atomic(
alloc_id,
range,
NaWriteType::Retag,
@@ -798,7 +798,7 @@ trait EvalContextPrivExt<'tcx, 'ecx>: crate::MiriInterpCxExt<'tcx> {
assert_eq!(access, AccessKind::Read);
// Make sure the data race model also knows about this.
if let Some(data_race) = alloc_extra.data_race.as_vclocks_ref() {
data_race.read(
data_race.read_non_atomic(
alloc_id,
range,
NaReadType::Retag,

View file

@@ -366,7 +366,7 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
// Also inform the data race model (but only if any bytes are actually affected).
if range_in_alloc.size.bytes() > 0 {
if let Some(data_race) = alloc_extra.data_race.as_vclocks_ref() {
data_race.read(
data_race.read_non_atomic(
alloc_id,
range_in_alloc,
NaReadType::Retag,

View file

@@ -648,18 +648,17 @@ impl MemoryCellClocks {
thread_clocks.clock.index_mut(index).span = current_span;
}
thread_clocks.clock.index_mut(index).set_read_type(read_type);
if self.write_was_before(&thread_clocks.clock) {
// We must be ordered-after all atomic writes.
let race_free = if let Some(atomic) = self.atomic() {
atomic.write_vector <= thread_clocks.clock
} else {
true
};
self.read.set_at_index(&thread_clocks.clock, index);
if race_free { Ok(()) } else { Err(DataRace) }
} else {
Err(DataRace)
// Check synchronization with non-atomic writes.
if !self.write_was_before(&thread_clocks.clock) {
return Err(DataRace);
}
// Check synchronization with atomic writes.
if !self.atomic().is_none_or(|atomic| atomic.write_vector <= thread_clocks.clock) {
return Err(DataRace);
}
// Record this access.
self.read.set_at_index(&thread_clocks.clock, index);
Ok(())
}
/// Detect races for non-atomic write operations at the current memory cell
@@ -675,24 +674,23 @@ impl MemoryCellClocks {
if !current_span.is_dummy() {
thread_clocks.clock.index_mut(index).span = current_span;
}
if self.write_was_before(&thread_clocks.clock) && self.read <= thread_clocks.clock {
let race_free = if let Some(atomic) = self.atomic() {
atomic.write_vector <= thread_clocks.clock
&& atomic.read_vector <= thread_clocks.clock
} else {
true
};
self.write = (index, thread_clocks.clock[index]);
self.write_type = write_type;
if race_free {
self.read.set_zero_vector();
Ok(())
} else {
Err(DataRace)
}
} else {
Err(DataRace)
// Check synchronization with non-atomic accesses.
if !(self.write_was_before(&thread_clocks.clock) && self.read <= thread_clocks.clock) {
return Err(DataRace);
}
// Check synchronization with atomic accesses.
if !self.atomic().is_none_or(|atomic| {
atomic.write_vector <= thread_clocks.clock && atomic.read_vector <= thread_clocks.clock
}) {
return Err(DataRace);
}
// Record this access.
self.write = (index, thread_clocks.clock[index]);
self.write_type = write_type;
self.read.set_zero_vector();
// This is not an atomic location any more.
self.atomic_ops = None;
Ok(())
}
}
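To make the refactored checks easier to follow, here is a self-contained toy version (assumed, simplified types; not Miri's real `VClock`/`MemoryCellClocks`). A non-atomic read is race-free iff the reading thread's clock dominates the last non-atomic write and every atomic write; a non-atomic write must additionally dominate all reads, after which the location's atomic state is cleared, matching the new `self.atomic_ops = None;` above.

```rust
/// Toy vector clock: one counter per thread; missing entries are 0.
#[derive(Clone, Default)]
struct VClock(Vec<u64>);

impl VClock {
    /// Partial order: `self <= other` iff it holds component-wise.
    fn le(&self, other: &VClock) -> bool {
        (0..self.0.len()).all(|i| self.0[i] <= other.0.get(i).copied().unwrap_or(0))
    }
    /// Component-wise maximum, used to accumulate read clocks.
    fn join(&mut self, other: &VClock) {
        self.0.resize(self.0.len().max(other.0.len()), 0);
        for (i, &c) in other.0.iter().enumerate() {
            self.0[i] = self.0[i].max(c);
        }
    }
}

struct AtomicClocks { write_vector: VClock, read_vector: VClock }

struct CellClocks {
    last_write: VClock,           // clock of the last non-atomic write
    reads: VClock,                // join of all non-atomic read clocks
    atomic: Option<AtomicClocks>, // cleared again by a non-atomic write
}

struct DataRace;

impl CellClocks {
    fn non_atomic_read(&mut self, clock: &VClock) -> Result<(), DataRace> {
        // Check synchronization with non-atomic writes.
        if !self.last_write.le(clock) {
            return Err(DataRace);
        }
        // Check synchronization with atomic writes.
        if !self.atomic.as_ref().is_none_or(|a| a.write_vector.le(clock)) {
            return Err(DataRace);
        }
        // Record this access.
        self.reads.join(clock);
        Ok(())
    }

    fn non_atomic_write(&mut self, clock: &VClock) -> Result<(), DataRace> {
        // Check synchronization with non-atomic accesses.
        if !(self.last_write.le(clock) && self.reads.le(clock)) {
            return Err(DataRace);
        }
        // Check synchronization with atomic accesses.
        if !self.atomic.as_ref().is_none_or(|a| {
            a.write_vector.le(clock) && a.read_vector.le(clock)
        }) {
            return Err(DataRace);
        }
        // Record this access; the location is no longer atomic.
        self.last_write = clock.clone();
        self.reads = VClock::default();
        self.atomic = None;
        Ok(())
    }
}

fn main() {
    let mut cell = CellClocks {
        last_write: VClock::default(),
        reads: VClock::default(),
        atomic: None,
    };
    let t1 = VClock(vec![1, 0]);
    let t2 = VClock(vec![0, 1]); // does not dominate t1's clock
    assert!(cell.non_atomic_write(&t1).is_ok());
    assert!(cell.non_atomic_read(&t2).is_err()); // unsynchronized: race
}
```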
@@ -758,14 +756,9 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
let this = self.eval_context_mut();
this.atomic_access_check(dest, AtomicAccessType::Store)?;
// Read the previous value so we can put it in the store buffer later.
// The program didn't actually do a read, so suppress the memory access hooks.
// This is also a very special exception where we just ignore an error -- if this read
// was UB e.g. because the memory is uninitialized, we don't want to know!
let old_val = this.run_for_validation_ref(|this| this.read_scalar(dest)).discard_err();
// Inform GenMC about the atomic store.
if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
let old_val = this.run_for_validation_ref(|this| this.read_scalar(dest)).discard_err();
if genmc_ctx.atomic_store(
this,
dest.ptr().addr(),
@@ -780,6 +773,9 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
}
return interp_ok(());
}
// Read the previous value so we can put it in the store buffer later.
let old_val = this.get_latest_nonatomic_val(dest);
this.allow_data_races_mut(move |this| this.write_scalar(val, dest))?;
this.validate_atomic_store(dest, atomic)?;
this.buffered_atomic_write(val, dest, atomic, old_val)
@@ -1201,7 +1197,7 @@ impl VClockAlloc {
/// operation for which data-race detection is handled separately, for example
/// atomic read operations. The `ty` parameter is used for diagnostics, letting
/// the user know which type was read.
pub fn read<'tcx>(
pub fn read_non_atomic<'tcx>(
&self,
alloc_id: AllocId,
access_range: AllocRange,
@@ -1243,7 +1239,7 @@ impl VClockAlloc {
/// being created or if it is temporarily disabled during a racy read or write
/// operation. The `ty` parameter is used for diagnostics, letting
/// the user know which type was written.
pub fn write<'tcx>(
pub fn write_non_atomic<'tcx>(
&mut self,
alloc_id: AllocId,
access_range: AllocRange,
@@ -1540,6 +1536,35 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
)
}
/// Returns the most recent *non-atomic* value stored in the given place.
/// Errors if we don't need that (because we don't do store buffering) or if
/// the most recent value is in fact atomic.
fn get_latest_nonatomic_val(&self, place: &MPlaceTy<'tcx>) -> Result<Option<Scalar>, ()> {
let this = self.eval_context_ref();
// These cannot fail because `atomic_access_check` was done first.
let (alloc_id, offset, _prov) = this.ptr_get_alloc_id(place.ptr(), 0).unwrap();
let alloc_meta = &this.get_alloc_extra(alloc_id).unwrap().data_race;
if alloc_meta.as_weak_memory_ref().is_none() {
// No reason to read old value if we don't track store buffers.
return Err(());
}
let data_race = alloc_meta.as_vclocks_ref().unwrap();
// Only read old value if this is currently a non-atomic location.
for (_range, clocks) in data_race.alloc_ranges.borrow_mut().iter(offset, place.layout.size)
{
// If this had an atomic write that's not before the non-atomic write, that should
// already be in the store buffer. Initializing the store buffer now would use the
// wrong `sync_clock` so we better make sure that does not happen.
if clocks.atomic().is_some_and(|atomic| !(atomic.write_vector <= clocks.write())) {
return Err(());
}
}
// The program didn't actually do a read, so suppress the memory access hooks.
// This is also a very special exception where we just ignore an error -- if this read
// was UB e.g. because the memory is uninitialized, we don't want to know!
Ok(this.run_for_validation_ref(|this| this.read_scalar(place)).discard_err())
}
/// Generic atomic operation implementation
fn validate_atomic_op<A: Debug + Copy>(
&self,

View file

@@ -177,11 +177,11 @@ impl StoreBufferAlloc {
Self { store_buffers: RefCell::new(RangeObjectMap::new()) }
}
/// When a non-atomic access happens on a location that has been atomically accessed
/// before without data race, we can determine that the non-atomic access fully happens
/// When a non-atomic write happens on a location that has been atomically accessed
/// before without data race, we can determine that the non-atomic write fully happens
/// after all the prior atomic writes so the location no longer needs to exhibit
/// any weak memory behaviours until further atomic accesses.
pub fn memory_accessed(&self, range: AllocRange, global: &DataRaceState) {
/// any weak memory behaviours until further atomic writes.
pub fn non_atomic_write(&self, range: AllocRange, global: &DataRaceState) {
if !global.ongoing_action_data_race_free() {
let mut buffers = self.store_buffers.borrow_mut();
let access_type = buffers.access_type(range);
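The rename from `memory_accessed` to `non_atomic_write` is the heart of the fix: only a write may discard a store buffer. A toy model of the reasoning (assumed shape, not Miri's real data structures): each buffered write carries the synchronization clock that acquire loads rely on, so a race-free non-atomic write, which happens-after all buffered writes, may drop them, while a non-atomic read must leave them alone.

```rust
/// Toy store buffer: each buffered atomic write carries the clock that an
/// acquire-load of it synchronizes with.
struct BufferedWrite {
    val: u64,
    sync_clock: Vec<u64>,
}

struct StoreBuffer {
    writes: Vec<BufferedWrite>,
}

impl StoreBuffer {
    /// A race-free non-atomic write happens-after *all* buffered writes, so
    /// none of them can ever be observed again: the buffer may be discarded
    /// and the location reverts to non-atomic until the next atomic write.
    fn non_atomic_write(&mut self) {
        self.writes.clear();
    }

    /// A non-atomic read supersedes nothing: other threads may still observe
    /// the buffered writes, and discarding them would also lose their
    /// `sync_clock`s. It must leave the buffer intact (the old code cleared
    /// it here too, which is the bug this PR fixes).
    fn non_atomic_read(&self) -> Option<u64> {
        self.writes.last().map(|w| w.val)
    }
}

fn main() {
    let mut buf =
        StoreBuffer { writes: vec![BufferedWrite { val: 1, sync_clock: vec![1, 0] }] };
    assert_eq!(buf.writes[0].sync_clock, vec![1, 0]);
    assert_eq!(buf.non_atomic_read(), Some(1)); // read: buffer stays intact
    buf.non_atomic_write();
    assert!(buf.writes.is_empty()); // write: buffer cleared
}
```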
@@ -223,18 +223,23 @@ impl StoreBufferAlloc {
fn get_or_create_store_buffer_mut<'tcx>(
&mut self,
range: AllocRange,
init: Option<Scalar>,
init: Result<Option<Scalar>, ()>,
) -> InterpResult<'tcx, &mut StoreBuffer> {
let buffers = self.store_buffers.get_mut();
let access_type = buffers.access_type(range);
let pos = match access_type {
AccessType::PerfectlyOverlapping(pos) => pos,
AccessType::Empty(pos) => {
let init =
init.expect("cannot have empty store buffer when previous write was atomic");
buffers.insert_at_pos(pos, range, StoreBuffer::new(init));
pos
}
AccessType::ImperfectlyOverlapping(pos_range) => {
// Once we reach here we would've already checked that this access is not racy.
let init = init.expect(
"cannot have partially overlapping store buffer when previous write was atomic",
);
buffers.remove_pos_range(pos_range.clone());
buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init));
pos_range.start
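The `init` parameter threaded through here is the tri-state produced by `get_latest_nonatomic_val` above; the two `expect`s encode the invariant that `Err(())` can only occur when a store buffer already exists. A hedged illustration of the three cases (toy `Scalar` stand-in, not Miri's API):

```rust
// Toy stand-in for Miri's `Scalar` payload.
type Scalar = u64;

/// Illustrative only: the three states of the `init` value that
/// `get_latest_nonatomic_val` produces and `get_or_create_store_buffer_mut`
/// consumes.
fn interpret(init: Result<Option<Scalar>, ()>) -> &'static str {
    match init {
        // The latest write was atomic (or store buffering is disabled):
        // creating a fresh buffer from this value would be wrong, so the
        // `expect`s above require that a buffer already exists.
        Err(()) => "unavailable: a store buffer must already exist",
        // The location is non-atomic but holds no valid scalar
        // (e.g. uninitialized memory): seed the buffer with "no value".
        Ok(None) => "initialize the buffer; memory holds no valid scalar",
        // Common case: seed the new buffer with the old non-atomic value.
        Ok(Some(_)) => "initialize the buffer with the old value",
    }
}

fn main() {
    assert_eq!(interpret(Err(())), "unavailable: a store buffer must already exist");
    assert_eq!(interpret(Ok(Some(42))), "initialize the buffer with the old value");
}
```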
@@ -490,7 +495,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
}
let range = alloc_range(base_offset, place.layout.size);
let sync_clock = data_race_clocks.sync_clock(range);
let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, Some(init))?;
let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, Ok(Some(init)))?;
// The RMW always reads from the most recent store.
buffer.read_from_last_store(global, threads, atomic == AtomicRwOrd::SeqCst);
buffer.buffered_write(
@@ -556,7 +561,8 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
/// Add the given write to the store buffer. (Does not change machine memory.)
///
/// `init` says with which value to initialize the store buffer in case there wasn't a store
/// buffer for this memory range before.
/// buffer for this memory range before. `Err(())` means the value is not available;
/// `Ok(None)` means the memory does not contain a valid scalar.
///
/// Must be called *after* `validate_atomic_store` to ensure that `sync_clock` is up-to-date.
fn buffered_atomic_write(
@@ -564,7 +570,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
val: Scalar,
dest: &MPlaceTy<'tcx>,
atomic: AtomicWriteOrd,
init: Option<Scalar>,
init: Result<Option<Scalar>, ()>,
) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(dest.ptr(), 0)?;

View file

@@ -1536,14 +1536,11 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
genmc_ctx.memory_load(machine, ptr.addr(), range.size)?,
GlobalDataRaceHandler::Vclocks(_data_race) => {
let _trace = enter_trace_span!(data_race::before_memory_read);
let AllocDataRaceHandler::Vclocks(data_race, weak_memory) = &alloc_extra.data_race
let AllocDataRaceHandler::Vclocks(data_race, _weak_memory) = &alloc_extra.data_race
else {
unreachable!();
};
data_race.read(alloc_id, range, NaReadType::Read, None, machine)?;
if let Some(weak_memory) = weak_memory {
weak_memory.memory_accessed(range, machine.data_race.as_vclocks_ref().unwrap());
}
data_race.read_non_atomic(alloc_id, range, NaReadType::Read, None, machine)?;
}
}
if let Some(borrow_tracker) = &alloc_extra.borrow_tracker {
@@ -1579,9 +1576,10 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
else {
unreachable!()
};
data_race.write(alloc_id, range, NaWriteType::Write, None, machine)?;
data_race.write_non_atomic(alloc_id, range, NaWriteType::Write, None, machine)?;
if let Some(weak_memory) = weak_memory {
weak_memory.memory_accessed(range, machine.data_race.as_vclocks_ref().unwrap());
weak_memory
.non_atomic_write(range, machine.data_race.as_vclocks_ref().unwrap());
}
}
}
@@ -1612,7 +1610,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
GlobalDataRaceHandler::Vclocks(_global_state) => {
let _trace = enter_trace_span!(data_race::before_memory_deallocation);
let data_race = alloc_extra.data_race.as_vclocks_mut().unwrap();
data_race.write(
data_race.write_non_atomic(
alloc_id,
alloc_range(Size::ZERO, size),
NaWriteType::Deallocate,

View file

@@ -88,10 +88,20 @@ fn initialization_write(add_fence: bool) {
check_all_outcomes([11, 22], || {
let x = static_atomic(11);
if add_fence {
// For the fun of it, let's make this location atomic and non-atomic again,
// to ensure Miri updates the state properly for that.
x.store(99, Relaxed);
unsafe { std::ptr::from_ref(x).cast_mut().write(11.into()) };
}
let wait = static_atomic(0);
let j1 = spawn(move || {
x.store(22, Relaxed);
// Since nobody else writes to `x`, we can non-atomically read it.
// (This tests that we do not delete the store buffer here.)
unsafe { std::ptr::from_ref(x).read() };
// Relaxed is intentional. We want to test whether thread 2 reads the initialisation write
// after a relaxed write.
wait.store(1, Relaxed);

View file

@@ -0,0 +1,42 @@
//! This reproduces #4655 every single time
//@ compile-flags: -Zmiri-fixed-schedule -Zmiri-disable-stacked-borrows
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{ptr, thread};
const SIZE: usize = 256;
static mut ARRAY: [u8; SIZE] = [0; _];
// Everything strictly less than this has been initialized by the sender.
static POS: AtomicUsize = AtomicUsize::new(0);
fn main() {
// Sender
let th = std::thread::spawn(|| {
for i in 0..SIZE {
unsafe { ptr::write(&raw mut ARRAY[i], 1) };
POS.store(i + 1, Ordering::Release);
thread::yield_now();
// We are the only writer, so we can do non-atomic reads as well.
unsafe { (&raw const POS).cast::<usize>().read() };
}
});
// Receiver
loop {
let i = POS.load(Ordering::Acquire);
if i > 0 {
unsafe { ptr::read(&raw const ARRAY[i - 1]) };
}
if i == SIZE {
break;
}
thread::yield_now();
}
th.join().unwrap();
}
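Why this test reproduces the bug: the sender's non-atomic read of `POS` used to be reported to the weak-memory model as a plain memory access, which deleted the store buffer for `POS` together with the synchronization clocks of the buffered release stores. A later atomic access then re-created the buffer with the wrong `sync_clock` (the exact hazard called out in `get_latest_nonatomic_val` above), so the receiver's acquire load could miss the happens-before edge to the `ARRAY` writes and Miri could report a data race that does not exist in the program. After this PR, only non-atomic writes clear the buffer, and the test passes under `-Zmiri-fixed-schedule`.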