diff --git a/src/concurrency/data_race.rs b/src/concurrency/data_race.rs index 2483bcdf49a2..8b8694ac1835 100644 --- a/src/concurrency/data_race.rs +++ b/src/concurrency/data_race.rs @@ -287,6 +287,20 @@ impl MemoryCellClocks { Ok(()) } + /// Checks if the memory cell write races with any prior atomic read or write + fn write_race_free_with_atomic(&mut self, clocks: &ThreadClockSet) -> bool { + if let Some(atomic) = self.atomic() { + atomic.read_vector <= clocks.clock && atomic.write_vector <= clocks.clock + } else { + true + } + } + + /// Checks if the memory cell read races with any prior atomic write + fn read_race_free_with_atomic(&self, clocks: &ThreadClockSet) -> bool { + if let Some(atomic) = self.atomic() { atomic.write_vector <= clocks.clock } else { true } + } + /// Update memory cell data-race tracking for atomic /// load relaxed semantics, is a no-op if this memory was /// not used previously as atomic memory. @@ -514,6 +528,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { // the *value* (including the associated provenance if this is an AtomicPtr) at this location. // Only metadata on the location itself is used. let scalar = this.allow_data_races_ref(move |this| this.read_scalar(&place.into()))?; + this.validate_overlapping_atomic_read(place)?; this.buffered_atomic_read(place, atomic, scalar, || { this.validate_atomic_load(place, atomic) }) @@ -527,6 +542,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicWriteOp, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(dest)?; this.allow_data_races_mut(move |this| this.write_scalar(val, &(*dest).into()))?; this.validate_atomic_store(dest, atomic)?; // FIXME: it's not possible to get the value before write_scalar. 
A read_scalar will cause @@ -547,6 +563,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; let old = this.allow_data_races_mut(|this| this.read_immediate(&place.into()))?; // Atomics wrap around on overflow. @@ -575,6 +592,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx, ScalarMaybeUninit> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; let old = this.allow_data_races_mut(|this| this.read_scalar(&place.into()))?; this.allow_data_races_mut(|this| this.write_scalar(new, &(*place).into()))?; @@ -595,6 +613,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; let old = this.allow_data_races_mut(|this| this.read_immediate(&place.into()))?; let lt = this.binary_op(mir::BinOp::Lt, &old, &rhs)?.to_scalar()?.to_bool()?; @@ -637,6 +656,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { use rand::Rng as _; let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; // Failure ordering cannot be stronger than success ordering, therefore first attempt // to read with the failure ordering and if successful then try again with the success // read ordering and write in the success case. 
@@ -686,6 +706,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicReadOp, ) -> InterpResult<'tcx> { let this = self.eval_context_ref(); + this.validate_overlapping_atomic_read(place)?; this.validate_atomic_op( place, atomic, @@ -708,6 +729,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicWriteOp, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; this.validate_atomic_op( place, atomic, @@ -733,6 +755,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); let release = matches!(atomic, Release | AcqRel | SeqCst); let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; this.validate_atomic_op(place, atomic, "Atomic RMW", move |memory, clocks, index, _| { if acquire { memory.load_acquire(clocks, index)?; @@ -918,6 +941,44 @@ impl VClockAlloc { ) } + /// Detect racing atomic writes (not data races) + /// on every byte of the current access range + pub(super) fn read_race_free_with_atomic<'tcx>( + &self, + range: AllocRange, + global: &GlobalState, + ) -> bool { + if global.race_detecting() { + let (_, clocks) = global.current_thread_state(); + let alloc_ranges = self.alloc_ranges.borrow(); + for (_, range) in alloc_ranges.iter(range.start, range.size) { + if !range.read_race_free_with_atomic(&clocks) { + return false; + } + } + } + true + } + + /// Detect racing atomic read and writes (not data races) + /// on every byte of the current access range + pub(super) fn write_race_free_with_atomic<'tcx>( + &mut self, + range: AllocRange, + global: &GlobalState, + ) -> bool { + if global.race_detecting() { + let (_, clocks) = global.current_thread_state(); + let alloc_ranges = self.alloc_ranges.get_mut(); + for (_, range) in alloc_ranges.iter_mut(range.start, range.size) { + if 
!range.write_race_free_with_atomic(&clocks) { + return false; + } + } + } + true + } + /// Detect data-races for an unsynchronized read operation, will not perform /// data-race detection if `race_detecting()` is false, either due to no threads /// being created or if it is temporarily disabled during a racy read or write @@ -1027,7 +1088,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { let (alloc_id, base_offset, _tag) = this.ptr_get_alloc_id(place.ptr)?; // Load and log the atomic operation. // Note that atomic loads are possible even from read-only allocations, so `get_alloc_extra_mut` is not an option. - let alloc_meta = &this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap(); + let alloc_meta = this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap(); log::trace!( "Atomic op({}) with ordering {:?} on {:?} (size={})", description, diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index 7d8d7da6dc4e..a4fbd14f4374 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -29,6 +29,13 @@ //! Additionally, writes in our implementation do not have globally unique timestamps attached. In the other two models this timestamp is //! used to make sure a value in a thread's view is not overwritten by a write that occured earlier than the one in the existing view. //! In our implementation, this is detected using read information attached to store elements, as there is no data strucutre representing reads. +//! +//! Safe/sound Rust allows for more operations on atomic locations than the C++20 atomic API was intended to allow, such as non-atomically accessing +//! a previously atomically accessed location, or accessing previously atomically accessed locations with a differently sized operation +//! (such as accessing the top 16 bits of an AtomicU32). These scenarios are generally undiscussed in formalisations of C++ memory model. +//! 
In Rust, these operations can only be done through a `&mut AtomicFoo` reference or one derived from it, therefore these operations +//! can only happen after all previous accesses on the same locations. This implementation is adapted to allow these operations. +//! A mixed size/atomicity read that races with writes, or a write that races with reads or writes will still cause UBs to be thrown. // Our and the author's own implementation (tsan11) of the paper have some deviations from the provided operational semantics in ยง5.3: // 1. In the operational semantics, store elements keep a copy of the atomic object's vector clock (AtomicCellClocks::sync_vector in miri), @@ -117,6 +124,14 @@ impl StoreBufferAlloc { Self { store_buffers: RefCell::new(AllocationMap::new()) } } + /// Checks if the range imperfectly overlaps with existing buffers + /// Used to determine if mixed-size atomic accesses + fn is_overlapping(&self, range: AllocRange) -> bool { + let buffers = self.store_buffers.borrow(); + let access_type = buffers.access_type(range); + matches!(access_type, AccessType::ImperfectlyOverlapping(_)) + } + /// When a non-atomic access happens on a location that has been atomically accessed /// before without data race, we can determine that the non-atomic access fully happens /// before all the prior atomic accesses so the location no longer needs to exhibit @@ -148,21 +163,16 @@ impl StoreBufferAlloc { let pos = match access_type { AccessType::PerfectlyOverlapping(pos) => pos, AccessType::Empty(pos) => { - let new_buffer = StoreBuffer::new(init); let mut buffers = self.store_buffers.borrow_mut(); - buffers.insert_at_pos(pos, range, new_buffer); + buffers.insert_at_pos(pos, range, StoreBuffer::new(init)); pos } AccessType::ImperfectlyOverlapping(pos_range) => { - // Accesses that imperfectly overlaps with existing atomic objects - // do not have well-defined behaviours. 
- // FIXME: if this access happens before all previous accesses on every object it overlaps - // with, then we would like to tolerate it. However this is not easy to check. - if pos_range.start + 1 == pos_range.end { - throw_ub_format!("mixed-size access on an existing atomic object"); - } else { - throw_ub_format!("access overlaps with multiple existing atomic objects"); - } + // Once we reach here we would've already checked that this access is not racy + let mut buffers = self.store_buffers.borrow_mut(); + buffers.remove_pos_range(pos_range.clone()); + buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init)); + pos_range.start } }; Ok(Ref::map(self.store_buffers.borrow(), |buffer| &buffer[pos])) @@ -179,16 +189,13 @@ impl StoreBufferAlloc { let pos = match access_type { AccessType::PerfectlyOverlapping(pos) => pos, AccessType::Empty(pos) => { - let new_buffer = StoreBuffer::new(init); - buffers.insert_at_pos(pos, range, new_buffer); + buffers.insert_at_pos(pos, range, StoreBuffer::new(init)); pos } AccessType::ImperfectlyOverlapping(pos_range) => { - if pos_range.start + 1 == pos_range.end { - throw_ub_format!("mixed-size access on an existing atomic object"); - } else { - throw_ub_format!("access overlaps with multiple existing atomic objects"); - } + buffers.remove_pos_range(pos_range.clone()); + buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init)); + pos_range.start } }; Ok(&mut buffers[pos]) @@ -392,6 +399,55 @@ impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mi pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> { + // If weak memory emulation is enabled, check if this atomic op imperfectly overlaps with a previous + // atomic write. 
If it does, then we require it to be ordered (non-racy) with all previous atomic + // writes on all the bytes in range + fn validate_overlapping_atomic_read(&self, place: &MPlaceTy<'tcx, Tag>) -> InterpResult<'tcx> { + let this = self.eval_context_ref(); + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; + if let crate::AllocExtra { + weak_memory: Some(alloc_buffers), + data_race: Some(alloc_clocks), + .. + } = this.get_alloc_extra(alloc_id)? + { + let range = alloc_range(base_offset, place.layout.size); + if alloc_buffers.is_overlapping(range) + && !alloc_clocks + .read_race_free_with_atomic(range, this.machine.data_race.as_ref().unwrap()) + { + throw_ub_format!("racy imperfectly overlapping atomic access"); + } + } + Ok(()) + } + + // Same as above but needs to be ordered with all previous atomic read or writes + fn validate_overlapping_atomic_write( + &mut self, + place: &MPlaceTy<'tcx, Tag>, + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; + if let ( + crate::AllocExtra { + weak_memory: Some(alloc_buffers), + data_race: Some(alloc_clocks), + .. + }, + crate::Evaluator { data_race: Some(global), .. }, + ) = this.get_alloc_extra_mut(alloc_id)? + { + let range = alloc_range(base_offset, place.layout.size); + if alloc_buffers.is_overlapping(range) + && !alloc_clocks.write_race_free_with_atomic(range, global) + { + throw_ub_format!("racy imperfectly overlapping atomic access"); + } + } + Ok(()) + } + fn buffered_atomic_rmw( &mut self, new_val: ScalarMaybeUninit, diff --git a/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs b/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs index 34097f4a8937..7dad0a12e5dd 100644 --- a/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs +++ b/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs @@ -9,8 +9,8 @@ // so we have to stick to C++11 emulation from existing research. 
use std::sync::atomic::Ordering::*; -use std::thread::spawn; use std::sync::atomic::{fence, AtomicUsize}; +use std::thread::spawn; // Spins until it reads the given value fn reads_value(loc: &AtomicUsize, val: usize) -> usize { @@ -25,7 +25,7 @@ fn reads_value(loc: &AtomicUsize, val: usize) -> usize { fn static_atomic(val: usize) -> &'static AtomicUsize { let ret = Box::leak(Box::new(AtomicUsize::new(val))); // A workaround to put the initialization value in the store buffer. - ret.store(val, Relaxed); + ret.load(Relaxed); ret } @@ -82,4 +82,4 @@ pub fn main() { for _ in 0..500 { test_cpp20_rwc_syncs(); } -} \ No newline at end of file +} diff --git a/tests/compile-fail/weak_memory/racing_mixed_size.rs b/tests/compile-fail/weak_memory/racing_mixed_size.rs new file mode 100644 index 000000000000..d4ba48afe39d --- /dev/null +++ b/tests/compile-fail/weak_memory/racing_mixed_size.rs @@ -0,0 +1,38 @@ +// compile-flags: -Zmiri-ignore-leaks + +#![feature(core_intrinsics)] + +use std::sync::atomic::AtomicU32; +use std::sync::atomic::Ordering::*; +use std::thread::spawn; + +fn static_atomic_u32(val: u32) -> &'static AtomicU32 { + let ret = Box::leak(Box::new(AtomicU32::new(val))); + ret +} + +fn split_u32_ptr(dword: *const u32) -> *const [u16; 2] { + unsafe { std::mem::transmute::<*const u32, *const [u16; 2]>(dword) } +} + +// Wine's SRWLock implementation does this, which is definitely undefined in C++ memory model +// https://github.com/wine-mirror/wine/blob/303f8042f9db508adaca02ef21f8de4992cb9c03/dlls/ntdll/sync.c#L543-L566 +// Though it probably works just fine on x86 +pub fn main() { + let x = static_atomic_u32(0); + let j1 = spawn(move || { + x.store(1, Relaxed); + }); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + let x_split = split_u32_ptr(x_ptr); + unsafe { + let hi = &(*x_split)[0] as *const u16; + std::intrinsics::atomic_load_relaxed(hi); //~ ERROR: imperfectly overlapping + } + }); + + j1.join().unwrap(); + 
j2.join().unwrap(); +} diff --git a/tests/run-pass/weak_memory/extra_cpp.rs b/tests/run-pass/weak_memory/extra_cpp.rs index b20ec5834990..b1a683798bb3 100644 --- a/tests/run-pass/weak_memory/extra_cpp.rs +++ b/tests/run-pass/weak_memory/extra_cpp.rs @@ -4,13 +4,19 @@ // but doable in safe (at least sound) Rust. #![feature(atomic_from_mut)] +#![feature(core_intrinsics)] use std::sync::atomic::Ordering::*; -use std::sync::atomic::{AtomicU16, AtomicU32, AtomicUsize}; +use std::sync::atomic::{AtomicU16, AtomicU32}; use std::thread::spawn; -fn static_atomic_mut(val: usize) -> &'static mut AtomicUsize { - let ret = Box::leak(Box::new(AtomicUsize::new(val))); +fn static_atomic_mut(val: u32) -> &'static mut AtomicU32 { + let ret = Box::leak(Box::new(AtomicU32::new(val))); + ret +} + +fn static_atomic(val: u32) -> &'static AtomicU32 { + let ret = Box::leak(Box::new(AtomicU32::new(val))); ret } @@ -18,6 +24,10 @@ fn split_u32(dword: &mut u32) -> &mut [u16; 2] { unsafe { std::mem::transmute::<&mut u32, &mut [u16; 2]>(dword) } } +fn split_u32_ptr(dword: *const u32) -> *const [u16; 2] { + unsafe { std::mem::transmute::<*const u32, *const [u16; 2]>(dword) } +} + fn mem_replace() { let mut x = AtomicU32::new(0); @@ -31,7 +41,7 @@ fn assign_to_mut() { let x = static_atomic_mut(0); x.store(1, Relaxed); - *x = AtomicUsize::new(2); + *x = AtomicU32::new(2); assert_eq!(x.load(Relaxed), 2); } @@ -70,10 +80,81 @@ fn from_mut_split() { assert_eq!(x_lo_atomic.load(Relaxed), u16::from_be(0xfafa)); } +// Although not possible to do in safe Rust, +// we allow non-atomic and atomic reads to race +// as this should be sound +fn racing_mixed_atomicity_read() { + let x = static_atomic(0); + x.store(42, Relaxed); + + let j1 = spawn(move || x.load(Relaxed)); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + unsafe { std::intrinsics::atomic_load_relaxed(x_ptr) } + }); + + let r1 = j1.join().unwrap(); + let r2 = j2.join().unwrap(); + + assert_eq!(r1, 42); + 
assert_eq!(r2, 42); +} + +fn racing_mixed_size_read() { + let x = static_atomic(0); + + let j1 = spawn(move || { + x.load(Relaxed); + }); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + let x_split = split_u32_ptr(x_ptr); + unsafe { + let hi = &(*x_split)[0] as *const u16; + std::intrinsics::atomic_load_relaxed(hi); //~ ERROR: imperfectly overlapping + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); +} + +fn racing_mixed_atomicity_and_size_read() { + let x = static_atomic(u32::from_be(0xabbafafa)); + + let j1 = spawn(move || { + x.load(Relaxed); + }); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + unsafe { *x_ptr }; + }); + + let j3 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + let x_split = split_u32_ptr(x_ptr); + unsafe { + let hi = &(*x_split)[0] as *const u16; + std::intrinsics::atomic_load_relaxed(hi) + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); + let r3 = j3.join().unwrap(); + + assert_eq!(r3, u16::from_be(0xabba)); +} pub fn main() { get_mut_write(); from_mut_split(); assign_to_mut(); mem_replace(); + racing_mixed_atomicity_read(); + racing_mixed_size_read(); + racing_mixed_atomicity_and_size_read(); }