make SC fences stronger, to be correct wrt C++20
This commit is contained in:
parent
9449cb9563
commit
fe856815aa
6 changed files with 120 additions and 147 deletions
|
|
@ -68,9 +68,10 @@ Further caveats that Miri users should be aware of:
|
|||
not support networking. System API support varies between targets; if you run
|
||||
on Windows it is a good idea to use `--target x86_64-unknown-linux-gnu` to get
|
||||
better support.
|
||||
* Weak memory emulation may [produce weak behaviors](https://github.com/rust-lang/miri/issues/2301)
|
||||
when `SeqCst` fences are used that are not actually permitted by the Rust memory model, and it
|
||||
cannot produce all behaviors possibly observable on real hardware.
|
||||
* Weak memory emulation is not complete: there are legal behaviors that Miri will never produce.
|
||||
However, Miri produces many behaviors that are hard to observe on real hardware, so it can help
|
||||
quite a bit in finding weak memory concurrency bugs. To be really sure about complicated atomic
|
||||
code, use specialized tools such as [loom](https://github.com/tokio-rs/loom).
|
||||
|
||||
Moreover, Miri fundamentally cannot ensure that your code is *sound*. [Soundness] is the property of
|
||||
never causing undefined behavior when invoked from arbitrary safe code, even in combination with
|
||||
|
|
|
|||
|
|
@ -111,12 +111,6 @@ pub(super) struct ThreadClockSet {
|
|||
/// have been released by this thread by a release fence.
|
||||
fence_release: VClock,
|
||||
|
||||
/// Timestamps of the last SC fence performed by each
|
||||
/// thread, updated when this thread performs an SC fence.
|
||||
/// This is never acquired into the thread's clock, it
|
||||
/// just limits which old writes can be seen in weak memory emulation.
|
||||
pub(super) fence_seqcst: VClock,
|
||||
|
||||
/// Timestamps of the last SC write performed by each
|
||||
/// thread, updated when this thread performs an SC fence.
|
||||
/// This is never acquired into the thread's clock, it
|
||||
|
|
@ -344,11 +338,13 @@ pub struct GlobalState {
|
|||
/// active vector-clocks catch up with the threads timestamp.
|
||||
reuse_candidates: RefCell<FxHashSet<VectorIdx>>,
|
||||
|
||||
/// The timestamp of last SC fence performed by each thread
|
||||
/// We make SC fences act like RMWs on a global location.
|
||||
/// To implement that, they all release and acquire into this clock.
|
||||
last_sc_fence: RefCell<VClock>,
|
||||
|
||||
/// The timestamp of last SC write performed by each thread
|
||||
last_sc_write: RefCell<VClock>,
|
||||
/// The timestamp of last SC write performed by each thread.
|
||||
/// Threads only update their own index here!
|
||||
last_sc_write_per_thread: RefCell<VClock>,
|
||||
|
||||
/// Track when an outdated (weak memory) load happens.
|
||||
pub track_outdated_loads: bool,
|
||||
|
|
@ -883,9 +879,17 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
|
|||
clocks.apply_release_fence();
|
||||
}
|
||||
if atomic == AtomicFenceOrd::SeqCst {
|
||||
data_race.last_sc_fence.borrow_mut().set_at_index(&clocks.clock, index);
|
||||
clocks.fence_seqcst.join(&data_race.last_sc_fence.borrow());
|
||||
clocks.write_seqcst.join(&data_race.last_sc_write.borrow());
|
||||
// Behave like an RMW on the global fence location. This takes full care of
|
||||
// all the SC fence requirements, including C++17 §32.4 [atomics.order]
|
||||
// paragraph 6 (which would limit what future reads can see). It also rules
|
||||
// out many legal behaviors, but we don't currently have a model that would
|
||||
// be more precise.
|
||||
let mut sc_fence_clock = data_race.last_sc_fence.borrow_mut();
|
||||
sc_fence_clock.join(&clocks.clock);
|
||||
clocks.clock.join(&sc_fence_clock);
|
||||
// Also establish some sort of order with the last SC write that happened, globally
|
||||
// (but this is only respected by future reads).
|
||||
clocks.write_seqcst.join(&data_race.last_sc_write_per_thread.borrow());
|
||||
}
|
||||
|
||||
// Increment timestamp in case of release semantics.
|
||||
|
|
@ -1555,7 +1559,7 @@ impl GlobalState {
|
|||
thread_info: RefCell::new(IndexVec::new()),
|
||||
reuse_candidates: RefCell::new(FxHashSet::default()),
|
||||
last_sc_fence: RefCell::new(VClock::default()),
|
||||
last_sc_write: RefCell::new(VClock::default()),
|
||||
last_sc_write_per_thread: RefCell::new(VClock::default()),
|
||||
track_outdated_loads: config.track_outdated_loads,
|
||||
};
|
||||
|
||||
|
|
@ -1857,7 +1861,7 @@ impl GlobalState {
|
|||
// SC ATOMIC STORE rule in the paper.
|
||||
pub(super) fn sc_write(&self, thread_mgr: &ThreadManager<'_>) {
|
||||
let (index, clocks) = self.active_thread_state(thread_mgr);
|
||||
self.last_sc_write.borrow_mut().set_at_index(&clocks.clock, index);
|
||||
self.last_sc_write_per_thread.borrow_mut().set_at_index(&clocks.clock, index);
|
||||
}
|
||||
|
||||
// SC ATOMIC READ rule in the paper.
|
||||
|
|
|
|||
|
|
@ -11,14 +11,17 @@
|
|||
//! This implementation is not fully correct under the revised C++20 model and may generate behaviours C++20
|
||||
//! disallows (<https://github.com/rust-lang/miri/issues/2301>).
|
||||
//!
|
||||
//! A modification is made to the paper's model to partially address C++20 changes.
|
||||
//! Specifically, if an SC load reads from an atomic store of any ordering, then a later SC load cannot read from
|
||||
//! an earlier store in the location's modification order. This is to prevent creating a backwards S edge from the second
|
||||
//! load to the first, as a result of C++20's coherence-ordered before rules.
|
||||
//! (This seems to rule out behaviors that were actually permitted by the RC11 model that C++20
|
||||
//! intended to copy (https://plv.mpi-sws.org/scfix/paper.pdf); a change was introduced when
|
||||
//! translating the math to English. According to Viktor Vafeiadis, this difference is harmless. So
|
||||
//! we stick to what the standard says, and allow fewer behaviors.)
|
||||
//! Modifications are made to the paper's model to address C++20 changes:
|
||||
//! - If an SC load reads from an atomic store of any ordering, then a later SC load cannot read
|
||||
//! from an earlier store in the location's modification order. This is to prevent creating a
|
||||
//! backwards S edge from the second load to the first, as a result of C++20's coherence-ordered
|
||||
//! before rules. (This seems to rule out behaviors that were actually permitted by the RC11 model
|
||||
//! that C++20 intended to copy (<https://plv.mpi-sws.org/scfix/paper.pdf>); a change was
|
||||
//! introduced when translating the math to English. According to Viktor Vafeiadis, this
|
||||
//! difference is harmless. So we stick to what the standard says, and allow fewer behaviors.)
|
||||
//! - SC fences are treated like AcqRel RMWs to a global clock, to ensure they induce enough
|
||||
//! synchronization with the surrounding accesses. This rules out legal behavior, but it is really
|
||||
//! hard to be more precise here.
|
||||
//!
|
||||
//! Rust follows the C++20 memory model (except for the Consume ordering and some operations not performable through C++'s
|
||||
//! `std::atomic<T>` API). It is therefore possible for this implementation to generate behaviours never observable when the
|
||||
|
|
@ -340,11 +343,6 @@ impl<'tcx> StoreBuffer {
|
|||
// then we cannot read-from anything earlier in modification order.
|
||||
// C++20 §6.9.2.2 [intro.races] paragraph 16
|
||||
false
|
||||
} else if store_elem.timestamp <= clocks.fence_seqcst[store_elem.store_index] {
|
||||
// The current load, which may be sequenced-after an SC fence, cannot read-before
|
||||
// the last store sequenced-before an SC fence in another thread.
|
||||
// C++17 §32.4 [atomics.order] paragraph 6
|
||||
false
|
||||
} else if store_elem.timestamp <= clocks.write_seqcst[store_elem.store_index]
|
||||
&& store_elem.is_seqcst
|
||||
{
|
||||
|
|
|
|||
|
|
@ -1,87 +0,0 @@
|
|||
//@compile-flags: -Zmiri-ignore-leaks
|
||||
|
||||
// https://plv.mpi-sws.org/scfix/paper.pdf
|
||||
// 2.2 Second Problem: SC Fences are Too Weak
|
||||
// This test should pass under the C++20 model Rust is using.
|
||||
// Unfortunately, Miri's weak memory emulation only follows the C++11 model
|
||||
// as we don't know how to correctly emulate C++20's revised SC semantics,
|
||||
// so we have to stick to C++11 emulation from existing research.
|
||||
|
||||
use std::sync::atomic::Ordering::*;
|
||||
use std::sync::atomic::{AtomicUsize, fence};
|
||||
use std::thread::spawn;
|
||||
|
||||
// Spins until it reads the given value
|
||||
fn reads_value(loc: &AtomicUsize, val: usize) -> usize {
|
||||
while loc.load(Relaxed) != val {
|
||||
std::hint::spin_loop();
|
||||
}
|
||||
val
|
||||
}
|
||||
|
||||
// We can't create static items because we need to run each test
|
||||
// multiple tests
|
||||
fn static_atomic(val: usize) -> &'static AtomicUsize {
|
||||
let ret = Box::leak(Box::new(AtomicUsize::new(val)));
|
||||
// A workaround to put the initialization value in the store buffer.
|
||||
// See https://github.com/rust-lang/miri/issues/2164
|
||||
ret.load(Relaxed);
|
||||
ret
|
||||
}
|
||||
|
||||
fn test_cpp20_rwc_syncs() {
|
||||
/*
|
||||
int main() {
|
||||
atomic_int x = 0;
|
||||
atomic_int y = 0;
|
||||
|
||||
{{{ x.store(1,mo_relaxed);
|
||||
||| { r1=x.load(mo_relaxed).readsvalue(1);
|
||||
fence(mo_seq_cst);
|
||||
r2=y.load(mo_relaxed); }
|
||||
||| { y.store(1,mo_relaxed);
|
||||
fence(mo_seq_cst);
|
||||
r3=x.load(mo_relaxed); }
|
||||
}}}
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
let x = static_atomic(0);
|
||||
let y = static_atomic(0);
|
||||
|
||||
let j1 = spawn(move || {
|
||||
x.store(1, Relaxed);
|
||||
});
|
||||
|
||||
let j2 = spawn(move || {
|
||||
reads_value(&x, 1);
|
||||
fence(SeqCst);
|
||||
y.load(Relaxed)
|
||||
});
|
||||
|
||||
let j3 = spawn(move || {
|
||||
y.store(1, Relaxed);
|
||||
fence(SeqCst);
|
||||
x.load(Relaxed)
|
||||
});
|
||||
|
||||
j1.join().unwrap();
|
||||
let b = j2.join().unwrap();
|
||||
let c = j3.join().unwrap();
|
||||
|
||||
// We cannot write assert_ne!() since ui_test's fail
|
||||
// tests expect exit status 1, whereas panics produce 101.
|
||||
// Our ui_test does not yet support overriding failure status codes.
|
||||
if (b, c) == (0, 0) {
|
||||
// This *should* be unreachable, but Miri will reach it.
|
||||
unsafe {
|
||||
std::hint::unreachable_unchecked(); //~ERROR: unreachable
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn main() {
|
||||
for _ in 0..500 {
|
||||
test_cpp20_rwc_syncs();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,20 +0,0 @@
|
|||
error: Undefined Behavior: entering unreachable code
|
||||
--> tests/fail/should-pass/cpp20_rwc_syncs.rs:LL:CC
|
||||
|
|
||||
LL | std::hint::unreachable_unchecked();
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ entering unreachable code
|
||||
|
|
||||
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
|
||||
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
|
||||
= note: BACKTRACE:
|
||||
= note: inside `test_cpp20_rwc_syncs` at tests/fail/should-pass/cpp20_rwc_syncs.rs:LL:CC
|
||||
note: inside `main`
|
||||
--> tests/fail/should-pass/cpp20_rwc_syncs.rs:LL:CC
|
||||
|
|
||||
LL | test_cpp20_rwc_syncs();
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
|
||||
|
||||
error: aborting due to 1 previous error
|
||||
|
||||
|
|
@ -22,7 +22,7 @@
|
|||
// Available: https://ss265.host.cs.st-andrews.ac.uk/papers/n3132.pdf.
|
||||
|
||||
use std::sync::atomic::Ordering::*;
|
||||
use std::sync::atomic::{AtomicBool, AtomicI32, fence};
|
||||
use std::sync::atomic::{AtomicBool, AtomicI32, Ordering, fence};
|
||||
use std::thread::spawn;
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
|
|
@ -45,8 +45,8 @@ fn static_atomic_bool(val: bool) -> &'static AtomicBool {
|
|||
}
|
||||
|
||||
// Spins until it acquires a pre-determined value.
|
||||
fn acquires_value(loc: &AtomicI32, val: i32) -> i32 {
|
||||
while loc.load(Acquire) != val {
|
||||
fn loads_value(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 {
|
||||
while loc.load(ord) != val {
|
||||
std::hint::spin_loop();
|
||||
}
|
||||
val
|
||||
|
|
@ -69,7 +69,7 @@ fn test_corr() {
|
|||
}); // | |
|
||||
#[rustfmt::skip] // |synchronizes-with |happens-before
|
||||
let j3 = spawn(move || { // | |
|
||||
acquires_value(&y, 1); // <------------------+ |
|
||||
loads_value(&y, Acquire, 1); // <------------+ |
|
||||
x.load(Relaxed) // <----------------------------------------------+
|
||||
// The two reads on x are ordered by hb, so they cannot observe values
|
||||
// differently from the modification order. If the first read observed
|
||||
|
|
@ -94,12 +94,12 @@ fn test_wrc() {
|
|||
}); // | |
|
||||
#[rustfmt::skip] // |synchronizes-with |
|
||||
let j2 = spawn(move || { // | |
|
||||
acquires_value(&x, 1); // <------------------+ |
|
||||
loads_value(&x, Acquire, 1); // <------------+ |
|
||||
y.store(1, Release); // ---------------------+ |happens-before
|
||||
}); // | |
|
||||
#[rustfmt::skip] // |synchronizes-with |
|
||||
let j3 = spawn(move || { // | |
|
||||
acquires_value(&y, 1); // <------------------+ |
|
||||
loads_value(&y, Acquire, 1); // <------------+ |
|
||||
x.load(Relaxed) // <-----------------------------------------------+
|
||||
});
|
||||
|
||||
|
|
@ -125,7 +125,7 @@ fn test_message_passing() {
|
|||
#[rustfmt::skip] // |synchronizes-with | happens-before
|
||||
let j2 = spawn(move || { // | |
|
||||
let x = x; // avoid field capturing | |
|
||||
acquires_value(&y, 1); // <------------------+ |
|
||||
loads_value(&y, Acquire, 1); // <------------+ |
|
||||
unsafe { *x.0 } // <---------------------------------------------+
|
||||
});
|
||||
|
||||
|
|
@ -268,9 +268,6 @@ fn test_iriw_sc_rlx() {
|
|||
let x = static_atomic_bool(false);
|
||||
let y = static_atomic_bool(false);
|
||||
|
||||
x.store(false, Relaxed);
|
||||
y.store(false, Relaxed);
|
||||
|
||||
let a = spawn(move || x.store(true, Relaxed));
|
||||
let b = spawn(move || y.store(true, Relaxed));
|
||||
let c = spawn(move || {
|
||||
|
|
@ -290,6 +287,84 @@ fn test_iriw_sc_rlx() {
|
|||
assert!(c || d);
|
||||
}
|
||||
|
||||
// Similar to `test_iriw_sc_rlx` but with fences instead of SC accesses.
|
||||
fn test_cpp20_sc_fence_fix() {
|
||||
let x = static_atomic_bool(false);
|
||||
let y = static_atomic_bool(false);
|
||||
|
||||
let thread1 = spawn(|| {
|
||||
let a = x.load(Relaxed);
|
||||
fence(SeqCst);
|
||||
let b = y.load(Relaxed);
|
||||
(a, b)
|
||||
});
|
||||
|
||||
let thread2 = spawn(|| {
|
||||
x.store(true, Relaxed);
|
||||
});
|
||||
let thread3 = spawn(|| {
|
||||
y.store(true, Relaxed);
|
||||
});
|
||||
|
||||
let thread4 = spawn(|| {
|
||||
let c = y.load(Relaxed);
|
||||
fence(SeqCst);
|
||||
let d = x.load(Relaxed);
|
||||
(c, d)
|
||||
});
|
||||
|
||||
let (a, b) = thread1.join().unwrap();
|
||||
thread2.join().unwrap();
|
||||
thread3.join().unwrap();
|
||||
let (c, d) = thread4.join().unwrap();
|
||||
let bad = a == true && b == false && c == true && d == false;
|
||||
assert!(!bad);
|
||||
}
|
||||
|
||||
// https://plv.mpi-sws.org/scfix/paper.pdf
|
||||
// 2.2 Second Problem: SC Fences are Too Weak
|
||||
fn test_cpp20_rwc_syncs() {
|
||||
/*
|
||||
int main() {
|
||||
atomic_int x = 0;
|
||||
atomic_int y = 0;
|
||||
{{{ x.store(1,mo_relaxed);
|
||||
||| { r1=x.load(mo_relaxed).readsvalue(1);
|
||||
fence(mo_seq_cst);
|
||||
r2=y.load(mo_relaxed); }
|
||||
||| { y.store(1,mo_relaxed);
|
||||
fence(mo_seq_cst);
|
||||
r3=x.load(mo_relaxed); }
|
||||
}}}
|
||||
return 0;
|
||||
}
|
||||
*/
|
||||
let x = static_atomic(0);
|
||||
let y = static_atomic(0);
|
||||
|
||||
let j1 = spawn(move || {
|
||||
x.store(1, Relaxed);
|
||||
});
|
||||
|
||||
let j2 = spawn(move || {
|
||||
loads_value(&x, Relaxed, 1);
|
||||
fence(SeqCst);
|
||||
y.load(Relaxed)
|
||||
});
|
||||
|
||||
let j3 = spawn(move || {
|
||||
y.store(1, Relaxed);
|
||||
fence(SeqCst);
|
||||
x.load(Relaxed)
|
||||
});
|
||||
|
||||
j1.join().unwrap();
|
||||
let b = j2.join().unwrap();
|
||||
let c = j3.join().unwrap();
|
||||
|
||||
assert!((b, c) != (0, 0));
|
||||
}
|
||||
|
||||
pub fn main() {
|
||||
for _ in 0..50 {
|
||||
test_single_thread();
|
||||
|
|
@ -301,5 +376,7 @@ pub fn main() {
|
|||
test_sc_store_buffering();
|
||||
test_sync_through_rmw_and_fences();
|
||||
test_iriw_sc_rlx();
|
||||
test_cpp20_sc_fence_fix();
|
||||
test_cpp20_rwc_syncs();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue