diff --git a/src/concurrency/data_race.rs b/src/concurrency/data_race.rs index 2483bcdf49a2..8b8694ac1835 100644 --- a/src/concurrency/data_race.rs +++ b/src/concurrency/data_race.rs @@ -287,6 +287,20 @@ impl MemoryCellClocks { Ok(()) } + /// Checks if the memory cell write races with any prior atomic read or write + fn write_race_free_with_atomic(&mut self, clocks: &ThreadClockSet) -> bool { + if let Some(atomic) = self.atomic() { + atomic.read_vector <= clocks.clock && atomic.write_vector <= clocks.clock + } else { + true + } + } + + /// Checks if the memory cell read races with any prior atomic write + fn read_race_free_with_atomic(&self, clocks: &ThreadClockSet) -> bool { + if let Some(atomic) = self.atomic() { atomic.write_vector <= clocks.clock } else { true } + } + /// Update memory cell data-race tracking for atomic /// load relaxed semantics, is a no-op if this memory was /// not used previously as atomic memory. @@ -514,6 +528,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { // the *value* (including the associated provenance if this is an AtomicPtr) at this location. // Only metadata on the location itself is used. let scalar = this.allow_data_races_ref(move |this| this.read_scalar(&place.into()))?; + this.validate_overlapping_atomic_read(place)?; this.buffered_atomic_read(place, atomic, scalar, || { this.validate_atomic_load(place, atomic) }) @@ -527,6 +542,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicWriteOp, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(dest)?; this.allow_data_races_mut(move |this| this.write_scalar(val, &(*dest).into()))?; this.validate_atomic_store(dest, atomic)?; // FIXME: it's not possible to get the value before write_scalar. 
A read_scalar will cause @@ -547,6 +563,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; let old = this.allow_data_races_mut(|this| this.read_immediate(&place.into()))?; // Atomics wrap around on overflow. @@ -575,6 +592,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx, ScalarMaybeUninit> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; let old = this.allow_data_races_mut(|this| this.read_scalar(&place.into()))?; this.allow_data_races_mut(|this| this.write_scalar(new, &(*place).into()))?; @@ -595,6 +613,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; let old = this.allow_data_races_mut(|this| this.read_immediate(&place.into()))?; let lt = this.binary_op(mir::BinOp::Lt, &old, &rhs)?.to_scalar()?.to_bool()?; @@ -637,6 +656,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { use rand::Rng as _; let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; // Failure ordering cannot be stronger than success ordering, therefore first attempt // to read with the failure ordering and if successful then try again with the success // read ordering and write in the success case. 
@@ -686,6 +706,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicReadOp, ) -> InterpResult<'tcx> { let this = self.eval_context_ref(); + this.validate_overlapping_atomic_read(place)?; this.validate_atomic_op( place, atomic, @@ -708,6 +729,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicWriteOp, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; this.validate_atomic_op( place, atomic, @@ -733,6 +755,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); let release = matches!(atomic, Release | AcqRel | SeqCst); let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; this.validate_atomic_op(place, atomic, "Atomic RMW", move |memory, clocks, index, _| { if acquire { memory.load_acquire(clocks, index)?; @@ -918,6 +941,44 @@ impl VClockAlloc { ) } + /// Detect racing atomic writes (not data races) + /// on every byte of the current access range + pub(super) fn read_race_free_with_atomic<'tcx>( + &self, + range: AllocRange, + global: &GlobalState, + ) -> bool { + if global.race_detecting() { + let (_, clocks) = global.current_thread_state(); + let alloc_ranges = self.alloc_ranges.borrow(); + for (_, range) in alloc_ranges.iter(range.start, range.size) { + if !range.read_race_free_with_atomic(&clocks) { + return false; + } + } + } + true + } + + /// Detect racing atomic read and writes (not data races) + /// on every byte of the current access range + pub(super) fn write_race_free_with_atomic<'tcx>( + &mut self, + range: AllocRange, + global: &GlobalState, + ) -> bool { + if global.race_detecting() { + let (_, clocks) = global.current_thread_state(); + let alloc_ranges = self.alloc_ranges.get_mut(); + for (_, range) in alloc_ranges.iter_mut(range.start, range.size) { + if 
!range.write_race_free_with_atomic(&clocks) { + return false; + } + } + } + true + } + /// Detect data-races for an unsynchronized read operation, will not perform /// data-race detection if `race_detecting()` is false, either due to no threads /// being created or if it is temporarily disabled during a racy read or write @@ -1027,7 +1088,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { let (alloc_id, base_offset, _tag) = this.ptr_get_alloc_id(place.ptr)?; // Load and log the atomic operation. // Note that atomic loads are possible even from read-only allocations, so `get_alloc_extra_mut` is not an option. - let alloc_meta = &this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap(); + let alloc_meta = this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap(); log::trace!( "Atomic op({}) with ordering {:?} on {:?} (size={})", description, diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index 7d8d7da6dc4e..a4fbd14f4374 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -29,6 +29,13 @@ //! Additionally, writes in our implementation do not have globally unique timestamps attached. In the other two models this timestamp is //! used to make sure a value in a thread's view is not overwritten by a write that occured earlier than the one in the existing view. //! In our implementation, this is detected using read information attached to store elements, as there is no data strucutre representing reads. +//! +//! Safe/sound Rust allows for more operations on atomic locations than the C++20 atomic API was intended to allow, such as non-atomically accessing +//! a previously atomically accessed location, or accessing previously atomically accessed locations with a differently sized operation +//! (such as accessing the top 16 bits of an AtomicU32). These scenarios are generally undiscussed in formalisations of C++ memory model. +//! 
In Rust, these operations can only be done through a `&mut AtomicFoo` reference or one derived from it, therefore these operations +//! can only happen after all previous accesses on the same locations. This implementation is adapted to allow these operations. +//! A mixed size/atomicity read that races with writes, or a write that races with reads or writes will still cause UBs to be thrown. // Our and the author's own implementation (tsan11) of the paper have some deviations from the provided operational semantics in ยง5.3: // 1. In the operational semantics, store elements keep a copy of the atomic object's vector clock (AtomicCellClocks::sync_vector in miri), @@ -117,6 +124,14 @@ impl StoreBufferAlloc { Self { store_buffers: RefCell::new(AllocationMap::new()) } } + /// Checks if the range imperfectly overlaps with existing buffers + /// Used to determine if mixed-size atomic accesses + fn is_overlapping(&self, range: AllocRange) -> bool { + let buffers = self.store_buffers.borrow(); + let access_type = buffers.access_type(range); + matches!(access_type, AccessType::ImperfectlyOverlapping(_)) + } + /// When a non-atomic access happens on a location that has been atomically accessed /// before without data race, we can determine that the non-atomic access fully happens /// before all the prior atomic accesses so the location no longer needs to exhibit @@ -148,21 +163,16 @@ impl StoreBufferAlloc { let pos = match access_type { AccessType::PerfectlyOverlapping(pos) => pos, AccessType::Empty(pos) => { - let new_buffer = StoreBuffer::new(init); let mut buffers = self.store_buffers.borrow_mut(); - buffers.insert_at_pos(pos, range, new_buffer); + buffers.insert_at_pos(pos, range, StoreBuffer::new(init)); pos } AccessType::ImperfectlyOverlapping(pos_range) => { - // Accesses that imperfectly overlaps with existing atomic objects - // do not have well-defined behaviours. 
- // FIXME: if this access happens before all previous accesses on every object it overlaps - // with, then we would like to tolerate it. However this is not easy to check. - if pos_range.start + 1 == pos_range.end { - throw_ub_format!("mixed-size access on an existing atomic object"); - } else { - throw_ub_format!("access overlaps with multiple existing atomic objects"); - } + // Once we reach here we would've already checked that this access is not racy + let mut buffers = self.store_buffers.borrow_mut(); + buffers.remove_pos_range(pos_range.clone()); + buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init)); + pos_range.start } }; Ok(Ref::map(self.store_buffers.borrow(), |buffer| &buffer[pos])) @@ -179,16 +189,13 @@ impl StoreBufferAlloc { let pos = match access_type { AccessType::PerfectlyOverlapping(pos) => pos, AccessType::Empty(pos) => { - let new_buffer = StoreBuffer::new(init); - buffers.insert_at_pos(pos, range, new_buffer); + buffers.insert_at_pos(pos, range, StoreBuffer::new(init)); pos } AccessType::ImperfectlyOverlapping(pos_range) => { - if pos_range.start + 1 == pos_range.end { - throw_ub_format!("mixed-size access on an existing atomic object"); - } else { - throw_ub_format!("access overlaps with multiple existing atomic objects"); - } + buffers.remove_pos_range(pos_range.clone()); + buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init)); + pos_range.start } }; Ok(&mut buffers[pos]) @@ -392,6 +399,55 @@ impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mi pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> { + // If weak memory emulation is enabled, check if this atomic op imperfectly overlaps with a previous + // atomic write. 
If it does, then we require it to be ordered (non-racy) with all previous atomic + // writes on all the bytes in range + fn validate_overlapping_atomic_read(&self, place: &MPlaceTy<'tcx, Tag>) -> InterpResult<'tcx> { + let this = self.eval_context_ref(); + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; + if let crate::AllocExtra { + weak_memory: Some(alloc_buffers), + data_race: Some(alloc_clocks), + .. + } = this.get_alloc_extra(alloc_id)? + { + let range = alloc_range(base_offset, place.layout.size); + if alloc_buffers.is_overlapping(range) + && !alloc_clocks + .read_race_free_with_atomic(range, this.machine.data_race.as_ref().unwrap()) + { + throw_ub_format!("racy imperfectly overlapping atomic access"); + } + } + Ok(()) + } + + // Same as above but needs to be ordered with all previous atomic read or writes + fn validate_overlapping_atomic_write( + &mut self, + place: &MPlaceTy<'tcx, Tag>, + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; + if let ( + crate::AllocExtra { + weak_memory: Some(alloc_buffers), + data_race: Some(alloc_clocks), + .. + }, + crate::Evaluator { data_race: Some(global), .. }, + ) = this.get_alloc_extra_mut(alloc_id)? + { + let range = alloc_range(base_offset, place.layout.size); + if alloc_buffers.is_overlapping(range) + && !alloc_clocks.write_race_free_with_atomic(range, global) + { + throw_ub_format!("racy imperfectly overlapping atomic access"); + } + } + Ok(()) + } + fn buffered_atomic_rmw( &mut self, new_val: ScalarMaybeUninit, diff --git a/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs b/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs index 34097f4a8937..7dad0a12e5dd 100644 --- a/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs +++ b/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs @@ -9,8 +9,8 @@ // so we have to stick to C++11 emulation from existing research. 
use std::sync::atomic::Ordering::*; -use std::thread::spawn; use std::sync::atomic::{fence, AtomicUsize}; +use std::thread::spawn; // Spins until it reads the given value fn reads_value(loc: &AtomicUsize, val: usize) -> usize { @@ -25,7 +25,7 @@ fn reads_value(loc: &AtomicUsize, val: usize) -> usize { fn static_atomic(val: usize) -> &'static AtomicUsize { let ret = Box::leak(Box::new(AtomicUsize::new(val))); // A workaround to put the initialization value in the store buffer. - ret.store(val, Relaxed); + ret.load(Relaxed); ret } @@ -82,4 +82,4 @@ pub fn main() { for _ in 0..500 { test_cpp20_rwc_syncs(); } -} \ No newline at end of file +} diff --git a/tests/compile-fail/weak_memory/racing_mixed_size.rs b/tests/compile-fail/weak_memory/racing_mixed_size.rs new file mode 100644 index 000000000000..d4ba48afe39d --- /dev/null +++ b/tests/compile-fail/weak_memory/racing_mixed_size.rs @@ -0,0 +1,38 @@ +// compile-flags: -Zmiri-ignore-leaks + +#![feature(core_intrinsics)] + +use std::sync::atomic::AtomicU32; +use std::sync::atomic::Ordering::*; +use std::thread::spawn; + +fn static_atomic_u32(val: u32) -> &'static AtomicU32 { + let ret = Box::leak(Box::new(AtomicU32::new(val))); + ret +} + +fn split_u32_ptr(dword: *const u32) -> *const [u16; 2] { + unsafe { std::mem::transmute::<*const u32, *const [u16; 2]>(dword) } +} + +// Wine's SRWLock implementation does this, which is definitely undefined in C++ memory model +// https://github.com/wine-mirror/wine/blob/303f8042f9db508adaca02ef21f8de4992cb9c03/dlls/ntdll/sync.c#L543-L566 +// Though it probably works just fine on x86 +pub fn main() { + let x = static_atomic_u32(0); + let j1 = spawn(move || { + x.store(1, Relaxed); + }); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + let x_split = split_u32_ptr(x_ptr); + unsafe { + let hi = &(*x_split)[0] as *const u16; + std::intrinsics::atomic_load_relaxed(hi); //~ ERROR: imperfectly overlapping + } + }); + + j1.join().unwrap(); + 
j2.join().unwrap(); +} diff --git a/tests/run-pass/weak_memory/extra_cpp.rs b/tests/run-pass/weak_memory/extra_cpp.rs index b20ec5834990..b1a683798bb3 100644 --- a/tests/run-pass/weak_memory/extra_cpp.rs +++ b/tests/run-pass/weak_memory/extra_cpp.rs @@ -4,13 +4,19 @@ // but doable in safe (at least sound) Rust. #![feature(atomic_from_mut)] +#![feature(core_intrinsics)] use std::sync::atomic::Ordering::*; -use std::sync::atomic::{AtomicU16, AtomicU32, AtomicUsize}; +use std::sync::atomic::{AtomicU16, AtomicU32}; use std::thread::spawn; -fn static_atomic_mut(val: usize) -> &'static mut AtomicUsize { - let ret = Box::leak(Box::new(AtomicUsize::new(val))); +fn static_atomic_mut(val: u32) -> &'static mut AtomicU32 { + let ret = Box::leak(Box::new(AtomicU32::new(val))); + ret +} + +fn static_atomic(val: u32) -> &'static AtomicU32 { + let ret = Box::leak(Box::new(AtomicU32::new(val))); ret } @@ -18,6 +24,10 @@ fn split_u32(dword: &mut u32) -> &mut [u16; 2] { unsafe { std::mem::transmute::<&mut u32, &mut [u16; 2]>(dword) } } +fn split_u32_ptr(dword: *const u32) -> *const [u16; 2] { + unsafe { std::mem::transmute::<*const u32, *const [u16; 2]>(dword) } +} + fn mem_replace() { let mut x = AtomicU32::new(0); @@ -31,7 +41,7 @@ fn assign_to_mut() { let x = static_atomic_mut(0); x.store(1, Relaxed); - *x = AtomicUsize::new(2); + *x = AtomicU32::new(2); assert_eq!(x.load(Relaxed), 2); } @@ -70,10 +80,81 @@ fn from_mut_split() { assert_eq!(x_lo_atomic.load(Relaxed), u16::from_be(0xfafa)); } +// Although not possible to do in safe Rust, +// we allow non-atomic and atomic reads to race +// as this should be sound +fn racing_mixed_atomicity_read() { + let x = static_atomic(0); + x.store(42, Relaxed); + + let j1 = spawn(move || x.load(Relaxed)); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + unsafe { std::intrinsics::atomic_load_relaxed(x_ptr) } + }); + + let r1 = j1.join().unwrap(); + let r2 = j2.join().unwrap(); + + assert_eq!(r1, 42); + 
assert_eq!(r2, 42); +} + +fn racing_mixed_size_read() { + let x = static_atomic(0); + + let j1 = spawn(move || { + x.load(Relaxed); + }); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + let x_split = split_u32_ptr(x_ptr); + unsafe { + let hi = &(*x_split)[0] as *const u16; + std::intrinsics::atomic_load_relaxed(hi); //~ ERROR: imperfectly overlapping + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); +} + +fn racing_mixed_atomicity_and_size_read() { + let x = static_atomic(u32::from_be(0xabbafafa)); + + let j1 = spawn(move || { + x.load(Relaxed); + }); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + unsafe { *x_ptr }; + }); + + let j3 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + let x_split = split_u32_ptr(x_ptr); + unsafe { + let hi = &(*x_split)[0] as *const u16; + std::intrinsics::atomic_load_relaxed(hi) + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); + let r3 = j3.join().unwrap(); + + assert_eq!(r3, u16::from_be(0xabba)); +} pub fn main() { get_mut_write(); from_mut_split(); assign_to_mut(); mem_replace(); + racing_mixed_atomicity_read(); + racing_mixed_size_read(); + racing_mixed_atomicity_and_size_read(); }