Auto merge of #117011 - RalfJung:miri, r=RalfJung

Miri subtree update

r? `@ghost`
This commit is contained in:
bors 2023-10-21 07:14:36 +00:00
commit 45a45c6e60
35 changed files with 1516 additions and 186 deletions

View file

@ -24,6 +24,17 @@ dependencies = [
"rustc-std-workspace-core",
]
[[package]]
name = "aes"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac1f845298e95f983ff1944b728ae08b8cebab80d684f0a832ed0fc74dfa27e2"
dependencies = [
"cfg-if",
"cipher",
"cpufeatures",
]
[[package]]
name = "ahash"
version = "0.8.3"
@ -470,6 +481,16 @@ dependencies = [
"winapi",
]
[[package]]
name = "cipher"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
dependencies = [
"crypto-common",
"inout",
]
[[package]]
name = "clap"
version = "4.4.4"
@ -1984,6 +2005,15 @@ version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306"
[[package]]
name = "inout"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5"
dependencies = [
"generic-array",
]
[[package]]
name = "installer"
version = "0.0.0"
@ -2465,6 +2495,7 @@ dependencies = [
name = "miri"
version = "0.1.0"
dependencies = [
"aes",
"colored",
"ctrlc",
"env_logger 0.10.0",

View file

@ -188,7 +188,7 @@ jobs:
with:
fetch-depth: 256 # get a bit more of the history
- name: install josh-proxy
run: cargo +stable install josh-proxy --git https://github.com/josh-project/josh --tag r22.12.06
run: RUSTFLAGS="--cap-lints warn" cargo +stable install josh-proxy --git https://github.com/josh-project/josh --tag r22.12.06
- name: setup bot git name and email
run: |
git config --global user.name 'The Miri Conjob Bot'
@ -208,7 +208,7 @@ jobs:
git push -u origin $BRANCH
- name: Create Pull Request
run: |
PR=$(gh pr create -B master --title 'Automatic sync from rustc' --body '')
PR=$(gh pr create -B master --title 'Automatic Rustup' --body '')
~/.local/bin/zulip-send --user $ZULIP_BOT_EMAIL --api-key $ZULIP_API_TOKEN --site https://rust-lang.zulipchat.com \
--stream miri --subject "Cron Job Failure (miri, $(date -u +%Y-%m))" \
--message "A PR doing a rustc-pull [has been automatically created]($PR) for your convenience."

View file

@ -17,6 +17,17 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "aes"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac1f845298e95f983ff1944b728ae08b8cebab80d684f0a832ed0fc74dfa27e2"
dependencies = [
"cfg-if",
"cipher",
"cpufeatures",
]
[[package]]
name = "aho-corasick"
version = "1.1.1"
@ -142,6 +153,16 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cipher"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
dependencies = [
"crypto-common",
"inout",
]
[[package]]
name = "color-eyre"
version = "0.6.2"
@ -199,6 +220,15 @@ dependencies = [
"windows-sys 0.45.0",
]
[[package]]
name = "cpufeatures"
version = "0.2.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1"
dependencies = [
"libc",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.8"
@ -218,6 +248,16 @@ dependencies = [
"cfg-if",
]
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "ctrlc"
version = "3.4.1"
@ -284,6 +324,16 @@ version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5"
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "getrandom"
version = "0.2.10"
@ -332,6 +382,15 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "inout"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5"
dependencies = [
"generic-array",
]
[[package]]
name = "instant"
version = "0.1.12"
@ -469,6 +528,7 @@ dependencies = [
name = "miri"
version = "0.1.0"
dependencies = [
"aes",
"colored",
"ctrlc",
"env_logger",
@ -726,9 +786,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.38.17"
version = "0.38.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f25469e9ae0f3d0047ca8b93fc56843f38e6774f0914a107ff8b41be8be8e0b7"
checksum = "745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed"
dependencies = [
"bitflags 2.4.0",
"errno",
@ -909,6 +969,12 @@ dependencies = [
"tracing-core",
]
[[package]]
name = "typenum"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "ui_test"
version = "0.21.2"
@ -954,6 +1020,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"

View file

@ -23,6 +23,7 @@ env_logger = "0.10"
log = "0.4"
rand = "0.8"
smallvec = "1.7"
aes = { version = "0.8.3", features = ["hazmat"] }
measureme = "10.0.0"
ctrlc = "3.2.5"

View file

@ -238,9 +238,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.38.17"
version = "0.38.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f25469e9ae0f3d0047ca8b93fc56843f38e6774f0914a107ff8b41be8be8e0b7"
checksum = "745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed"
dependencies = [
"bitflags 2.4.0",
"errno",

View file

@ -213,9 +213,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.38.17"
version = "0.38.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f25469e9ae0f3d0047ca8b93fc56843f38e6774f0914a107ff8b41be8be8e0b7"
checksum = "745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed"
dependencies = [
"bitflags 2.4.0",
"errno",

View file

@ -1 +1 @@
4ea5190026dbc1302b644d938e68bc6843cb8b24
249624b5043013d18c00f0401ca431c1a6baa8cd

View file

@ -206,11 +206,7 @@ trait EvalContextPrivExt<'mir: 'ecx, 'tcx: 'mir, 'ecx>: crate::MiriInterpCxExt<'
// Make sure the new permission makes sense as the initial permission of a fresh tag.
assert!(new_perm.initial_state.is_initial());
// Ensure we bail out if the pointer goes out-of-bounds (see miri#1050).
this.check_ptr_access(
place.ptr(),
ptr_size,
CheckInAllocMsg::InboundsTest,
)?;
this.check_ptr_access(place.ptr(), ptr_size, CheckInAllocMsg::InboundsTest)?;
// It is crucial that this gets called on all code paths, to ensure we track tag creation.
let log_creation = |this: &MiriInterpCx<'mir, 'tcx>,

View file

@ -1017,10 +1017,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
// even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must
// be 8-aligned).
let align = Align::from_bytes(place.layout.size.bytes()).unwrap();
this.check_ptr_align(
place.ptr(),
align,
)?;
this.check_ptr_align(place.ptr(), align)?;
// Ensure the allocation is mutable. Even failing (read-only) compare_exchange need mutable
memory on many targets (i.e., they segfault if that memory is mapped read-only), and
// atomic loads can be implemented via compare_exchange on some targets. There could

View file

@ -868,9 +868,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
let size2 = Size::from_bytes(2);
let this = self.eval_context_mut();
this.check_ptr_align(ptr, Align::from_bytes(2).unwrap())?;
let mut alloc = this
.get_ptr_alloc_mut(ptr, size2 * string_length)?
.unwrap(); // not a ZST, so we will get a result
let mut alloc = this.get_ptr_alloc_mut(ptr, size2 * string_length)?.unwrap(); // not a ZST, so we will get a result
for (offset, wchar) in wide_str.iter().copied().chain(iter::once(0x0000)).enumerate() {
let offset = u64::try_from(offset).unwrap();
alloc.write_scalar(alloc_range(size2 * offset, size2), Scalar::from_u16(wchar))?;

View file

@ -26,8 +26,10 @@ pub type GlobalState = RefCell<GlobalStateInner>;
#[derive(Clone, Debug)]
pub struct GlobalStateInner {
/// This is used as a map between the address of each allocation and its `AllocId`.
/// It is always sorted
/// This is used as a map between the address of each allocation and its `AllocId`. It is always
/// sorted. We cannot use a `HashMap` since we can be given an address that is offset from the
/// base address, and we need to find the `AllocId` it belongs to.
/// This is not the *full* inverse of `base_addr`; dead allocations have been removed.
int_to_ptr_map: Vec<(u64, AllocId)>,
/// The base address for each allocation. We cannot put that into
/// `AllocExtra` because function pointers also have a base address, and
@ -62,10 +64,21 @@ impl GlobalStateInner {
}
}
impl<'mir, 'tcx> GlobalStateInner {
/// Rounds `addr` up to the nearest multiple of `align`, i.e. returns the
/// smallest multiple of `align` that is greater than or equal to `addr`.
fn align_addr(addr: u64, align: u64) -> u64 {
    let rem = addr % align;
    if rem == 0 {
        addr
    } else {
        // Bump up to the next multiple; `checked_add` guards against `u64` overflow.
        addr.checked_add(align).unwrap() - rem
    }
}
impl<'mir, 'tcx: 'mir> EvalContextExtPriv<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
// Returns the exposed `AllocId` that corresponds to the specified addr,
// or `None` if the addr is out of bounds
fn alloc_id_from_addr(ecx: &MiriInterpCx<'mir, 'tcx>, addr: u64) -> Option<AllocId> {
fn alloc_id_from_addr(&self, addr: u64) -> Option<AllocId> {
let ecx = self.eval_context_ref();
let global_state = ecx.machine.intptrcast.borrow();
assert!(global_state.provenance_mode != ProvenanceMode::Strict);
@ -82,31 +95,90 @@ impl<'mir, 'tcx> GlobalStateInner {
let (glb, alloc_id) = global_state.int_to_ptr_map[pos - 1];
// This never overflows because `addr >= glb`
let offset = addr - glb;
// If the offset exceeds the size of the allocation, don't use this `alloc_id`.
// We require this to be strict in-bounds of the allocation. This arm is only
// entered for addresses that are not the base address, so even zero-sized
// allocations will get recognized at their base address -- but all other
// allocations will *not* be recognized at their "end" address.
let size = ecx.get_alloc_info(alloc_id).0;
if offset <= size.bytes() { Some(alloc_id) } else { None }
if offset < size.bytes() { Some(alloc_id) } else { None }
}
}?;
// We only use this provenance if it has been exposed, *and* is still live.
// We only use this provenance if it has been exposed.
if global_state.exposed.contains(&alloc_id) {
let (_size, _align, kind) = ecx.get_alloc_info(alloc_id);
match kind {
AllocKind::LiveData | AllocKind::Function | AllocKind::VTable => {
return Some(alloc_id);
}
AllocKind::Dead => {}
}
// This must still be live, since we remove allocations from `int_to_ptr_map` when they get freed.
debug_assert!(!matches!(ecx.get_alloc_info(alloc_id).2, AllocKind::Dead));
Some(alloc_id)
} else {
None
}
None
}
pub fn expose_ptr(
ecx: &mut MiriInterpCx<'mir, 'tcx>,
alloc_id: AllocId,
tag: BorTag,
) -> InterpResult<'tcx> {
/// Returns the absolute base address for `alloc_id`, assigning (and caching) a
/// fresh address the first time an allocation is seen. Newly assigned addresses
/// are recorded in both `base_addr` and the sorted `int_to_ptr_map`.
/// Fails with `AddressSpaceFull` if the target address space is exhausted.
fn addr_from_alloc_id(&self, alloc_id: AllocId) -> InterpResult<'tcx, u64> {
let ecx = self.eval_context_ref();
let mut global_state = ecx.machine.intptrcast.borrow_mut();
// Get a plain `&mut` out of the `RefMut` so we can access its fields below.
let global_state = &mut *global_state;
Ok(match global_state.base_addr.entry(alloc_id) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
let (size, align, kind) = ecx.get_alloc_info(alloc_id);
// This is either called immediately after allocation (and then cached), or when
// adjusting `tcx` pointers (which never get freed). So assert that we are looking
// at a live allocation. This also ensures that we never re-assign an address to an
// allocation that previously had an address, but then was freed and the address
// information was removed.
assert!(!matches!(kind, AllocKind::Dead));
// This allocation does not have a base address yet, pick one.
// Leave some space to the previous allocation, to give it some chance to be less aligned.
let slack = {
let mut rng = ecx.machine.rng.borrow_mut();
// This means that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
rng.gen_range(0..16)
};
// From next_base_addr + slack, round up to adjust for alignment.
let base_addr = global_state
.next_base_addr
.checked_add(slack)
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
let base_addr = align_addr(base_addr, align.bytes());
entry.insert(base_addr);
trace!(
"Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
base_addr,
alloc_id,
size.bytes(),
align.bytes(),
slack,
);
// Remember next base address. If this allocation is zero-sized, leave a gap
// of at least 1 to avoid two allocations having the same base address.
// (The logic in `alloc_id_from_addr` assumes unique addresses, and different
// function/vtable pointers need to be distinguishable!)
global_state.next_base_addr = base_addr
.checked_add(max(size.bytes(), 1))
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
// Even if `Size` didn't overflow, we might still have filled up the address space.
if global_state.next_base_addr > ecx.target_usize_max() {
throw_exhaust!(AddressSpaceFull);
}
// Also maintain the opposite mapping in `int_to_ptr_map`.
// Given that `next_base_addr` increases in each allocation, pushing the
// corresponding tuple keeps `int_to_ptr_map` sorted
global_state.int_to_ptr_map.push((base_addr, alloc_id));
base_addr
}
})
}
}
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
fn expose_ptr(&mut self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
let ecx = self.eval_context_mut();
let global_state = ecx.machine.intptrcast.get_mut();
// In strict mode, we don't need this, so we can save some cycles by not tracking it.
if global_state.provenance_mode != ProvenanceMode::Strict {
@ -119,14 +191,13 @@ impl<'mir, 'tcx> GlobalStateInner {
Ok(())
}
pub fn ptr_from_addr_cast(
ecx: &MiriInterpCx<'mir, 'tcx>,
addr: u64,
) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
fn ptr_from_addr_cast(&self, addr: u64) -> InterpResult<'tcx, Pointer<Option<Provenance>>> {
trace!("Casting {:#x} to a pointer", addr);
// Potentially emit a warning.
let ecx = self.eval_context_ref();
let global_state = ecx.machine.intptrcast.borrow();
// Potentially emit a warning.
match global_state.provenance_mode {
ProvenanceMode::Default => {
// The first time this happens at a particular location, print a warning.
@ -157,99 +228,40 @@ impl<'mir, 'tcx> GlobalStateInner {
Ok(Pointer::new(Some(Provenance::Wildcard), Size::from_bytes(addr)))
}
fn alloc_base_addr(
ecx: &MiriInterpCx<'mir, 'tcx>,
alloc_id: AllocId,
) -> InterpResult<'tcx, u64> {
let mut global_state = ecx.machine.intptrcast.borrow_mut();
let global_state = &mut *global_state;
Ok(match global_state.base_addr.entry(alloc_id) {
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
// There is nothing wrong with a raw pointer being cast to an integer only after
// it became dangling. Hence we allow dead allocations.
let (size, align, _kind) = ecx.get_alloc_info(alloc_id);
// This allocation does not have a base address yet, pick one.
// Leave some space to the previous allocation, to give it some chance to be less aligned.
let slack = {
let mut rng = ecx.machine.rng.borrow_mut();
// This means that `(global_state.next_base_addr + slack) % 16` is uniformly distributed.
rng.gen_range(0..16)
};
// From next_base_addr + slack, round up to adjust for alignment.
let base_addr = global_state
.next_base_addr
.checked_add(slack)
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
let base_addr = Self::align_addr(base_addr, align.bytes());
entry.insert(base_addr);
trace!(
"Assigning base address {:#x} to allocation {:?} (size: {}, align: {}, slack: {})",
base_addr,
alloc_id,
size.bytes(),
align.bytes(),
slack,
);
// Remember next base address. If this allocation is zero-sized, leave a gap
// of at least 1 to avoid two allocations having the same base address.
// (The logic in `alloc_id_from_addr` assumes unique addresses, and different
// function/vtable pointers need to be distinguishable!)
global_state.next_base_addr = base_addr
.checked_add(max(size.bytes(), 1))
.ok_or_else(|| err_exhaust!(AddressSpaceFull))?;
// Even if `Size` didn't overflow, we might still have filled up the address space.
if global_state.next_base_addr > ecx.target_usize_max() {
throw_exhaust!(AddressSpaceFull);
}
// Given that `next_base_addr` increases in each allocation, pushing the
// corresponding tuple keeps `int_to_ptr_map` sorted
global_state.int_to_ptr_map.push((base_addr, alloc_id));
base_addr
}
})
}
/// Convert a relative (tcx) pointer to a Miri pointer.
pub fn ptr_from_rel_ptr(
ecx: &MiriInterpCx<'mir, 'tcx>,
fn ptr_from_rel_ptr(
&self,
ptr: Pointer<AllocId>,
tag: BorTag,
) -> InterpResult<'tcx, Pointer<Provenance>> {
let ecx = self.eval_context_ref();
let (alloc_id, offset) = ptr.into_parts(); // offset is relative (AllocId provenance)
let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id)?;
let base_addr = ecx.addr_from_alloc_id(alloc_id)?;
// Add offset with the right kind of pointer-overflowing arithmetic.
let dl = ecx.data_layout();
let absolute_addr = dl.overflowing_offset(base_addr, offset.bytes()).0;
Ok(Pointer::new(
Provenance::Concrete { alloc_id, tag },
Size::from_bytes(absolute_addr),
))
Ok(Pointer::new(Provenance::Concrete { alloc_id, tag }, Size::from_bytes(absolute_addr)))
}
/// When a pointer is used for a memory access, this computes where in which allocation the
/// access is going.
pub fn ptr_get_alloc(
ecx: &MiriInterpCx<'mir, 'tcx>,
ptr: Pointer<Provenance>,
) -> Option<(AllocId, Size)> {
fn ptr_get_alloc(&self, ptr: Pointer<Provenance>) -> Option<(AllocId, Size)> {
let ecx = self.eval_context_ref();
let (tag, addr) = ptr.into_parts(); // addr is absolute (Tag provenance)
let alloc_id = if let Provenance::Concrete { alloc_id, .. } = tag {
alloc_id
} else {
// A wildcard pointer.
GlobalStateInner::alloc_id_from_addr(ecx, addr.bytes())?
ecx.alloc_id_from_addr(addr.bytes())?
};
// This cannot fail: since we already have a pointer with that provenance, rel_ptr_to_addr
// must have been called in the past.
let base_addr = GlobalStateInner::alloc_base_addr(ecx, alloc_id).unwrap();
// must have been called in the past, so we can just look up the address in the map.
let base_addr = ecx.addr_from_alloc_id(alloc_id).unwrap();
// Wrapping "addr - base_addr"
#[allow(clippy::cast_possible_wrap)] // we want to wrap here
@ -259,14 +271,24 @@ impl<'mir, 'tcx> GlobalStateInner {
Size::from_bytes(ecx.overflowing_signed_offset(addr.bytes(), neg_base_addr).0),
))
}
}
/// Shifts `addr` to make it aligned with `align` by rounding `addr` to the smallest multiple
/// of `align` that is larger or equal to `addr`
fn align_addr(addr: u64, align: u64) -> u64 {
match addr % align {
0 => addr,
rem => addr.checked_add(align).unwrap() - rem,
}
impl GlobalStateInner {
    /// Purges the bookkeeping for a deallocated allocation.
    pub fn free_alloc_id(&mut self, dead_id: AllocId) {
        // The `base_addr` entry must stay: the interpreter computes an AllocId +
        // offset for every memory access *before* checking whether the access is
        // valid (via `ptr_get_alloc`), and pointers carrying `dead_id` can still
        // be used in such (invalid) accesses. They must keep resolving so error
        // messages stay consistent; `ptr_get_alloc` only returns `None` for
        // pointers that truly have no provenance.
        //
        // Dropping the entry from `int_to_ptr_map` is fine, though: no wildcard
        // pointer can legally access this address any more, and removing it
        // guarantees `alloc_id_from_addr` never returns a dead allocation.
        self.int_to_ptr_map.retain(|&(_, entry_id)| entry_id != dead_id);
        // Likewise, since `alloc_id_from_addr` can no longer produce this
        // allocation, there is no point in tracking its exposure any more.
        self.exposed.remove(&dead_id);
    }
}
@ -276,7 +298,7 @@ mod tests {
#[test]
fn test_align_addr() {
assert_eq!(GlobalStateInner::align_addr(37, 4), 40);
assert_eq!(GlobalStateInner::align_addr(44, 4), 44);
assert_eq!(align_addr(37, 4), 40);
assert_eq!(align_addr(44, 4), 44);
}
}

View file

@ -117,7 +117,7 @@ pub use crate::eval::{
create_ecx, eval_entry, AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, RejectOpWith,
};
pub use crate::helpers::EvalContextExt as _;
pub use crate::intptrcast::ProvenanceMode;
pub use crate::intptrcast::{EvalContextExt as _, ProvenanceMode};
pub use crate::machine::{
AllocExtra, FrameExtra, MiriInterpCx, MiriInterpCxExt, MiriMachine, MiriMemoryKind,
PrimitiveLayouts, Provenance, ProvenanceExtra,

View file

@ -1006,7 +1006,10 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
}
#[inline(always)]
fn generate_nan<F1: rustc_apfloat::Float + rustc_apfloat::FloatConvert<F2>, F2: rustc_apfloat::Float>(
fn generate_nan<
F1: rustc_apfloat::Float + rustc_apfloat::FloatConvert<F2>,
F2: rustc_apfloat::Float,
>(
ecx: &InterpCx<'mir, 'tcx, Self>,
inputs: &[F1],
) -> F2 {
@ -1146,7 +1149,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
// Value does not matter, SB is disabled
BorTag::default()
};
intptrcast::GlobalStateInner::ptr_from_rel_ptr(ecx, ptr, tag)
ecx.ptr_from_rel_ptr(ptr, tag)
}
/// Called on `usize as ptr` casts.
@ -1155,7 +1158,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
ecx: &MiriInterpCx<'mir, 'tcx>,
addr: u64,
) -> InterpResult<'tcx, Pointer<Option<Self::Provenance>>> {
intptrcast::GlobalStateInner::ptr_from_addr_cast(ecx, addr)
ecx.ptr_from_addr_cast(addr)
}
/// Called on `ptr as usize` casts.
@ -1166,8 +1169,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
ptr: Pointer<Self::Provenance>,
) -> InterpResult<'tcx> {
match ptr.provenance {
Provenance::Concrete { alloc_id, tag } =>
intptrcast::GlobalStateInner::expose_ptr(ecx, alloc_id, tag),
Provenance::Concrete { alloc_id, tag } => ecx.expose_ptr(alloc_id, tag),
Provenance::Wildcard => {
// No need to do anything for wildcard pointers as
// their provenances have already been previously exposed.
@ -1188,7 +1190,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
ecx: &MiriInterpCx<'mir, 'tcx>,
ptr: Pointer<Self::Provenance>,
) -> Option<(AllocId, Size, Self::ProvenanceExtra)> {
let rel = intptrcast::GlobalStateInner::ptr_get_alloc(ecx, ptr);
let rel = ecx.ptr_get_alloc(ptr);
rel.map(|(alloc_id, size)| {
let tag = match ptr.provenance {
@ -1260,6 +1262,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for MiriMachine<'mir, 'tcx> {
{
*deallocated_at = Some(machine.current_span());
}
machine.intptrcast.get_mut().free_alloc_id(alloc_id);
Ok(())
}

View file

@ -805,12 +805,7 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
this.ptr_get_alloc_id(ptr_dest)?;
this.ptr_get_alloc_id(ptr_src)?;
this.mem_copy(
ptr_src,
ptr_dest,
Size::from_bytes(n),
true,
)?;
this.mem_copy(ptr_src, ptr_dest, Size::from_bytes(n), true)?;
this.write_pointer(ptr_dest, dest)?;
}
"strcpy" => {
@ -826,12 +821,7 @@ trait EvalContextExtPriv<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
// reason to have `strcpy` destroy pointer provenance.
// This reads at least 1 byte, so we are already enforcing that this is a valid pointer.
let n = this.read_c_str(ptr_src)?.len().checked_add(1).unwrap();
this.mem_copy(
ptr_src,
ptr_dest,
Size::from_bytes(n),
true,
)?;
this.mem_copy(ptr_src, ptr_dest, Size::from_bytes(n), true)?;
this.write_pointer(ptr_dest, dest)?;
}

View file

@ -756,11 +756,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
trace!("Reading from FD {}, size {}", fd, count);
// Check that the *entire* buffer is actually valid memory.
this.check_ptr_access(
buf,
Size::from_bytes(count),
CheckInAllocMsg::MemoryAccessTest,
)?;
this.check_ptr_access(buf, Size::from_bytes(count), CheckInAllocMsg::MemoryAccessTest)?;
// We cap the number of read bytes to the largest value that we are able to fit in both the
// host's and target's `isize`. This saves us from having to handle overflows later.
@ -809,11 +805,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
// Isolation check is done via `FileDescriptor` trait.
// Check that the *entire* buffer is actually valid memory.
this.check_ptr_access(
buf,
Size::from_bytes(count),
CheckInAllocMsg::MemoryAccessTest,
)?;
this.check_ptr_access(buf, Size::from_bytes(count), CheckInAllocMsg::MemoryAccessTest)?;
// We cap the number of written bytes to the largest value that we are able to fit in both the
// host's and target's `isize`. This saves us from having to handle overflows later.

View file

@ -85,10 +85,7 @@ pub fn futex<'tcx>(
return Ok(());
}
let timeout = this.deref_pointer_as(
&args[3],
this.libc_ty_layout("timespec"),
)?;
let timeout = this.deref_pointer_as(&args[3], this.libc_ty_layout("timespec"))?;
let timeout_time = if this.ptr_is_null(timeout.ptr())? {
None
} else {

View file

@ -321,8 +321,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
this.atomic_fence(AtomicFenceOrd::SeqCst)?;
let layout = this.machine.layouts.uint(size).unwrap();
let futex_val = this
.read_scalar_atomic(&this.ptr_to_mplace(ptr, layout), AtomicReadOrd::Relaxed)?;
let futex_val =
this.read_scalar_atomic(&this.ptr_to_mplace(ptr, layout), AtomicReadOrd::Relaxed)?;
let compare_val = this.read_scalar(&this.ptr_to_mplace(compare, layout))?;
if futex_val == compare_val {

View file

@ -0,0 +1,168 @@
use rustc_middle::ty::layout::LayoutOf as _;
use rustc_middle::ty::Ty;
use rustc_span::Symbol;
use rustc_target::spec::abi::Abi;
use crate::*;
use shims::foreign_items::EmulateForeignItemResult;
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
crate::MiriInterpCxExt<'mir, 'tcx>
{
/// Emulates a call to an `llvm.x86.aesni.*` intrinsic named by `link_name`,
/// writing the result into `dest`. Returns `NeedsJumping` on success, or
/// `NotSupported` for intrinsics this shim does not handle (currently only
/// `aeskeygenassist`).
fn emulate_x86_aesni_intrinsic(
&mut self,
link_name: Symbol,
abi: Abi,
args: &[OpTy<'tcx, Provenance>],
dest: &PlaceTy<'tcx, Provenance>,
) -> InterpResult<'tcx, EmulateForeignItemResult> {
let this = self.eval_context_mut();
// Prefix should have already been checked.
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.aesni.").unwrap();
match unprefixed_name {
// Used to implement the _mm_aesdec_si128, _mm256_aesdec_epi128
// and _mm512_aesdec_epi128 functions.
// Performs one round of an AES decryption on each 128-bit word of
// `state` with the corresponding 128-bit key of `key`.
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdec_si128
"aesdec" | "aesdec.256" | "aesdec.512" => {
let [state, key] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
aes_round(this, state, key, dest, |state, key| {
let key = aes::Block::from(key.to_le_bytes());
let mut state = aes::Block::from(state.to_le_bytes());
// `aes::hazmat::equiv_inv_cipher_round` documentation states that
// it performs the same operation as the x86 aesdec instruction.
aes::hazmat::equiv_inv_cipher_round(&mut state, &key);
u128::from_le_bytes(state.into())
})?;
}
// Used to implement the _mm_aesdeclast_si128, _mm256_aesdeclast_epi128
// and _mm512_aesdeclast_epi128 functions.
// Performs last round of an AES decryption on each 128-bit word of
// `state` with the corresponding 128-bit key of `key`.
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesdeclast_si128
"aesdeclast" | "aesdeclast.256" | "aesdeclast.512" => {
let [state, key] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
// The last decryption round omits InvMixColumns, which the helper does not
// expose directly; it is reconstructed from `equiv_inv_cipher_round` below.
aes_round(this, state, key, dest, |state, key| {
let mut state = aes::Block::from(state.to_le_bytes());
// `aes::hazmat::equiv_inv_cipher_round` does the following operations:
// state = InvShiftRows(state)
// state = InvSubBytes(state)
// state = InvMixColumns(state)
// state = state ^ key
// But we need to skip the InvMixColumns.
// First, use a zeroed key to skip the XOR.
aes::hazmat::equiv_inv_cipher_round(&mut state, &aes::Block::from([0; 16]));
// Then, undo the InvMixColumns with MixColumns.
aes::hazmat::mix_columns(&mut state);
// Finally, do the XOR.
u128::from_le_bytes(state.into()) ^ key
})?;
}
// Used to implement the _mm_aesenc_si128, _mm256_aesenc_epi128
// and _mm512_aesenc_epi128 functions.
// Performs one round of an AES encryption on each 128-bit word of
// `state` with the corresponding 128-bit key of `key`.
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenc_si128
"aesenc" | "aesenc.256" | "aesenc.512" => {
let [state, key] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
aes_round(this, state, key, dest, |state, key| {
let key = aes::Block::from(key.to_le_bytes());
let mut state = aes::Block::from(state.to_le_bytes());
// `aes::hazmat::cipher_round` documentation states that
// it performs the same operation as the x86 aesenc instruction.
aes::hazmat::cipher_round(&mut state, &key);
u128::from_le_bytes(state.into())
})?;
}
// Used to implement the _mm_aesenclast_si128, _mm256_aesenclast_epi128
// and _mm512_aesenclast_epi128 functions.
// Performs last round of an AES encryption on each 128-bit word of
// `state` with the corresponding 128-bit key of `key`.
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_aesenclast_si128
"aesenclast" | "aesenclast.256" | "aesenclast.512" => {
let [state, key] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
// As for `aesdeclast`: the last encryption round omits MixColumns, which is
// reconstructed here from `cipher_round`.
aes_round(this, state, key, dest, |state, key| {
let mut state = aes::Block::from(state.to_le_bytes());
// `aes::hazmat::cipher_round` does the following operations:
// state = ShiftRows(state)
// state = SubBytes(state)
// state = MixColumns(state)
// state = state ^ key
// But we need to skip the MixColumns.
// First, use a zeroed key to skip the XOR.
aes::hazmat::cipher_round(&mut state, &aes::Block::from([0; 16]));
// Then, undo the MixColumns with InvMixColumns.
aes::hazmat::inv_mix_columns(&mut state);
// Finally, do the XOR.
u128::from_le_bytes(state.into()) ^ key
})?;
}
// Used to implement the _mm_aesimc_si128 function.
// Performs the AES InvMixColumns operation on `op`
"aesimc" => {
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
// Transmute to `u128`
let op = op.transmute(this.machine.layouts.u128, this)?;
let dest = dest.transmute(this.machine.layouts.u128, this)?;
let state = this.read_scalar(&op)?.to_u128()?;
let mut state = aes::Block::from(state.to_le_bytes());
aes::hazmat::inv_mix_columns(&mut state);
this.write_scalar(Scalar::from_u128(u128::from_le_bytes(state.into())), &dest)?;
}
// TODO: Implement the `llvm.x86.aesni.aeskeygenassist` when possible
// with an external crate.
_ => return Ok(EmulateForeignItemResult::NotSupported),
}
Ok(EmulateForeignItemResult::NeedsJumping)
}
}
// Applies one AES round (the operation `f`) to every 128-bit block of
// `state`, pairing it with the corresponding 128-bit block of `key`,
// and writes the results to `dest`.
fn aes_round<'tcx>(
    this: &mut crate::MiriInterpCx<'_, 'tcx>,
    state: &OpTy<'tcx, Provenance>,
    key: &OpTy<'tcx, Provenance>,
    dest: &PlaceTy<'tcx, Provenance>,
    f: impl Fn(u128, u128) -> u128,
) -> InterpResult<'tcx, ()> {
    // All three operands must have the same size, which must be a whole
    // number of 128-bit (16-byte) blocks.
    assert_eq!(dest.layout.size, state.layout.size);
    assert_eq!(dest.layout.size, key.layout.size);
    assert_eq!(dest.layout.size.bytes() % 16, 0);
    let num_blocks = dest.layout.size.bytes() / 16;
    // Reinterpret each operand as an array of `u128` so we can process it
    // block by block.
    let block_array_layout =
        this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u128, num_blocks))?;
    let state = state.transmute(block_array_layout, this)?;
    let key = key.transmute(block_array_layout, this)?;
    let dest = dest.transmute(block_array_layout, this)?;
    for idx in 0..num_blocks {
        let state_block = this.read_scalar(&this.project_index(&state, idx)?)?.to_u128()?;
        let key_block = this.read_scalar(&this.project_index(&key, idx)?)?.to_u128()?;
        let dest_elem = this.project_index(&dest, idx)?;
        let round_result = f(state_block, key_block);
        this.write_scalar(Scalar::from_u128(round_result), &dest_elem)?;
    }
    Ok(())
}

View file

@ -7,9 +7,11 @@ use crate::*;
use helpers::bool_to_simd_element;
use shims::foreign_items::EmulateForeignItemResult;
mod aesni;
mod sse;
mod sse2;
mod sse3;
mod sse41;
mod ssse3;
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
@ -100,6 +102,17 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
this, link_name, abi, args, dest,
);
}
name if name.starts_with("sse41.") => {
return sse41::EvalContextExt::emulate_x86_sse41_intrinsic(
this, link_name, abi, args, dest,
);
}
name if name.starts_with("aesni.") => {
return aesni::EvalContextExt::emulate_x86_aesni_intrinsic(
this, link_name, abi, args, dest,
);
}
_ => return Ok(EmulateForeignItemResult::NotSupported),
}
Ok(EmulateForeignItemResult::NeedsJumping)

View file

@ -73,12 +73,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
let src_ptr = this.read_pointer(src_ptr)?;
let dest = dest.force_mplace(this)?;
this.mem_copy(
src_ptr,
dest.ptr(),
dest.layout.size,
/*nonoverlapping*/ true,
)?;
this.mem_copy(src_ptr, dest.ptr(), dest.layout.size, /*nonoverlapping*/ true)?;
}
_ => return Ok(EmulateForeignItemResult::NotSupported),
}

View file

@ -0,0 +1,319 @@
use rustc_middle::mir;
use rustc_span::Symbol;
use rustc_target::abi::Size;
use rustc_target::spec::abi::Abi;
use crate::*;
use shims::foreign_items::EmulateForeignItemResult;
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
    crate::MiriInterpCxExt<'mir, 'tcx>
{
    /// Emulates a call to an LLVM `llvm.x86.sse41.*` intrinsic.
    ///
    /// `link_name` is the full intrinsic name, `args` are the operands and
    /// `dest` is where the result is written. Returns whether the intrinsic
    /// was recognized (and, if so, whether a jump to the next block is needed).
    fn emulate_x86_sse41_intrinsic(
        &mut self,
        link_name: Symbol,
        abi: Abi,
        args: &[OpTy<'tcx, Provenance>],
        dest: &PlaceTy<'tcx, Provenance>,
    ) -> InterpResult<'tcx, EmulateForeignItemResult> {
        let this = self.eval_context_mut();
        // Prefix should have already been checked.
        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse41.").unwrap();
        match unprefixed_name {
            // Used to implement the _mm_insert_ps function.
            // Takes one element of `right` and inserts it into `left` and
            // optionally zeroes some elements. Source index is specified
            // in bits `6..=7` of `imm`, destination index is specified in
            // bits `4..=5` of `imm`, and the `i`th bit specifies whether element
            // `i` is zeroed.
            "insertps" => {
                let [left, right, imm] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
                let (left, left_len) = this.operand_to_simd(left)?;
                let (right, right_len) = this.operand_to_simd(right)?;
                let (dest, dest_len) = this.place_to_simd(dest)?;
                assert_eq!(dest_len, left_len);
                assert_eq!(dest_len, right_len);
                assert!(dest_len <= 4);
                let imm = this.read_scalar(imm)?.to_u8()?;
                let src_index = u64::from((imm >> 6) & 0b11);
                let dst_index = u64::from((imm >> 4) & 0b11);
                let src_value = this.read_immediate(&this.project_index(&right, src_index)?)?;
                for i in 0..dest_len {
                    let dest = this.project_index(&dest, i)?;
                    if imm & (1 << i) != 0 {
                        // zeroed (zeroing takes precedence over the copied value)
                        this.write_scalar(Scalar::from_u32(0), &dest)?;
                    } else if i == dst_index {
                        // copy from `right` at specified index
                        this.write_immediate(*src_value, &dest)?;
                    } else {
                        // copy from `left`
                        this.copy_op(
                            &this.project_index(&left, i)?,
                            &dest,
                            /*allow_transmute*/ false,
                        )?;
                    }
                }
            }
            // Used to implement the _mm_packus_epi32 function.
            // Concatenates two 32-bit signed integer vectors and converts
            // the result to a 16-bit unsigned integer vector with saturation.
            "packusdw" => {
                let [left, right] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
                let (left, left_len) = this.operand_to_simd(left)?;
                let (right, right_len) = this.operand_to_simd(right)?;
                let (dest, dest_len) = this.place_to_simd(dest)?;
                assert_eq!(left_len, right_len);
                assert_eq!(dest_len, left_len.checked_mul(2).unwrap());
                for i in 0..left_len {
                    let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i32()?;
                    let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i32()?;
                    let left_dest = this.project_index(&dest, i)?;
                    let right_dest = this.project_index(&dest, i.checked_add(left_len).unwrap())?;
                    // Saturate: negative values clamp to 0, values above
                    // `u16::MAX` clamp to `u16::MAX`.
                    let left_res =
                        u16::try_from(left).unwrap_or(if left < 0 { 0 } else { u16::MAX });
                    let right_res =
                        u16::try_from(right).unwrap_or(if right < 0 { 0 } else { u16::MAX });
                    this.write_scalar(Scalar::from_u16(left_res), &left_dest)?;
                    this.write_scalar(Scalar::from_u16(right_res), &right_dest)?;
                }
            }
            // Used to implement the _mm_dp_ps and _mm_dp_pd functions.
            // Conditionally multiplies the packed floating-point elements in
            // `left` and `right` using the high 4 bits in `imm`, sums the
            // enabled products, and conditionally stores the sum in `dest`
            // using the low 4 bits of `imm`.
            "dpps" | "dppd" => {
                let [left, right, imm] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
                let (left, left_len) = this.operand_to_simd(left)?;
                let (right, right_len) = this.operand_to_simd(right)?;
                let (dest, dest_len) = this.place_to_simd(dest)?;
                assert_eq!(left_len, right_len);
                assert!(dest_len <= 4);
                let imm = this.read_scalar(imm)?.to_u8()?;
                let element_layout = left.layout.field(this, 0);
                // Calculate dot product
                // Elements are floating point numbers, but we can use `from_int`
                // because the representation of 0.0 is all zero bits.
                let mut sum = ImmTy::from_int(0u8, element_layout);
                for i in 0..left_len {
                    // Bit `i + 4` of `imm` enables the product of element `i`.
                    if imm & (1 << i.checked_add(4).unwrap()) != 0 {
                        let left = this.read_immediate(&this.project_index(&left, i)?)?;
                        let right = this.read_immediate(&this.project_index(&right, i)?)?;
                        let mul = this.wrapping_binary_op(mir::BinOp::Mul, &left, &right)?;
                        sum = this.wrapping_binary_op(mir::BinOp::Add, &sum, &mul)?;
                    }
                }
                // Write to destination (conditioned to imm)
                for i in 0..dest_len {
                    let dest = this.project_index(&dest, i)?;
                    if imm & (1 << i) != 0 {
                        this.write_immediate(*sum, &dest)?;
                    } else {
                        this.write_scalar(Scalar::from_int(0u8, element_layout.size), &dest)?;
                    }
                }
            }
            // Used to implement the _mm_floor_ss, _mm_ceil_ss and _mm_round_ss
            // functions. Rounds the first element of `right` according to `rounding`
            // and copies the remaining elements from `left`.
            "round.ss" => {
                let [left, right, rounding] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
                round_first::<rustc_apfloat::ieee::Single>(this, left, right, rounding, dest)?;
            }
            // Used to implement the _mm_floor_sd, _mm_ceil_sd and _mm_round_sd
            // functions. Rounds the first element of `right` according to `rounding`
            // and copies the remaining elements from `left`.
            "round.sd" => {
                let [left, right, rounding] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
                round_first::<rustc_apfloat::ieee::Double>(this, left, right, rounding, dest)?;
            }
            // Used to implement the _mm_minpos_epu16 function.
            // Finds the minimum unsigned 16-bit integer in `op` and
            // returns its value and position.
            "phminposuw" => {
                let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
                let (op, op_len) = this.operand_to_simd(op)?;
                let (dest, dest_len) = this.place_to_simd(dest)?;
                // Find minimum (strict `<` keeps the first occurrence on ties)
                let mut min_value = u16::MAX;
                let mut min_index = 0;
                for i in 0..op_len {
                    let op = this.read_scalar(&this.project_index(&op, i)?)?.to_u16()?;
                    if op < min_value {
                        min_value = op;
                        min_index = i;
                    }
                }
                // Write value and index
                this.write_scalar(Scalar::from_u16(min_value), &this.project_index(&dest, 0)?)?;
                this.write_scalar(
                    Scalar::from_u16(min_index.try_into().unwrap()),
                    &this.project_index(&dest, 1)?,
                )?;
                // Fill remaining with zeros
                for i in 2..dest_len {
                    this.write_scalar(Scalar::from_u16(0), &this.project_index(&dest, i)?)?;
                }
            }
            // Used to implement the _mm_mpsadbw_epu8 function.
            // Compute the sum of absolute differences of quadruplets of unsigned
            // 8-bit integers in `left` and `right`, and store the 16-bit results
            // in `dest`. Quadruplets are selected from `left` and `right` with
            // offsets specified in `imm`.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mpsadbw_epu8
            "mpsadbw" => {
                let [left, right, imm] =
                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
                let (left, left_len) = this.operand_to_simd(left)?;
                let (right, right_len) = this.operand_to_simd(right)?;
                let (dest, dest_len) = this.place_to_simd(dest)?;
                assert_eq!(left_len, right_len);
                assert_eq!(left_len, dest_len.checked_mul(2).unwrap());
                let imm = this.read_scalar(imm)?.to_u8()?;
                // Bit 2 of `imm` specifies the offset for indices of `left`.
                // The offset is 0 when the bit is 0 or 4 when the bit is 1.
                let left_offset = u64::from((imm >> 2) & 1).checked_mul(4).unwrap();
                // Bits 0..=1 of `imm` specify the offset for indices of
                // `right` in blocks of 4 elements.
                let right_offset = u64::from(imm & 0b11).checked_mul(4).unwrap();
                for i in 0..dest_len {
                    let left_offset = left_offset.checked_add(i).unwrap();
                    let mut res: u16 = 0;
                    for j in 0..4 {
                        let left = this
                            .read_scalar(
                                &this.project_index(&left, left_offset.checked_add(j).unwrap())?,
                            )?
                            .to_u8()?;
                        let right = this
                            .read_scalar(
                                &this
                                    .project_index(&right, right_offset.checked_add(j).unwrap())?,
                            )?
                            .to_u8()?;
                        res = res.checked_add(left.abs_diff(right).into()).unwrap();
                    }
                    this.write_scalar(Scalar::from_u16(res), &this.project_index(&dest, i)?)?;
                }
            }
            // Used to implement the _mm_testz_si128, _mm_testc_si128
            // and _mm_testnzc_si128 functions.
            // Tests `op & mask == 0`, `op & mask == mask` or
            // `op & mask != 0 && op & mask != mask`
            "ptestz" | "ptestc" | "ptestnzc" => {
                let [op, mask] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
                let (op, op_len) = this.operand_to_simd(op)?;
                let (mask, mask_len) = this.operand_to_simd(mask)?;
                assert_eq!(op_len, mask_len);
                let f = match unprefixed_name {
                    "ptestz" => |op, mask| op & mask == 0,
                    "ptestc" => |op, mask| op & mask == mask,
                    "ptestnzc" => |op, mask| op & mask != 0 && op & mask != mask,
                    _ => unreachable!(),
                };
                // The predicate must hold for every 64-bit element for the
                // intrinsic to return 1.
                let mut all_zero = true;
                for i in 0..op_len {
                    let op = this.read_scalar(&this.project_index(&op, i)?)?.to_u64()?;
                    let mask = this.read_scalar(&this.project_index(&mask, i)?)?.to_u64()?;
                    all_zero &= f(op, mask);
                }
                this.write_scalar(Scalar::from_i32(all_zero.into()), dest)?;
            }
            _ => return Ok(EmulateForeignItemResult::NotSupported),
        }
        Ok(EmulateForeignItemResult::NeedsJumping)
    }
}
// Rounds the first element of `right` according to `rounding`
// and copies the remaining elements from `left`.
// Shared implementation of the `round.ss` and `round.sd` intrinsics;
// `F` selects single or double precision.
fn round_first<'tcx, F: rustc_apfloat::Float>(
    this: &mut crate::MiriInterpCx<'_, 'tcx>,
    left: &OpTy<'tcx, Provenance>,
    right: &OpTy<'tcx, Provenance>,
    rounding: &OpTy<'tcx, Provenance>,
    dest: &PlaceTy<'tcx, Provenance>,
) -> InterpResult<'tcx, ()> {
    let (left, left_len) = this.operand_to_simd(left)?;
    let (right, right_len) = this.operand_to_simd(right)?;
    let (dest, dest_len) = this.place_to_simd(dest)?;
    assert_eq!(dest_len, left_len);
    assert_eq!(dest_len, right_len);
    // The fourth bit of `rounding` only affects the SSE status
    // register, which cannot be accessed from Miri (or from Rust,
    // for that matter), so we can ignore it.
    let rounding = match this.read_scalar(rounding)?.to_i32()? & !0b1000 {
        // When the third bit is 0, the rounding mode is determined by the
        // first two bits.
        0b000 => rustc_apfloat::Round::NearestTiesToEven,
        0b001 => rustc_apfloat::Round::TowardNegative,
        0b010 => rustc_apfloat::Round::TowardPositive,
        0b011 => rustc_apfloat::Round::TowardZero,
        // When the third bit is 1, the rounding mode is determined by the
        // SSE status register. Since we do not support modifying it from
        // Miri (or Rust), we assume it to be at its default mode (round-to-nearest).
        0b100..=0b111 => rustc_apfloat::Round::NearestTiesToEven,
        rounding => throw_unsup_format!("unsupported rounding mode 0x{rounding:02x}"),
    };
    // Element 0 of `dest` is the first element of `right`, rounded to an integer.
    let op0: F = this.read_scalar(&this.project_index(&right, 0)?)?.to_float()?;
    let res = op0.round_to_integral(rounding).value;
    this.write_scalar(
        Scalar::from_uint(res.to_bits(), Size::from_bits(F::BITS)),
        &this.project_index(&dest, 0)?,
    )?;
    // All remaining elements are copied from `left` unchanged.
    for i in 1..dest_len {
        this.copy_op(
            &this.project_index(&left, i)?,
            &this.project_index(&dest, i)?,
            /*allow_transmute*/ false,
        )?;
    }
    Ok(())
}

View file

@ -1,7 +1,7 @@
// Should be caught even without retagging
//@compile-flags: -Zmiri-disable-stacked-borrows
#![feature(strict_provenance)]
use std::ptr::{addr_of_mut, self};
use std::ptr::{self, addr_of_mut};
// Deref'ing a dangling raw pointer is fine, but for a dangling box it is not.
// We do this behind a pointer indirection to potentially fool validity checking.

View file

@ -1,7 +1,7 @@
// Should be caught even without retagging
//@compile-flags: -Zmiri-disable-stacked-borrows
#![feature(strict_provenance)]
use std::ptr::{addr_of_mut, self};
use std::ptr::{self, addr_of_mut};
// Deref'ing a dangling raw pointer is fine, but for a dangling reference it is not.
// We do this behind a pointer indirection to potentially fool validity checking.

View file

@ -1,4 +1,6 @@
//@compile-flags: -Zmiri-tree-borrows
// This does need an aliasing model.
//@revisions: stack tree
//@[tree]compile-flags: -Zmiri-tree-borrows
#![feature(raw_ref_op)]
#![feature(core_intrinsics)]
#![feature(custom_mir)]
@ -25,6 +27,7 @@ pub fn main() {
// Writing through `ptr` here aliases the in-progress return place, which the
// aliasing models reject; the `//~` annotations below expect that error.
fn myfun(ptr: *mut i32) -> i32 {
    // This overwrites the return place, which shouldn't be possible through another pointer.
    unsafe { ptr.write(0) };
    //~^ ERROR: /write access .* forbidden/
    //~[stack]^ ERROR: tag does not exist in the borrow stack
    //~[tree]| ERROR: /write access .* forbidden/
    13
}

View file

@ -0,0 +1,40 @@
error: Undefined Behavior: attempting a write access using <TAG> at ALLOC[0x0], but that tag does not exist in the borrow stack for this location
--> $DIR/return_pointer_aliasing2.rs:LL:CC
|
LL | unsafe { ptr.write(0) };
| ^^^^^^^^^^^^
| |
| attempting a write access using <TAG> at ALLOC[0x0], but that tag does not exist in the borrow stack for this location
| this error occurs as part of an access at ALLOC[0x0..0x4]
|
= help: this indicates a potential bug in the program: it performed an invalid operation, but the Stacked Borrows rules it violated are still experimental
= help: see https://github.com/rust-lang/unsafe-code-guidelines/blob/master/wip/stacked-borrows.md for further information
help: <TAG> was created by a SharedReadWrite retag at offsets [0x0..0x4]
--> $DIR/return_pointer_aliasing2.rs:LL:CC
|
LL | / mir! {
LL | | {
LL | | let _x = 0;
LL | | let ptr = &raw mut _x;
... |
LL | | }
LL | | }
| |_____^
help: <TAG> was later invalidated at offsets [0x0..0x4] by a Unique in-place function argument/return passing protection
--> $DIR/return_pointer_aliasing2.rs:LL:CC
|
LL | unsafe { ptr.write(0) };
| ^^^^^^^^^^^^^^^^^^^^^^^
= note: BACKTRACE (of the first span):
= note: inside `myfun` at $DIR/return_pointer_aliasing2.rs:LL:CC
note: inside `main`
--> $DIR/return_pointer_aliasing2.rs:LL:CC
|
LL | Call(_x = myfun(ptr), after_call)
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
= note: this error originates in the macro `::core::intrinsics::mir::__internal_remove_let` which comes from the expansion of the macro `mir` (in Nightly builds, run with -Z macro-backtrace for more info)
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to previous error

View file

@ -0,0 +1,55 @@
// Doesn't need an aliasing model.
//@compile-flags: -Zmiri-disable-stacked-borrows
#![feature(raw_ref_op)]
#![feature(core_intrinsics)]
#![feature(custom_mir)]
use std::intrinsics::mir::*;
use std::panic;
// Return type used by the test: a field the test writes (`.0`) plus a large
// payload — presumably sized so the value is returned via a return place
// pointer rather than in registers (TODO confirm).
#[repr(C)]
struct S(i32, [u8; 128]);
// Hand-written MIR that calls `callee` with `*out` as the return place.
#[custom_mir(dialect = "runtime", phase = "optimized")]
fn docall(out: &mut S) {
    mir! {
        {
            Call(*out = callee(), after_call)
        }
        after_call = {
            Return()
        }
    }
}
// Panics unconditionally; used to unwind out of `callee` before it reaches
// its `Return`.
fn startpanic() -> () {
    panic!()
}
// Writes to its return place and then unwinds (via `startpanic`) without
// ever reaching `Return`.
#[custom_mir(dialect = "runtime", phase = "optimized")]
fn callee() -> S {
    mir! {
        type RET = S;
        let _unit: ();
        {
            // We test whether changes done to RET before unwinding
            // become visible to the outside. In codegen we can see them
            // but Miri should detect this as UB!
            RET.0 = 42;
            Call(_unit = startpanic(), after_call)
        }
        after_call = {
            Return()
        }
    }
}
fn main() {
    let mut x = S(0, [0; 128]);
    // `callee` unwinds, so the catch must yield an `Err`.
    panic::catch_unwind(panic::AssertUnwindSafe(|| docall(&mut x))).unwrap_err();
    // The return place got de-initialized before the call and assigning to RET
    // does not propagate if we do not reach the `Return`.
    dbg!(x.0); //~ERROR: uninitialized
}

View file

@ -0,0 +1,19 @@
thread 'main' panicked at $DIR/return_pointer_on_unwind.rs:LL:CC:
explicit panic
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
--> $DIR/return_pointer_on_unwind.rs:LL:CC
|
LL | dbg!(x.0);
| ^^^^^^^^^ using uninitialized data, but this operation requires initialized memory
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at RUSTLIB/std/src/macros.rs:LL:CC
= note: this error originates in the macro `dbg` (in Nightly builds, run with -Z macro-backtrace for more info)
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to previous error

View file

@ -34,10 +34,26 @@ fn const_fn_call() -> i64 {
x
}
fn call_return_into_passed_reference() {
    // The body of `func` must stay exactly `*v = f(v)`: MIR building
    // inserts a temporary, turning it into `let tmp = f(v); *v = tmp;`.
    // If a MIR optimization collapsed that into a direct `*v = f(v)` we
    // would have UB, since the return place must not be observable while
    // the callee is still running!
    pub fn func<T>(v: &mut T, f: fn(&T) -> T) {
        *v = f(v);
    }
    let mut counter = 0;
    func(&mut counter, |value| value + 1);
    assert_eq!(counter, 1);
}
fn main() {
    // Each case asserts its own expected result; a wrong value panics here.
    assert_eq!(call(), 2);
    assert_eq!(factorial_recursive(), 3628800);
    assert_eq!(call_generic(), (42, true));
    assert_eq!(cross_crate_fn_call(), 1);
    assert_eq!(const_fn_call(), 11);
    call_return_into_passed_reference();
}

View file

@ -345,10 +345,7 @@ fn test_casts() {
);
// Check that the low bits are gone (not the high bits).
check_all_outcomes(
HashSet::from_iter([
F32::nan(Pos, Quiet, 0),
F32::nan(Neg, Quiet, 0),
]),
HashSet::from_iter([F32::nan(Pos, Quiet, 0), F32::nan(Neg, Quiet, 0)]),
|| F32::from(F64::nan(Pos, Quiet, 1).as_f64() as f32),
);
check_all_outcomes(
@ -358,7 +355,7 @@ fn test_casts() {
F32::nan(Pos, Quiet, 1),
F32::nan(Neg, Quiet, 1),
]),
|| F32::from(F64::nan(Pos, Quiet, 1 << (51-22)).as_f64() as f32),
|| F32::from(F64::nan(Pos, Quiet, 1 << (51 - 22)).as_f64() as f32),
);
check_all_outcomes(
HashSet::from_iter([

View file

@ -0,0 +1,291 @@
// Ignore everything except x86 and x86_64
// Any additional targets added to CI should be ignored here
// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
//@ignore-target-aarch64
//@ignore-target-arm
//@ignore-target-avr
//@ignore-target-s390x
//@ignore-target-thumbv7em
//@ignore-target-wasm32
//@compile-flags: -C target-feature=+aes,+vaes,+avx512f
#![feature(avx512_target_feature, stdsimd)]
use core::mem::transmute;
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
fn main() {
    // Miri must report every target feature this test exercises.
    assert!(is_x86_feature_detected!("aes"));
    assert!(is_x86_feature_detected!("vaes"));
    assert!(is_x86_feature_detected!("avx512f"));
    // SAFETY: the required target features were verified to be available above.
    unsafe { test_aes() };
    unsafe { test_vaes() };
}
// The constants in the tests below are just bit patterns. They should not
// be interpreted as integers; signedness does not make sense for them, but
// __m128i happens to be defined in terms of signed integers.
// Known-answer tests for the 128-bit AES-NI intrinsics.
#[allow(overflowing_literals)]
#[target_feature(enable = "aes")]
unsafe fn test_aes() {
    // Mostly copied from library/stdarch/crates/core_arch/src/x86/aes.rs
    #[target_feature(enable = "aes")]
    unsafe fn test_mm_aesdec_si128() {
        // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
        let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
        let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
        let e = _mm_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee);
        let r = _mm_aesdec_si128(a, k);
        assert_eq_m128i(r, e);
    }
    test_mm_aesdec_si128();
    #[target_feature(enable = "aes")]
    unsafe fn test_mm_aesdeclast_si128() {
        // Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
        let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
        let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
        let e = _mm_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493);
        let r = _mm_aesdeclast_si128(a, k);
        assert_eq_m128i(r, e);
    }
    test_mm_aesdeclast_si128();
    #[target_feature(enable = "aes")]
    unsafe fn test_mm_aesenc_si128() {
        // Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
        let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
        let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
        let e = _mm_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333);
        let r = _mm_aesenc_si128(a, k);
        assert_eq_m128i(r, e);
    }
    test_mm_aesenc_si128();
    #[target_feature(enable = "aes")]
    unsafe fn test_mm_aesenclast_si128() {
        // Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
        let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
        let k = _mm_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee);
        let e = _mm_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8);
        let r = _mm_aesenclast_si128(a, k);
        assert_eq_m128i(r, e);
    }
    test_mm_aesenclast_si128();
    #[target_feature(enable = "aes")]
    unsafe fn test_mm_aesimc_si128() {
        // Constants taken from https://msdn.microsoft.com/en-us/library/cc714195.aspx.
        let a = _mm_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff);
        let e = _mm_set_epi64x(0xc66c82284ee40aa0, 0x6633441122770055);
        let r = _mm_aesimc_si128(a);
        assert_eq_m128i(r, e);
    }
    test_mm_aesimc_si128();
}
// The constants in the tests below are just bit patterns. They should not
// be interpreted as integers; signedness does not make sense for them, but
// __m128i happens to be defined in terms of signed integers.
// Tests the 256-bit (VAES) and 512-bit (VAES+AVX-512) AES intrinsics by
// checking each 128-bit lane against the corresponding `_mm_*` intrinsic.
#[allow(overflowing_literals)]
#[target_feature(enable = "vaes,avx512f")]
unsafe fn test_vaes() {
    #[target_feature(enable = "avx")]
    unsafe fn get_a256() -> __m256i {
        // Constants are random
        _mm256_set_epi64x(
            0xb89f43a558d3cd51,
            0x57b3e81e369bd603,
            0xf177a1a626933fd6,
            0x50d8adbed1a2f9d7,
        )
    }
    #[target_feature(enable = "avx")]
    unsafe fn get_k256() -> __m256i {
        // Constants are random
        _mm256_set_epi64x(
            0x503ff704588b5627,
            0xe23d882ed9c3c146,
            0x2785e5b670155b3c,
            0xa750718e183549ff,
        )
    }
    #[target_feature(enable = "vaes")]
    unsafe fn test_mm256_aesdec_epi128() {
        let a = get_a256();
        let k = get_k256();
        let r = _mm256_aesdec_epi128(a, k);
        // Check results: each 128-bit lane must match the 128-bit intrinsic.
        let a: [u128; 2] = transmute(a);
        let k: [u128; 2] = transmute(k);
        let r: [u128; 2] = transmute(r);
        for i in 0..2 {
            let e: u128 = transmute(_mm_aesdec_si128(transmute(a[i]), transmute(k[i])));
            assert_eq!(r[i], e);
        }
    }
    test_mm256_aesdec_epi128();
    #[target_feature(enable = "vaes")]
    unsafe fn test_mm256_aesdeclast_epi128() {
        let a = get_a256();
        let k = get_k256();
        let r = _mm256_aesdeclast_epi128(a, k);
        // Check results.
        let a: [u128; 2] = transmute(a);
        let k: [u128; 2] = transmute(k);
        let r: [u128; 2] = transmute(r);
        for i in 0..2 {
            let e: u128 = transmute(_mm_aesdeclast_si128(transmute(a[i]), transmute(k[i])));
            assert_eq!(r[i], e);
        }
    }
    test_mm256_aesdeclast_epi128();
    #[target_feature(enable = "vaes")]
    unsafe fn test_mm256_aesenc_epi128() {
        let a = get_a256();
        let k = get_k256();
        let r = _mm256_aesenc_epi128(a, k);
        // Check results.
        let a: [u128; 2] = transmute(a);
        let k: [u128; 2] = transmute(k);
        let r: [u128; 2] = transmute(r);
        for i in 0..2 {
            let e: u128 = transmute(_mm_aesenc_si128(transmute(a[i]), transmute(k[i])));
            assert_eq!(r[i], e);
        }
    }
    test_mm256_aesenc_epi128();
    #[target_feature(enable = "vaes")]
    unsafe fn test_mm256_aesenclast_epi128() {
        let a = get_a256();
        let k = get_k256();
        let r = _mm256_aesenclast_epi128(a, k);
        // Check results.
        let a: [u128; 2] = transmute(a);
        let k: [u128; 2] = transmute(k);
        let r: [u128; 2] = transmute(r);
        for i in 0..2 {
            let e: u128 = transmute(_mm_aesenclast_si128(transmute(a[i]), transmute(k[i])));
            assert_eq!(r[i], e);
        }
    }
    test_mm256_aesenclast_epi128();
    #[target_feature(enable = "avx512f")]
    unsafe fn get_a512() -> __m512i {
        // Constants are random
        _mm512_set_epi64(
            0xb89f43a558d3cd51,
            0x57b3e81e369bd603,
            0xf177a1a626933fd6,
            0x50d8adbed1a2f9d7,
            0xfbfee3116629db78,
            0x6aef4a91f2ad50f4,
            0x4258bb51ff1d476d,
            0x31da65761c8016cf,
        )
    }
    #[target_feature(enable = "avx512f")]
    unsafe fn get_k512() -> __m512i {
        // Constants are random
        _mm512_set_epi64(
            0x503ff704588b5627,
            0xe23d882ed9c3c146,
            0x2785e5b670155b3c,
            0xa750718e183549ff,
            0xdfb408830a65d3d9,
            0x0de3d92adac81b0a,
            0xed2741fe12877cae,
            0x3251ddb5404e0974,
        )
    }
    #[target_feature(enable = "vaes,avx512f")]
    unsafe fn test_mm512_aesdec_epi128() {
        let a = get_a512();
        let k = get_k512();
        let r = _mm512_aesdec_epi128(a, k);
        // Check results: each 128-bit lane must match the 128-bit intrinsic.
        let a: [u128; 4] = transmute(a);
        let k: [u128; 4] = transmute(k);
        let r: [u128; 4] = transmute(r);
        for i in 0..4 {
            let e: u128 = transmute(_mm_aesdec_si128(transmute(a[i]), transmute(k[i])));
            assert_eq!(r[i], e);
        }
    }
    test_mm512_aesdec_epi128();
    #[target_feature(enable = "vaes,avx512f")]
    unsafe fn test_mm512_aesdeclast_epi128() {
        let a = get_a512();
        let k = get_k512();
        let r = _mm512_aesdeclast_epi128(a, k);
        // Check results.
        let a: [u128; 4] = transmute(a);
        let k: [u128; 4] = transmute(k);
        let r: [u128; 4] = transmute(r);
        for i in 0..4 {
            let e: u128 = transmute(_mm_aesdeclast_si128(transmute(a[i]), transmute(k[i])));
            assert_eq!(r[i], e);
        }
    }
    test_mm512_aesdeclast_epi128();
    #[target_feature(enable = "vaes,avx512f")]
    unsafe fn test_mm512_aesenc_epi128() {
        let a = get_a512();
        let k = get_k512();
        let r = _mm512_aesenc_epi128(a, k);
        // Check results.
        let a: [u128; 4] = transmute(a);
        let k: [u128; 4] = transmute(k);
        let r: [u128; 4] = transmute(r);
        for i in 0..4 {
            let e: u128 = transmute(_mm_aesenc_si128(transmute(a[i]), transmute(k[i])));
            assert_eq!(r[i], e);
        }
    }
    test_mm512_aesenc_epi128();
    #[target_feature(enable = "vaes,avx512f")]
    unsafe fn test_mm512_aesenclast_epi128() {
        let a = get_a512();
        let k = get_k512();
        let r = _mm512_aesenclast_epi128(a, k);
        // Check results.
        let a: [u128; 4] = transmute(a);
        let k: [u128; 4] = transmute(k);
        let r: [u128; 4] = transmute(r);
        for i in 0..4 {
            let e: u128 = transmute(_mm_aesenclast_si128(transmute(a[i]), transmute(k[i])));
            assert_eq!(r[i], e);
        }
    }
    test_mm512_aesenclast_epi128();
}
#[track_caller]
#[target_feature(enable = "sse2")]
unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
    // Compare the raw bit patterns; `__m128i` itself has no `PartialEq`.
    let lhs: [u64; 2] = transmute(a);
    let rhs: [u64; 2] = transmute(b);
    assert_eq!(lhs, rhs)
}

View file

@ -0,0 +1,315 @@
// Ignore everything except x86 and x86_64
// Any additional targets added to CI should be ignored here
// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
//@ignore-target-aarch64
//@ignore-target-arm
//@ignore-target-avr
//@ignore-target-s390x
//@ignore-target-thumbv7em
//@ignore-target-wasm32
//@compile-flags: -C target-feature=+sse4.1
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
use std::mem::transmute;
fn main() {
    // Miri must report SSE4.1 as available for this test to be meaningful.
    assert!(is_x86_feature_detected!("sse4.1"));
    // SAFETY: SSE4.1 availability was verified just above.
    unsafe { test_sse41() };
}
#[target_feature(enable = "sse4.1")]
unsafe fn test_sse41() {
// Mostly copied from library/stdarch/crates/core_arch/src/x86/sse41.rs
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_insert_ps() {
let a = _mm_set1_ps(1.0);
let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let r = _mm_insert_ps::<0b11_00_1100>(a, b);
let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0);
assert_eq_m128(r, e);
// Zeroing takes precedence over copied value
let a = _mm_set1_ps(1.0);
let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
let r = _mm_insert_ps::<0b11_00_0001>(a, b);
let e = _mm_setr_ps(0.0, 1.0, 1.0, 1.0);
assert_eq_m128(r, e);
}
test_mm_insert_ps();
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_packus_epi32() {
let a = _mm_setr_epi32(1, 2, 3, 4);
let b = _mm_setr_epi32(-1, -2, -3, -4);
let r = _mm_packus_epi32(a, b);
let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0);
assert_eq_m128i(r, e);
}
test_mm_packus_epi32();
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_dp_pd() {
let a = _mm_setr_pd(2.0, 3.0);
let b = _mm_setr_pd(1.0, 4.0);
let e = _mm_setr_pd(14.0, 0.0);
assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e);
}
test_mm_dp_pd();
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_dp_ps() {
let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0);
let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0);
let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0);
assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e);
}
test_mm_dp_ps();
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_floor_sd() {
let a = _mm_setr_pd(2.5, 4.5);
let b = _mm_setr_pd(-1.5, -3.5);
let r = _mm_floor_sd(a, b);
let e = _mm_setr_pd(-2.0, 4.5);
assert_eq_m128d(r, e);
}
test_mm_floor_sd();
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_floor_ss() {
let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5);
let r = _mm_floor_ss(a, b);
let e = _mm_setr_ps(-2.0, 4.5, 8.5, 16.5);
assert_eq_m128(r, e);
}
test_mm_floor_ss();
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_ceil_sd() {
let a = _mm_setr_pd(1.5, 3.5);
let b = _mm_setr_pd(-2.5, -4.5);
let r = _mm_ceil_sd(a, b);
let e = _mm_setr_pd(-2.0, 3.5);
assert_eq_m128d(r, e);
}
test_mm_ceil_sd();
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_ceil_ss() {
let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5);
let r = _mm_ceil_ss(a, b);
let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
assert_eq_m128(r, e);
}
test_mm_ceil_ss();
#[target_feature(enable = "sse4.1")]
unsafe fn test_mm_round_sd() {
let a = _mm_setr_pd(1.5, 3.5);
let b = _mm_setr_pd(-2.5, -4.5);
let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b);
let e = _mm_setr_pd(-2.0, 3.5);
assert_eq_m128d(r, e);
let a = _mm_setr_pd(1.5, 3.5);
let b = _mm_setr_pd(-2.5, -4.5);
let r = _mm_round_sd::<_MM_FROUND_TO_NEG_INF>(a, b);
let e = _mm_setr_pd(-3.0, 3.5);
assert_eq_m128d(r, e);
let a = _mm_setr_pd(1.5, 3.5);
let b = _mm_setr_pd(-2.5, -4.5);
let r = _mm_round_sd::<_MM_FROUND_TO_POS_INF>(a, b);
let e = _mm_setr_pd(-2.0, 3.5);
assert_eq_m128d(r, e);
let a = _mm_setr_pd(1.5, 3.5);
let b = _mm_setr_pd(-2.5, -4.5);
let r = _mm_round_sd::<_MM_FROUND_TO_ZERO>(a, b);
let e = _mm_setr_pd(-2.0, 3.5);
assert_eq_m128d(r, e);
// Assume round-to-nearest by default
let a = _mm_setr_pd(1.5, 3.5);
let b = _mm_setr_pd(-2.5, -4.5);
let r = _mm_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
let e = _mm_setr_pd(-2.0, 3.5);
assert_eq_m128d(r, e);
}
test_mm_round_sd();
#[target_feature(enable = "sse4.1")]
// _mm_round_ss: lane 0 = b[0] rounded per the const rounding mode;
// lanes 1..3 are passed through from `a` unchanged. One case per mode,
// using b[0] = -1.75 so every mode produces a distinct-looking result.
unsafe fn test_mm_round_ss() {
    // Nearest: -1.75 -> -2.0.
    let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
    let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
    let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b);
    let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
    assert_eq_m128(r, e);
    // Toward -inf (floor): -1.75 -> -2.0.
    let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
    let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
    let r = _mm_round_ss::<_MM_FROUND_TO_NEG_INF>(a, b);
    let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
    assert_eq_m128(r, e);
    // Toward +inf (ceil): -1.75 -> -1.0.
    let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
    let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
    let r = _mm_round_ss::<_MM_FROUND_TO_POS_INF>(a, b);
    let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
    assert_eq_m128(r, e);
    // Toward zero (truncate): -1.75 -> -1.0.
    let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
    let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
    let r = _mm_round_ss::<_MM_FROUND_TO_ZERO>(a, b);
    let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
    assert_eq_m128(r, e);
    // Assume round-to-nearest by default
    let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
    let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
    let r = _mm_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
    let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
    assert_eq_m128(r, e);
}
test_mm_round_ss();
#[target_feature(enable = "sse4.1")]
// _mm_minpos_epu16: result lane 0 = the minimum u16 value, lane 1 = its
// index within `a`, remaining lanes zeroed.
unsafe fn test_mm_minpos_epu16() {
    // Minimum 13 sits at index 5.
    let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66);
    let r = _mm_minpos_epu16(a);
    let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
    assert_eq_m128i(r, e);
    // Zero is a valid minimum; here it is at index 0, so the whole
    // result vector is zero.
    let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66);
    let r = _mm_minpos_epu16(a);
    let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq_m128i(r, e);
    // Case where the minimum value is repeated
    // (13 at indices 5 and 7): the lowest index wins.
    let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13);
    let r = _mm_minpos_epu16(a);
    let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
    assert_eq_m128i(r, e);
}
test_mm_minpos_epu16();
#[target_feature(enable = "sse4.1")]
// _mm_mpsadbw_epu8: eight 16-bit sums of absolute differences, each
// comparing a sliding 4-byte window of `a` against one fixed 4-byte chunk
// of `b`. Per the Intel docs, imm8 bits 0-1 select b's 32-bit chunk and
// bit 2 selects a 4-byte starting offset into `a`.
unsafe fn test_mm_mpsadbw_epu8() {
    let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    // a offset 0 vs b chunk 0 ([0,1,2,3]).
    let r = _mm_mpsadbw_epu8::<0b000>(a, a);
    let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
    assert_eq_m128i(r, e);
    // a offset 0 vs b chunk 1 ([4,5,6,7]).
    let r = _mm_mpsadbw_epu8::<0b001>(a, a);
    let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12);
    assert_eq_m128i(r, e);
    // a offset 4 vs b chunk 0.
    let r = _mm_mpsadbw_epu8::<0b100>(a, a);
    let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44);
    assert_eq_m128i(r, e);
    // a offset 4 vs b chunk 1: aligned again, so same pattern as 0b000.
    let r = _mm_mpsadbw_epu8::<0b101>(a, a);
    let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
    assert_eq_m128i(r, e);
    // a offset 4 vs b chunk 3 ([12,13,14,15]).
    let r = _mm_mpsadbw_epu8::<0b111>(a, a);
    let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4);
    assert_eq_m128i(r, e);
}
test_mm_mpsadbw_epu8();
#[target_feature(enable = "sse4.1")]
// _mm_testz_si128 returns 1 exactly when `a & mask` is all-zero, 0 otherwise.
unsafe fn test_mm_testz_si128() {
    // (byte broadcast into a, byte broadcast into mask, expected result)
    let cases: [(i8, i8, i32); 3] = [
        (1, 0, 1),         // empty mask: intersection is necessarily zero
        (0b101, 0b110, 0), // bit 2 is set in both operands
        (0b011, 0b100, 1), // operands share no bits
    ];
    for (value, mask_byte, expected) in cases {
        let a = _mm_set1_epi8(value);
        let mask = _mm_set1_epi8(mask_byte);
        assert_eq!(_mm_testz_si128(a, mask), expected);
    }
}
test_mm_testz_si128();
#[target_feature(enable = "sse4.1")]
// _mm_testc_si128 returns 1 exactly when `!a & mask` is all-zero,
// i.e. every bit set in `mask` is also set in `a`.
unsafe fn test_mm_testc_si128() {
    // (byte broadcast into a, byte broadcast into mask, expected result)
    let cases: [(i8, i8, i32); 3] = [
        (-1, 0, 1),        // a is all-ones, so it covers any mask
        (0b101, 0b110, 0), // mask bit 1 is missing from a
        (0b101, 0b100, 1), // mask is a subset of a
    ];
    for (value, mask_byte, expected) in cases {
        let a = _mm_set1_epi8(value);
        let mask = _mm_set1_epi8(mask_byte);
        assert_eq!(_mm_testc_si128(a, mask), expected);
    }
}
test_mm_testc_si128();
#[target_feature(enable = "sse4.1")]
// _mm_testnzc_si128 returns 1 only when both `a & mask` and `!a & mask`
// are non-zero (mask overlaps a but is not fully covered by it).
unsafe fn test_mm_testnzc_si128() {
    // a & mask == 0: no overlap -> 0.
    let a = _mm_set1_epi8(0);
    let mask = _mm_set1_epi8(1);
    let r = _mm_testnzc_si128(a, mask);
    assert_eq!(r, 0);
    // !a & mask == 0 (a is all-ones): mask fully covered -> 0.
    let a = _mm_set1_epi8(-1);
    let mask = _mm_set1_epi8(0);
    let r = _mm_testnzc_si128(a, mask);
    assert_eq!(r, 0);
    // Partial overlap: bit 2 shared, bit 1 only in mask -> 1.
    let a = _mm_set1_epi8(0b101);
    let mask = _mm_set1_epi8(0b110);
    let r = _mm_testnzc_si128(a, mask);
    assert_eq!(r, 1);
    // mask == a: fully covered, so !a & mask == 0 -> 0.
    let a = _mm_set1_epi8(0b101);
    let mask = _mm_set1_epi8(0b101);
    let r = _mm_testnzc_si128(a, mask);
    assert_eq!(r, 0);
}
test_mm_testnzc_si128();
}
/// Asserts that two `__m128` vectors are lane-wise equal, panicking with
/// both operands on mismatch.
///
/// Made `pub` for consistency with the sibling helpers `assert_eq_m128d`
/// and `assert_eq_m128i` below (backward-compatible: widening visibility
/// breaks no caller).
#[track_caller]
#[target_feature(enable = "sse")]
pub unsafe fn assert_eq_m128(a: __m128, b: __m128) {
    // _mm_cmpeq_ps sets a lane to all-ones where equal; _mm_movemask_ps
    // collects the four sign bits, so 0b1111 means every lane matched.
    let r = _mm_cmpeq_ps(a, b);
    if _mm_movemask_ps(r) != 0b1111 {
        panic!("{:?} != {:?}", a, b);
    }
}
/// Asserts that two `__m128d` vectors are lane-wise equal, panicking with
/// both operands on mismatch.
#[track_caller]
#[target_feature(enable = "sse2")]
pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
    // Both lanes equal <=> the two sign bits of the comparison mask are set.
    let lane_mask = _mm_movemask_pd(_mm_cmpeq_pd(a, b));
    if lane_mask != 0b11 {
        panic!("{:?} != {:?}", a, b);
    }
}
/// Asserts that two `__m128i` vectors are bit-for-bit equal by comparing
/// their raw 128-bit contents as two `u64` words.
#[track_caller]
#[target_feature(enable = "sse2")]
pub unsafe fn assert_eq_m128i(a: __m128i, b: __m128i) {
    let lhs: [u64; 2] = transmute(a);
    let rhs: [u64; 2] = transmute(b);
    assert_eq!(lhs, rhs)
}

View file

@@ -5,6 +5,7 @@
use std::cell::Cell;
use std::panic::{catch_unwind, AssertUnwindSafe};
use std::process;
thread_local! {
static MY_COUNTER: Cell<usize> = Cell::new(0);
@@ -62,26 +63,26 @@ fn main() {
// Built-in panics; also make sure the message is right.
test(Some("index out of bounds: the len is 3 but the index is 4"), |_old_val| {
let _val = [0, 1, 2][4];
loop {}
process::abort()
});
test(Some("attempt to divide by zero"), |_old_val| {
let _val = 1 / 0;
loop {}
process::abort()
});
test(Some("align_offset: align is not a power-of-two"), |_old_val| {
let _ = std::ptr::null::<u8>().align_offset(3);
loop {}
process::abort()
});
// Assertion and debug assertion
test(None, |_old_val| {
assert!(false);
loop {}
process::abort()
});
test(None, |_old_val| {
debug_assert!(false);
loop {}
process::abort()
});
eprintln!("Success!"); // Make sure we get this in stderr

View file

@@ -1,6 +1,6 @@
#![feature(strict_provenance)]
use std::ptr::{self, addr_of};
use std::mem;
use std::ptr::{self, addr_of};
fn basic_raw() {
let mut x = 12;

View file

@@ -10,6 +10,5 @@ allow-unauthenticated = [
# Gives us the commands 'ready', 'author', 'blocked'
[shortcut]
# disabled until https://github.com/rust-lang/triagebot/pull/1720 lands
#[no-merges]
#exclude_titles = ["Rollup of", "sync from rustc"]
[no-merges]
exclude_titles = ["Rustup"]