From a438401d05beb6d4307e4741b38c3a133de00c0d Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Fri, 18 Jul 2025 13:40:11 +0200 Subject: [PATCH] move page protection logic inside native_lib and ensure we don't unwind out of the "weird state" during tracing --- src/tools/miri/Cargo.toml | 5 +- src/tools/miri/src/alloc/isolated_alloc.rs | 54 ++--------- src/tools/miri/src/lib.rs | 1 + src/tools/miri/src/shims/native_lib/mod.rs | 13 +-- .../miri/src/shims/native_lib/trace/child.rs | 95 ++++++++++++------- .../src/shims/native_lib/trace/messages.rs | 3 +- 6 files changed, 81 insertions(+), 90 deletions(-) diff --git a/src/tools/miri/Cargo.toml b/src/tools/miri/Cargo.toml index 0aff34e9fbe6..b3503acf0c0e 100644 --- a/src/tools/miri/Cargo.toml +++ b/src/tools/miri/Cargo.toml @@ -38,13 +38,14 @@ features = ['unprefixed_malloc_on_supported_platforms'] [target.'cfg(unix)'.dependencies] libc = "0.2" +# native-lib dependencies libffi = { version = "4.0.0", optional = true } libloading = { version = "0.8", optional = true } -nix = { version = "0.30.1", features = ["mman", "ptrace", "signal"], optional = true } +serde = { version = "1.0.219", features = ["derive"], optional = true } [target.'cfg(target_os = "linux")'.dependencies] +nix = { version = "0.30.1", features = ["mman", "ptrace", "signal"], optional = true } ipc-channel = { version = "0.19.0", optional = true } -serde = { version = "1.0.219", features = ["derive"], optional = true } capstone = { version = "0.13", optional = true } [dev-dependencies] diff --git a/src/tools/miri/src/alloc/isolated_alloc.rs b/src/tools/miri/src/alloc/isolated_alloc.rs index 7b2f1a3eebf3..1745727b16b4 100644 --- a/src/tools/miri/src/alloc/isolated_alloc.rs +++ b/src/tools/miri/src/alloc/isolated_alloc.rs @@ -1,7 +1,6 @@ use std::alloc::Layout; use std::ptr::NonNull; -use nix::sys::mman; use rustc_index::bit_set::DenseBitSet; /// How many bytes of memory each bit in the bitset represents. @@ -44,6 +43,10 @@ impl IsolatedAlloc { } } + pub fn page_size(&self) -> usize { + self.page_size + } + /// For simplicity, we serve small allocations in multiples of COMPRESSION_FACTOR /// bytes with at least that alignment. #[inline] @@ -302,50 +305,11 @@ impl IsolatedAlloc { } } - /// Returns a list of page addresses managed by the allocator. - pub fn pages(&self) -> impl Iterator { - let pages = self.page_ptrs.iter().map(|p| p.expose_provenance().get()); - pages.chain(self.huge_ptrs.iter().flat_map(|(ptr, size)| { - (0..size / self.page_size) - .map(|i| ptr.expose_provenance().get().strict_add(i * self.page_size)) - })) - } - - /// Protects all owned memory as `PROT_NONE`, preventing accesses. - /// - /// SAFETY: Accessing memory after this point will result in a segfault - /// unless it is first unprotected. - pub unsafe fn start_ffi(&mut self) -> Result<(), nix::errno::Errno> { - let prot = mman::ProtFlags::PROT_NONE; - unsafe { self.mprotect(prot) } - } - - /// Deprotects all owned memory by setting it to RW. Erroring here is very - /// likely unrecoverable, so it may panic if applying those permissions - /// fails. - pub fn end_ffi(&mut self) { - let prot = mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE; - unsafe { - self.mprotect(prot).unwrap(); - } - } - - /// Applies `prot` to every page managed by the allocator. - /// - /// SAFETY: Accessing memory in violation of the protection flags will - /// trigger a segfault. - unsafe fn mprotect(&mut self, prot: mman::ProtFlags) -> Result<(), nix::errno::Errno> { - for &pg in &self.page_ptrs { - unsafe { - mman::mprotect(pg.cast(), self.page_size, prot)?; - } - } - for &(hpg, size) in &self.huge_ptrs { - unsafe { - mman::mprotect(hpg.cast(), size.next_multiple_of(self.page_size), prot)?; - } - } - Ok(()) + /// Returns a list of page ranges managed by the allocator, given in terms of pointers + /// and size (in bytes). + pub fn pages(&self) -> impl Iterator, usize)> { + let pages = self.page_ptrs.iter().map(|&p| (p, self.page_size)); + pages.chain(self.huge_ptrs.iter().copied()) } } diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs index 0f293c0d8f7c..ae70257653c8 100644 --- a/src/tools/miri/src/lib.rs +++ b/src/tools/miri/src/lib.rs @@ -1,3 +1,4 @@ +#![feature(abort_unwind)] #![feature(cfg_select)] #![feature(rustc_private)] #![feature(float_gamma)] diff --git a/src/tools/miri/src/shims/native_lib/mod.rs b/src/tools/miri/src/shims/native_lib/mod.rs index fb7b1df41a4c..2827ed997a7c 100644 --- a/src/tools/miri/src/shims/native_lib/mod.rs +++ b/src/tools/miri/src/shims/native_lib/mod.rs @@ -8,6 +8,7 @@ use rustc_abi::{BackendRepr, HasDataLayout, Size}; use rustc_middle::mir::interpret::Pointer; use rustc_middle::ty::{self as ty, IntTy, UintTy}; use rustc_span::Symbol; +use serde::{Deserialize, Serialize}; #[cfg_attr( not(all( @@ -23,18 +24,14 @@ use crate::*; /// The final results of an FFI trace, containing every relevant event detected /// by the tracer. -#[allow(dead_code)] -#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))] -#[derive(Debug)] +#[derive(Serialize, Deserialize, Debug)] pub struct MemEvents { /// An list of memory accesses that occurred, in the order they occurred in. pub acc_events: Vec, } /// A single memory access. -#[allow(dead_code)] -#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))] -#[derive(Clone, Debug)] +#[derive(Serialize, Deserialize, Clone, Debug)] pub enum AccessEvent { /// A read occurred on this memory range. Read(AccessRange), @@ -56,9 +53,7 @@ impl AccessEvent { } /// The memory touched by a given access. -#[allow(dead_code)] -#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))] -#[derive(Clone, Debug)] +#[derive(Serialize, Deserialize, Clone, Debug)] pub struct AccessRange { /// The base address in memory where an access occurred. pub addr: usize, diff --git a/src/tools/miri/src/shims/native_lib/trace/child.rs b/src/tools/miri/src/shims/native_lib/trace/child.rs index de26cb0fe557..b998ba822dde 100644 --- a/src/tools/miri/src/shims/native_lib/trace/child.rs +++ b/src/tools/miri/src/shims/native_lib/trace/child.rs @@ -1,8 +1,9 @@ use std::cell::RefCell; +use std::ptr::NonNull; use std::rc::Rc; use ipc_channel::ipc; -use nix::sys::{ptrace, signal}; +use nix::sys::{mman, ptrace, signal}; use nix::unistd; use rustc_const_eval::interpret::InterpResult; @@ -44,6 +45,16 @@ impl Supervisor { SUPERVISOR.lock().unwrap().is_some() } + unsafe fn protect_pages( + pages: impl Iterator, usize)>, + prot: mman::ProtFlags, + ) -> Result<(), nix::errno::Errno> { + for (pg, sz) in pages { + unsafe { mman::mprotect(pg.cast(), sz, prot)? }; + } + Ok(()) + } + /// Performs an arbitrary FFI call, enabling tracing from the supervisor. /// As this locks the supervisor via a mutex, no other threads may enter FFI /// until this function returns. @@ -60,47 +71,67 @@ impl Supervisor { // Get pointers to all the pages the supervisor must allow accesses in // and prepare the callback stack. - let page_ptrs = alloc.borrow().pages().collect(); + let alloc = alloc.borrow(); + let page_size = alloc.page_size(); + let page_ptrs = alloc + .pages() + .flat_map(|(pg, sz)| { + // Convert (page, size) pair into list of pages. + let start = pg.expose_provenance().get(); + (0..sz.strict_div(alloc.page_size())) + .map(move |i| start.strict_add(i.strict_mul(page_size))) + }) + .collect(); let raw_stack_ptr: *mut [u8; CALLBACK_STACK_SIZE] = Box::leak(Box::new([0u8; CALLBACK_STACK_SIZE])).as_mut_ptr().cast(); let stack_ptr = raw_stack_ptr.expose_provenance(); let start_info = StartFfiInfo { page_ptrs, stack_ptr }; - // SAFETY: We do not access machine memory past this point until the - // supervisor is ready to allow it. - unsafe { - if alloc.borrow_mut().start_ffi().is_err() { - // Don't mess up unwinding by maybe leaving the memory partly protected - alloc.borrow_mut().end_ffi(); - panic!("Cannot protect memory for FFI call!"); + // Unwinding might be messed up due to partly protected memory, so let's abort if something + // breaks inside here. + let res = std::panic::abort_unwind(|| { + // SAFETY: We do not access machine memory past this point until the + // supervisor is ready to allow it. + // FIXME: this is sketchy, as technically the memory is still in the Rust Abstract Machine, + // and the compiler would be allowed to reorder accesses below this block... + unsafe { + Self::protect_pages(alloc.pages(), mman::ProtFlags::PROT_NONE).unwrap(); } - } - // Send over the info. - // NB: if we do not wait to receive a blank confirmation response, it is - // possible that the supervisor is alerted of the SIGSTOP *before* it has - // actually received the start_info, thus deadlocking! This way, we can - // enforce an ordering for these events. - sv.message_tx.send(TraceRequest::StartFfi(start_info)).unwrap(); - sv.confirm_rx.recv().unwrap(); - // We need to be stopped for the supervisor to be able to make certain - // modifications to our memory - simply waiting on the recv() doesn't - // count. - signal::raise(signal::SIGSTOP).unwrap(); + // Send over the info. + // NB: if we do not wait to receive a blank confirmation response, it is + // possible that the supervisor is alerted of the SIGSTOP *before* it has + // actually received the start_info, thus deadlocking! This way, we can + // enforce an ordering for these events. + sv.message_tx.send(TraceRequest::StartFfi(start_info)).unwrap(); + sv.confirm_rx.recv().unwrap(); + // We need to be stopped for the supervisor to be able to make certain + // modifications to our memory - simply waiting on the recv() doesn't + // count. + signal::raise(signal::SIGSTOP).unwrap(); - let res = f(); + let res = f(); - // We can't use IPC channels here to signal that FFI mode has ended, - // since they might allocate memory which could get us stuck in a SIGTRAP - // with no easy way out! While this could be worked around, it is much - // simpler and more robust to simply use the signals which are left for - // arbitrary usage. Since this will block until we are continued by the - // supervisor, we can assume past this point that everything is back to - // normal. - signal::raise(signal::SIGUSR1).unwrap(); + // We can't use IPC channels here to signal that FFI mode has ended, + // since they might allocate memory which could get us stuck in a SIGTRAP + // with no easy way out! While this could be worked around, it is much + // simpler and more robust to simply use the signals which are left for + // arbitrary usage. Since this will block until we are continued by the + // supervisor, we can assume past this point that everything is back to + // normal. + signal::raise(signal::SIGUSR1).unwrap(); - // This is safe! It just sets memory to normal expected permissions. - alloc.borrow_mut().end_ffi(); + // SAFETY: We set memory back to normal, so this is safe. + unsafe { + Self::protect_pages( + alloc.pages(), + mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE, + ) + .unwrap(); + } + + res + }); // SAFETY: Caller upholds that this pointer was allocated as a box with // this type. diff --git a/src/tools/miri/src/shims/native_lib/trace/messages.rs b/src/tools/miri/src/shims/native_lib/trace/messages.rs index 1f9df556b577..bef6cc1b2f3e 100644 --- a/src/tools/miri/src/shims/native_lib/trace/messages.rs +++ b/src/tools/miri/src/shims/native_lib/trace/messages.rs @@ -45,8 +45,7 @@ pub enum TraceRequest { /// Information needed to begin tracing. #[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] pub struct StartFfiInfo { - /// A vector of page addresses. These should have been automatically obtained - /// with `IsolatedAlloc::pages` and prepared with `IsolatedAlloc::start_ffi`. + /// A vector of page addresses that store the miri heap which is accessible from C. pub page_ptrs: Vec, /// The address of an allocation that can serve as a temporary stack. /// This should be a leaked `Box<[u8; CALLBACK_STACK_SIZE]>` cast to an int.