Auto merge of #144019 - RalfJung:miri-sync, r=RalfJung

Miri subtree update

r? `@ghost`
This commit is contained in:
bors 2025-07-16 21:49:31 +00:00
commit fd2eb391d0
53 changed files with 1149 additions and 590 deletions

View file

@ -342,9 +342,9 @@ environment variable. We first document the most relevant and most commonly used
is enabled (the default), this is also used to emulate system entropy. The default seed is 0. You is enabled (the default), this is also used to emulate system entropy. The default seed is 0. You
can increase test coverage by running Miri multiple times with different seeds. can increase test coverage by running Miri multiple times with different seeds.
* `-Zmiri-strict-provenance` enables [strict * `-Zmiri-strict-provenance` enables [strict
provenance](https://github.com/rust-lang/rust/issues/95228) checking in Miri. This means that provenance](https://doc.rust-lang.org/nightly/std/ptr/index.html#strict-provenance) checking in
casting an integer to a pointer will stop execution because the provenance of the pointer Miri. This means that casting an integer to a pointer will stop execution because the provenance
cannot be determined. of the pointer cannot be determined.
* `-Zmiri-symbolic-alignment-check` makes the alignment check more strict. By default, alignment is * `-Zmiri-symbolic-alignment-check` makes the alignment check more strict. By default, alignment is
checked by casting the pointer to an integer, and making sure that is a multiple of the alignment. checked by casting the pointer to an integer, and making sure that is a multiple of the alignment.
This can lead to cases where a program passes the alignment check by pure chance, because things This can lead to cases where a program passes the alignment check by pure chance, because things

View file

@ -7,6 +7,7 @@ repository = "https://github.com/rust-lang/miri"
version = "0.1.0" version = "0.1.0"
default-run = "miri-script" default-run = "miri-script"
edition = "2024" edition = "2024"
rust-version = "1.85"
[workspace] [workspace]
# We make this a workspace root so that cargo does not go looking in ../Cargo.toml for the workspace root. # We make this a workspace root so that cargo does not go looking in ../Cargo.toml for the workspace root.

View file

@ -702,7 +702,6 @@ impl Command {
let mut early_flags = Vec::<OsString>::new(); let mut early_flags = Vec::<OsString>::new();
// In `dep` mode, the target is already passed via `MIRI_TEST_TARGET` // In `dep` mode, the target is already passed via `MIRI_TEST_TARGET`
#[expect(clippy::collapsible_if)] // we need to wait until this is stable
if !dep { if !dep {
if let Some(target) = &target { if let Some(target) = &target {
early_flags.push("--target".into()); early_flags.push("--target".into());
@ -735,7 +734,6 @@ impl Command {
// Add Miri flags // Add Miri flags
let mut cmd = cmd.args(&miri_flags).args(&early_flags).args(&flags); let mut cmd = cmd.args(&miri_flags).args(&early_flags).args(&flags);
// For `--dep` we also need to set the target in the env var. // For `--dep` we also need to set the target in the env var.
#[expect(clippy::collapsible_if)] // we need to wait until this is stable
if dep { if dep {
if let Some(target) = &target { if let Some(target) = &target {
cmd = cmd.env("MIRI_TEST_TARGET", target); cmd = cmd.env("MIRI_TEST_TARGET", target);

View file

@ -1 +1 @@
733b47ea4b1b86216f14ef56e49440c33933f230 7f2065a4bae1faed5bab928c670964eafbf43b55

View file

@ -302,23 +302,20 @@ impl IsolatedAlloc {
} }
} }
/// Returns a vector of page addresses managed by the allocator. /// Returns a list of page addresses managed by the allocator.
pub fn pages(&self) -> Vec<usize> { pub fn pages(&self) -> impl Iterator<Item = usize> {
let mut pages: Vec<usize> = let pages = self.page_ptrs.iter().map(|p| p.expose_provenance().get());
self.page_ptrs.iter().map(|p| p.expose_provenance().get()).collect(); pages.chain(self.huge_ptrs.iter().flat_map(|(ptr, size)| {
for (ptr, size) in self.huge_ptrs.iter() { (0..size / self.page_size)
for i in 0..size / self.page_size { .map(|i| ptr.expose_provenance().get().strict_add(i * self.page_size))
pages.push(ptr.expose_provenance().get().strict_add(i * self.page_size)); }))
}
}
pages
} }
/// Protects all owned memory as `PROT_NONE`, preventing accesses. /// Protects all owned memory as `PROT_NONE`, preventing accesses.
/// ///
/// SAFETY: Accessing memory after this point will result in a segfault /// SAFETY: Accessing memory after this point will result in a segfault
/// unless it is first unprotected. /// unless it is first unprotected.
pub unsafe fn prepare_ffi(&mut self) -> Result<(), nix::errno::Errno> { pub unsafe fn start_ffi(&mut self) -> Result<(), nix::errno::Errno> {
let prot = mman::ProtFlags::PROT_NONE; let prot = mman::ProtFlags::PROT_NONE;
unsafe { self.mprotect(prot) } unsafe { self.mprotect(prot) }
} }
@ -326,7 +323,7 @@ impl IsolatedAlloc {
/// Deprotects all owned memory by setting it to RW. Erroring here is very /// Deprotects all owned memory by setting it to RW. Erroring here is very
/// likely unrecoverable, so it may panic if applying those permissions /// likely unrecoverable, so it may panic if applying those permissions
/// fails. /// fails.
pub fn unprep_ffi(&mut self) { pub fn end_ffi(&mut self) {
let prot = mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE; let prot = mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE;
unsafe { unsafe {
self.mprotect(prot).unwrap(); self.mprotect(prot).unwrap();

View file

@ -233,8 +233,6 @@ impl rustc_driver::Callbacks for MiriCompilerCalls {
} else { } else {
let return_code = miri::eval_entry(tcx, entry_def_id, entry_type, &config, None) let return_code = miri::eval_entry(tcx, entry_def_id, entry_type, &config, None)
.unwrap_or_else(|| { .unwrap_or_else(|| {
//#[cfg(target_os = "linux")]
//miri::native_lib::register_retcode_sv(rustc_driver::EXIT_FAILURE);
tcx.dcx().abort_if_errors(); tcx.dcx().abort_if_errors();
rustc_driver::EXIT_FAILURE rustc_driver::EXIT_FAILURE
}); });
@ -337,6 +335,9 @@ impl rustc_driver::Callbacks for MiriBeRustCompilerCalls {
fn exit(exit_code: i32) -> ! { fn exit(exit_code: i32) -> ! {
// Drop the tracing guard before exiting, so tracing calls are flushed correctly. // Drop the tracing guard before exiting, so tracing calls are flushed correctly.
deinit_loggers(); deinit_loggers();
// Make sure the supervisor knows about the code code.
#[cfg(target_os = "linux")]
miri::native_lib::register_retcode_sv(exit_code);
std::process::exit(exit_code); std::process::exit(exit_code);
} }
@ -355,6 +356,11 @@ fn run_compiler_and_exit(
args: &[String], args: &[String],
callbacks: &mut (dyn rustc_driver::Callbacks + Send), callbacks: &mut (dyn rustc_driver::Callbacks + Send),
) -> ! { ) -> ! {
// Install the ctrlc handler that sets `rustc_const_eval::CTRL_C_RECEIVED`, even if
// MIRI_BE_RUSTC is set. We do this late so that when `native_lib::init_sv` is called,
// there are no other threads.
rustc_driver::install_ctrlc_handler();
// Invoke compiler, catch any unwinding panics and handle return code. // Invoke compiler, catch any unwinding panics and handle return code.
let exit_code = let exit_code =
rustc_driver::catch_with_exit_code(move || rustc_driver::run_compiler(args, callbacks)); rustc_driver::catch_with_exit_code(move || rustc_driver::run_compiler(args, callbacks));
@ -439,10 +445,6 @@ fn main() {
let args = rustc_driver::catch_fatal_errors(|| rustc_driver::args::raw_args(&early_dcx)) let args = rustc_driver::catch_fatal_errors(|| rustc_driver::args::raw_args(&early_dcx))
.unwrap_or_else(|_| std::process::exit(rustc_driver::EXIT_FAILURE)); .unwrap_or_else(|_| std::process::exit(rustc_driver::EXIT_FAILURE));
// Install the ctrlc handler that sets `rustc_const_eval::CTRL_C_RECEIVED`, even if
// MIRI_BE_RUSTC is set.
rustc_driver::install_ctrlc_handler();
// If the environment asks us to actually be rustc, then do that. // If the environment asks us to actually be rustc, then do that.
if let Some(crate_kind) = env::var_os("MIRI_BE_RUSTC") { if let Some(crate_kind) = env::var_os("MIRI_BE_RUSTC") {
// Earliest rustc setup. // Earliest rustc setup.
@ -750,15 +752,15 @@ fn main() {
debug!("rustc arguments: {:?}", rustc_args); debug!("rustc arguments: {:?}", rustc_args);
debug!("crate arguments: {:?}", miri_config.args); debug!("crate arguments: {:?}", miri_config.args);
#[cfg(target_os = "linux")]
if !miri_config.native_lib.is_empty() && miri_config.native_lib_enable_tracing { if !miri_config.native_lib.is_empty() && miri_config.native_lib_enable_tracing {
// FIXME: This should display a diagnostic / warning on error // SAFETY: No other threads are running
// SAFETY: If any other threads exist at this point (namely for the ctrlc #[cfg(target_os = "linux")]
// handler), they will not interact with anything on the main rustc/Miri if unsafe { miri::native_lib::init_sv() }.is_err() {
// thread in an async-signal-unsafe way such as by accessing shared eprintln!(
// semaphores, etc.; the handler only calls `sleep()` and `exit()`, which "warning: The native-lib tracer could not be started. Is this an x86 Linux system, and does Miri have permissions to ptrace?\n\
// are async-signal-safe, as is accessing atomics Falling back to non-tracing native-lib mode."
//let _ = unsafe { miri::native_lib::init_sv() }; );
}
} }
run_compiler_and_exit( run_compiler_and_exit(
&rustc_args, &rustc_args,

View file

@ -260,6 +260,7 @@ impl GlobalStateInner {
kind: MemoryKind, kind: MemoryKind,
machine: &MiriMachine<'_>, machine: &MiriMachine<'_>,
) -> AllocState { ) -> AllocState {
let _span = enter_trace_span!(borrow_tracker::new_allocation, ?id, ?alloc_size, ?kind);
match self.borrow_tracker_method { match self.borrow_tracker_method {
BorrowTrackerMethod::StackedBorrows => BorrowTrackerMethod::StackedBorrows =>
AllocState::StackedBorrows(Box::new(RefCell::new(Stacks::new_allocation( AllocState::StackedBorrows(Box::new(RefCell::new(Stacks::new_allocation(
@ -280,6 +281,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
kind: RetagKind, kind: RetagKind,
val: &ImmTy<'tcx>, val: &ImmTy<'tcx>,
) -> InterpResult<'tcx, ImmTy<'tcx>> { ) -> InterpResult<'tcx, ImmTy<'tcx>> {
let _span = enter_trace_span!(borrow_tracker::retag_ptr_value, ?kind, ?val.layout);
let this = self.eval_context_mut(); let this = self.eval_context_mut();
let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method; let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method;
match method { match method {
@ -293,6 +295,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
kind: RetagKind, kind: RetagKind,
place: &PlaceTy<'tcx>, place: &PlaceTy<'tcx>,
) -> InterpResult<'tcx> { ) -> InterpResult<'tcx> {
let _span = enter_trace_span!(borrow_tracker::retag_place_contents, ?kind, ?place);
let this = self.eval_context_mut(); let this = self.eval_context_mut();
let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method; let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method;
match method { match method {
@ -302,6 +305,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
} }
fn protect_place(&mut self, place: &MPlaceTy<'tcx>) -> InterpResult<'tcx, MPlaceTy<'tcx>> { fn protect_place(&mut self, place: &MPlaceTy<'tcx>) -> InterpResult<'tcx, MPlaceTy<'tcx>> {
let _span = enter_trace_span!(borrow_tracker::protect_place, ?place);
let this = self.eval_context_mut(); let this = self.eval_context_mut();
let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method; let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method;
match method { match method {
@ -311,6 +315,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
} }
fn expose_tag(&self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> { fn expose_tag(&self, alloc_id: AllocId, tag: BorTag) -> InterpResult<'tcx> {
let _span =
enter_trace_span!(borrow_tracker::expose_tag, alloc_id = alloc_id.0, tag = tag.0);
let this = self.eval_context_ref(); let this = self.eval_context_ref();
let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method; let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method;
match method { match method {
@ -354,6 +360,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
&self, &self,
frame: &Frame<'tcx, Provenance, FrameExtra<'tcx>>, frame: &Frame<'tcx, Provenance, FrameExtra<'tcx>>,
) -> InterpResult<'tcx> { ) -> InterpResult<'tcx> {
let _span = enter_trace_span!(borrow_tracker::on_stack_pop);
let this = self.eval_context_ref(); let this = self.eval_context_ref();
let borrow_tracker = this.machine.borrow_tracker.as_ref().unwrap(); let borrow_tracker = this.machine.borrow_tracker.as_ref().unwrap();
// The body of this loop needs `borrow_tracker` immutably // The body of this loop needs `borrow_tracker` immutably
@ -431,6 +438,7 @@ impl AllocState {
range: AllocRange, range: AllocRange,
machine: &MiriMachine<'tcx>, machine: &MiriMachine<'tcx>,
) -> InterpResult<'tcx> { ) -> InterpResult<'tcx> {
let _span = enter_trace_span!(borrow_tracker::before_memory_read, alloc_id = alloc_id.0);
match self { match self {
AllocState::StackedBorrows(sb) => AllocState::StackedBorrows(sb) =>
sb.borrow_mut().before_memory_read(alloc_id, prov_extra, range, machine), sb.borrow_mut().before_memory_read(alloc_id, prov_extra, range, machine),
@ -452,6 +460,7 @@ impl AllocState {
range: AllocRange, range: AllocRange,
machine: &MiriMachine<'tcx>, machine: &MiriMachine<'tcx>,
) -> InterpResult<'tcx> { ) -> InterpResult<'tcx> {
let _span = enter_trace_span!(borrow_tracker::before_memory_write, alloc_id = alloc_id.0);
match self { match self {
AllocState::StackedBorrows(sb) => AllocState::StackedBorrows(sb) =>
sb.get_mut().before_memory_write(alloc_id, prov_extra, range, machine), sb.get_mut().before_memory_write(alloc_id, prov_extra, range, machine),
@ -473,6 +482,8 @@ impl AllocState {
size: Size, size: Size,
machine: &MiriMachine<'tcx>, machine: &MiriMachine<'tcx>,
) -> InterpResult<'tcx> { ) -> InterpResult<'tcx> {
let _span =
enter_trace_span!(borrow_tracker::before_memory_deallocation, alloc_id = alloc_id.0);
match self { match self {
AllocState::StackedBorrows(sb) => AllocState::StackedBorrows(sb) =>
sb.get_mut().before_memory_deallocation(alloc_id, prov_extra, size, machine), sb.get_mut().before_memory_deallocation(alloc_id, prov_extra, size, machine),
@ -482,6 +493,7 @@ impl AllocState {
} }
pub fn remove_unreachable_tags(&self, tags: &FxHashSet<BorTag>) { pub fn remove_unreachable_tags(&self, tags: &FxHashSet<BorTag>) {
let _span = enter_trace_span!(borrow_tracker::remove_unreachable_tags);
match self { match self {
AllocState::StackedBorrows(sb) => sb.borrow_mut().remove_unreachable_tags(tags), AllocState::StackedBorrows(sb) => sb.borrow_mut().remove_unreachable_tags(tags),
AllocState::TreeBorrows(tb) => tb.borrow_mut().remove_unreachable_tags(tags), AllocState::TreeBorrows(tb) => tb.borrow_mut().remove_unreachable_tags(tags),
@ -496,6 +508,11 @@ impl AllocState {
tag: BorTag, tag: BorTag,
alloc_id: AllocId, // diagnostics alloc_id: AllocId, // diagnostics
) -> InterpResult<'tcx> { ) -> InterpResult<'tcx> {
let _span = enter_trace_span!(
borrow_tracker::release_protector,
alloc_id = alloc_id.0,
tag = tag.0
);
match self { match self {
AllocState::StackedBorrows(_sb) => interp_ok(()), AllocState::StackedBorrows(_sb) => interp_ok(()),
AllocState::TreeBorrows(tb) => AllocState::TreeBorrows(tb) =>
@ -506,6 +523,7 @@ impl AllocState {
impl VisitProvenance for AllocState { impl VisitProvenance for AllocState {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) { fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
let _span = enter_trace_span!(borrow_tracker::visit_provenance);
match self { match self {
AllocState::StackedBorrows(sb) => sb.visit_provenance(visit), AllocState::StackedBorrows(sb) => sb.visit_provenance(visit),
AllocState::TreeBorrows(tb) => tb.visit_provenance(visit), AllocState::TreeBorrows(tb) => tb.visit_provenance(visit),

View file

@ -30,7 +30,7 @@ pub type AllocState = Stacks;
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Stacks { pub struct Stacks {
// Even reading memory can have effects on the stack, so we need a `RefCell` here. // Even reading memory can have effects on the stack, so we need a `RefCell` here.
stacks: RangeMap<Stack>, stacks: DedupRangeMap<Stack>,
/// Stores past operations on this allocation /// Stores past operations on this allocation
history: AllocHistory, history: AllocHistory,
/// The set of tags that have been exposed inside this allocation. /// The set of tags that have been exposed inside this allocation.
@ -468,7 +468,7 @@ impl<'tcx> Stacks {
let stack = Stack::new(item); let stack = Stack::new(item);
Stacks { Stacks {
stacks: RangeMap::new(size, stack), stacks: DedupRangeMap::new(size, stack),
history: AllocHistory::new(id, item, machine), history: AllocHistory::new(id, item, machine),
exposed_tags: FxHashSet::default(), exposed_tags: FxHashSet::default(),
} }

View file

@ -314,7 +314,7 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
let span = this.machine.current_span(); let span = this.machine.current_span();
// Store initial permissions and their corresponding range. // Store initial permissions and their corresponding range.
let mut perms_map: RangeMap<LocationState> = RangeMap::new( let mut perms_map: DedupRangeMap<LocationState> = DedupRangeMap::new(
ptr_size, ptr_size,
LocationState::new_accessed(Permission::new_disabled(), IdempotentForeignAccess::None), // this will be overwritten LocationState::new_accessed(Permission::new_disabled(), IdempotentForeignAccess::None), // this will be overwritten
); );

View file

@ -247,7 +247,7 @@ pub struct Tree {
/// `unwrap` any `perm.get(key)`. /// `unwrap` any `perm.get(key)`.
/// ///
/// We do uphold the fact that `keys(perms)` is a subset of `keys(nodes)` /// We do uphold the fact that `keys(perms)` is a subset of `keys(nodes)`
pub(super) rperms: RangeMap<UniValMap<LocationState>>, pub(super) rperms: DedupRangeMap<UniValMap<LocationState>>,
/// The index of the root node. /// The index of the root node.
pub(super) root: UniIndex, pub(super) root: UniIndex,
} }
@ -609,7 +609,7 @@ impl Tree {
IdempotentForeignAccess::None, IdempotentForeignAccess::None,
), ),
); );
RangeMap::new(size, perms) DedupRangeMap::new(size, perms)
}; };
Self { root: root_idx, nodes, rperms, tag_mapping } Self { root: root_idx, nodes, rperms, tag_mapping }
} }
@ -631,7 +631,7 @@ impl<'tcx> Tree {
base_offset: Size, base_offset: Size,
parent_tag: BorTag, parent_tag: BorTag,
new_tag: BorTag, new_tag: BorTag,
initial_perms: RangeMap<LocationState>, initial_perms: DedupRangeMap<LocationState>,
default_perm: Permission, default_perm: Permission,
protected: bool, protected: bool,
span: Span, span: Span,

View file

@ -997,7 +997,7 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct VClockAlloc { pub struct VClockAlloc {
/// Assigning each byte a MemoryCellClocks. /// Assigning each byte a MemoryCellClocks.
alloc_ranges: RefCell<RangeMap<MemoryCellClocks>>, alloc_ranges: RefCell<DedupRangeMap<MemoryCellClocks>>,
} }
impl VisitProvenance for VClockAlloc { impl VisitProvenance for VClockAlloc {
@ -1045,7 +1045,7 @@ impl VClockAlloc {
(VTimestamp::ZERO, global.thread_index(ThreadId::MAIN_THREAD)), (VTimestamp::ZERO, global.thread_index(ThreadId::MAIN_THREAD)),
}; };
VClockAlloc { VClockAlloc {
alloc_ranges: RefCell::new(RangeMap::new( alloc_ranges: RefCell::new(DedupRangeMap::new(
len, len,
MemoryCellClocks::new(alloc_timestamp, alloc_index), MemoryCellClocks::new(alloc_timestamp, alloc_index),
)), )),

View file

@ -2,7 +2,6 @@ pub mod cpu_affinity;
pub mod data_race; pub mod data_race;
mod data_race_handler; mod data_race_handler;
pub mod init_once; pub mod init_once;
mod range_object_map;
pub mod sync; pub mod sync;
pub mod thread; pub mod thread;
mod vector_clock; mod vector_clock;

View file

@ -186,15 +186,15 @@ pub struct Thread<'tcx> {
/// The join status. /// The join status.
join_status: ThreadJoinStatus, join_status: ThreadJoinStatus,
/// Stack of active panic payloads for the current thread. Used for storing /// Stack of active unwind payloads for the current thread. Used for storing
/// the argument of the call to `miri_start_unwind` (the panic payload) when unwinding. /// the argument of the call to `miri_start_unwind` (the payload) when unwinding.
/// This is pointer-sized, and matches the `Payload` type in `src/libpanic_unwind/miri.rs`. /// This is pointer-sized, and matches the `Payload` type in `src/libpanic_unwind/miri.rs`.
/// ///
/// In real unwinding, the payload gets passed as an argument to the landing pad, /// In real unwinding, the payload gets passed as an argument to the landing pad,
/// which then forwards it to 'Resume'. However this argument is implicit in MIR, /// which then forwards it to 'Resume'. However this argument is implicit in MIR,
/// so we have to store it out-of-band. When there are multiple active unwinds, /// so we have to store it out-of-band. When there are multiple active unwinds,
/// the innermost one is always caught first, so we can store them as a stack. /// the innermost one is always caught first, so we can store them as a stack.
pub(crate) panic_payloads: Vec<ImmTy<'tcx>>, pub(crate) unwind_payloads: Vec<ImmTy<'tcx>>,
/// Last OS error location in memory. It is a 32-bit integer. /// Last OS error location in memory. It is a 32-bit integer.
pub(crate) last_error: Option<MPlaceTy<'tcx>>, pub(crate) last_error: Option<MPlaceTy<'tcx>>,
@ -282,7 +282,7 @@ impl<'tcx> Thread<'tcx> {
stack: Vec::new(), stack: Vec::new(),
top_user_relevant_frame: None, top_user_relevant_frame: None,
join_status: ThreadJoinStatus::Joinable, join_status: ThreadJoinStatus::Joinable,
panic_payloads: Vec::new(), unwind_payloads: Vec::new(),
last_error: None, last_error: None,
on_stack_empty, on_stack_empty,
} }
@ -292,7 +292,7 @@ impl<'tcx> Thread<'tcx> {
impl VisitProvenance for Thread<'_> { impl VisitProvenance for Thread<'_> {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) { fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
let Thread { let Thread {
panic_payloads: panic_payload, unwind_payloads: panic_payload,
last_error, last_error,
stack, stack,
top_user_relevant_frame: _, top_user_relevant_frame: _,
@ -677,6 +677,8 @@ trait EvalContextPrivExt<'tcx>: MiriInterpCxExt<'tcx> {
fn run_on_stack_empty(&mut self) -> InterpResult<'tcx, Poll<()>> { fn run_on_stack_empty(&mut self) -> InterpResult<'tcx, Poll<()>> {
let this = self.eval_context_mut(); let this = self.eval_context_mut();
// Inform GenMC that a thread has finished all user code. GenMC needs to know this for scheduling. // Inform GenMC that a thread has finished all user code. GenMC needs to know this for scheduling.
// FIXME(GenMC): Thread-local destructors *are* user code, so this is odd. Also now that we
// support pre-main constructors, it can get called there as well.
if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() { if let Some(genmc_ctx) = this.machine.data_race.as_genmc_ref() {
let thread_id = this.active_thread(); let thread_id = this.active_thread();
genmc_ctx.handle_thread_stack_empty(thread_id); genmc_ctx.handle_thread_stack_empty(thread_id);

View file

@ -90,9 +90,9 @@ use rustc_data_structures::fx::FxHashMap;
use super::AllocDataRaceHandler; use super::AllocDataRaceHandler;
use super::data_race::{GlobalState as DataRaceState, ThreadClockSet}; use super::data_race::{GlobalState as DataRaceState, ThreadClockSet};
use super::range_object_map::{AccessType, RangeObjectMap};
use super::vector_clock::{VClock, VTimestamp, VectorIdx}; use super::vector_clock::{VClock, VTimestamp, VectorIdx};
use crate::concurrency::GlobalDataRaceHandler; use crate::concurrency::GlobalDataRaceHandler;
use crate::data_structures::range_object_map::{AccessType, RangeObjectMap};
use crate::*; use crate::*;
pub type AllocState = StoreBufferAlloc; pub type AllocState = StoreBufferAlloc;

View file

@ -17,18 +17,18 @@ struct Elem<T> {
data: T, data: T,
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct RangeMap<T> { pub struct DedupRangeMap<T> {
v: Vec<Elem<T>>, v: Vec<Elem<T>>,
} }
impl<T> RangeMap<T> { impl<T> DedupRangeMap<T> {
/// Creates a new `RangeMap` for the given size, and with the given initial value used for /// Creates a new `RangeMap` for the given size, and with the given initial value used for
/// the entire range. /// the entire range.
#[inline(always)] #[inline(always)]
pub fn new(size: Size, init: T) -> RangeMap<T> { pub fn new(size: Size, init: T) -> DedupRangeMap<T> {
let size = size.bytes(); let size = size.bytes();
let v = if size > 0 { vec![Elem { range: 0..size, data: init }] } else { Vec::new() }; let v = if size > 0 { vec![Elem { range: 0..size, data: init }] } else { Vec::new() };
RangeMap { v } DedupRangeMap { v }
} }
pub fn size(&self) -> Size { pub fn size(&self) -> Size {
@ -246,7 +246,7 @@ mod tests {
use super::*; use super::*;
/// Query the map at every offset in the range and collect the results. /// Query the map at every offset in the range and collect the results.
fn to_vec<T: Copy>(map: &RangeMap<T>, offset: u64, len: u64) -> Vec<T> { fn to_vec<T: Copy>(map: &DedupRangeMap<T>, offset: u64, len: u64) -> Vec<T> {
(offset..offset + len) (offset..offset + len)
.map(|i| { .map(|i| {
map.iter(Size::from_bytes(i), Size::from_bytes(1)).next().map(|(_, &t)| t).unwrap() map.iter(Size::from_bytes(i), Size::from_bytes(1)).next().map(|(_, &t)| t).unwrap()
@ -256,7 +256,7 @@ mod tests {
#[test] #[test]
fn basic_insert() { fn basic_insert() {
let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1); let mut map = DedupRangeMap::<i32>::new(Size::from_bytes(20), -1);
// Insert. // Insert.
for (_, x) in map.iter_mut(Size::from_bytes(10), Size::from_bytes(1)) { for (_, x) in map.iter_mut(Size::from_bytes(10), Size::from_bytes(1)) {
*x = 42; *x = 42;
@ -278,7 +278,7 @@ mod tests {
#[test] #[test]
fn gaps() { fn gaps() {
let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1); let mut map = DedupRangeMap::<i32>::new(Size::from_bytes(20), -1);
for (_, x) in map.iter_mut(Size::from_bytes(11), Size::from_bytes(1)) { for (_, x) in map.iter_mut(Size::from_bytes(11), Size::from_bytes(1)) {
*x = 42; *x = 42;
} }
@ -319,26 +319,26 @@ mod tests {
#[test] #[test]
#[should_panic] #[should_panic]
fn out_of_range_iter_mut() { fn out_of_range_iter_mut() {
let mut map = RangeMap::<i32>::new(Size::from_bytes(20), -1); let mut map = DedupRangeMap::<i32>::new(Size::from_bytes(20), -1);
let _ = map.iter_mut(Size::from_bytes(11), Size::from_bytes(11)); let _ = map.iter_mut(Size::from_bytes(11), Size::from_bytes(11));
} }
#[test] #[test]
#[should_panic] #[should_panic]
fn out_of_range_iter() { fn out_of_range_iter() {
let map = RangeMap::<i32>::new(Size::from_bytes(20), -1); let map = DedupRangeMap::<i32>::new(Size::from_bytes(20), -1);
let _ = map.iter(Size::from_bytes(11), Size::from_bytes(11)); let _ = map.iter(Size::from_bytes(11), Size::from_bytes(11));
} }
#[test] #[test]
fn empty_map_iter() { fn empty_map_iter() {
let map = RangeMap::<i32>::new(Size::from_bytes(0), -1); let map = DedupRangeMap::<i32>::new(Size::from_bytes(0), -1);
let _ = map.iter(Size::from_bytes(0), Size::from_bytes(0)); let _ = map.iter(Size::from_bytes(0), Size::from_bytes(0));
} }
#[test] #[test]
fn empty_map_iter_mut() { fn empty_map_iter_mut() {
let mut map = RangeMap::<i32>::new(Size::from_bytes(0), -1); let mut map = DedupRangeMap::<i32>::new(Size::from_bytes(0), -1);
let _ = map.iter_mut(Size::from_bytes(0), Size::from_bytes(0)); let _ = map.iter_mut(Size::from_bytes(0), Size::from_bytes(0));
} }
} }

View file

@ -0,0 +1,3 @@
pub mod dedup_range_map;
pub mod mono_hash_map;
pub mod range_object_map;

View file

@ -11,14 +11,14 @@ use rustc_abi::ExternAbi;
use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_hir::def::Namespace; use rustc_hir::def::Namespace;
use rustc_hir::def_id::DefId; use rustc_hir::def_id::DefId;
use rustc_middle::ty::layout::LayoutCx; use rustc_middle::ty::layout::{HasTyCtxt, HasTypingEnv, LayoutCx};
use rustc_middle::ty::{self, Ty, TyCtxt}; use rustc_middle::ty::{self, Ty, TyCtxt};
use rustc_session::config::EntryFnType; use rustc_session::config::EntryFnType;
use crate::concurrency::GenmcCtx; use crate::concurrency::GenmcCtx;
use crate::concurrency::thread::TlsAllocAction; use crate::concurrency::thread::TlsAllocAction;
use crate::diagnostics::report_leaks; use crate::diagnostics::report_leaks;
use crate::shims::tls; use crate::shims::{global_ctor, tls};
use crate::*; use crate::*;
#[derive(Copy, Clone, Debug)] #[derive(Copy, Clone, Debug)]
@ -216,9 +216,17 @@ impl Default for MiriConfig {
} }
/// The state of the main thread. Implementation detail of `on_main_stack_empty`. /// The state of the main thread. Implementation detail of `on_main_stack_empty`.
#[derive(Default, Debug)] #[derive(Debug)]
enum MainThreadState<'tcx> { enum MainThreadState<'tcx> {
#[default] GlobalCtors {
ctor_state: global_ctor::GlobalCtorState<'tcx>,
/// The main function to call.
entry_id: DefId,
entry_type: MiriEntryFnType,
/// Arguments passed to `main`.
argc: ImmTy<'tcx>,
argv: ImmTy<'tcx>,
},
Running, Running,
TlsDtors(tls::TlsDtorsState<'tcx>), TlsDtors(tls::TlsDtorsState<'tcx>),
Yield { Yield {
@ -234,6 +242,15 @@ impl<'tcx> MainThreadState<'tcx> {
) -> InterpResult<'tcx, Poll<()>> { ) -> InterpResult<'tcx, Poll<()>> {
use MainThreadState::*; use MainThreadState::*;
match self { match self {
GlobalCtors { ctor_state, entry_id, entry_type, argc, argv } => {
match ctor_state.on_stack_empty(this)? {
Poll::Pending => {} // just keep going
Poll::Ready(()) => {
call_main(this, *entry_id, *entry_type, argc.clone(), argv.clone())?;
*self = Running;
}
}
}
Running => { Running => {
*self = TlsDtors(Default::default()); *self = TlsDtors(Default::default());
} }
@ -309,13 +326,6 @@ pub fn create_ecx<'tcx>(
MiriMachine::new(config, layout_cx, genmc_ctx), MiriMachine::new(config, layout_cx, genmc_ctx),
); );
// Some parts of initialization require a full `InterpCx`.
MiriMachine::late_init(&mut ecx, config, {
let mut state = MainThreadState::default();
// Cannot capture anything GC-relevant here.
Box::new(move |m| state.on_main_stack_empty(m))
})?;
// Make sure we have MIR. We check MIR for some stable monomorphic function in libcore. // Make sure we have MIR. We check MIR for some stable monomorphic function in libcore.
let sentinel = let sentinel =
helpers::try_resolve_path(tcx, &["core", "ascii", "escape_default"], Namespace::ValueNS); helpers::try_resolve_path(tcx, &["core", "ascii", "escape_default"], Namespace::ValueNS);
@ -326,15 +336,9 @@ pub fn create_ecx<'tcx>(
); );
} }
// Setup first stack frame. // Compute argc and argv from `config.args`.
let entry_instance = ty::Instance::mono(tcx, entry_id);
// First argument is constructed later, because it's skipped for `miri_start.`
// Second argument (argc): length of `config.args`.
let argc = let argc =
ImmTy::from_int(i64::try_from(config.args.len()).unwrap(), ecx.machine.layouts.isize); ImmTy::from_int(i64::try_from(config.args.len()).unwrap(), ecx.machine.layouts.isize);
// Third argument (`argv`): created from `config.args`.
let argv = { let argv = {
// Put each argument in memory, collect pointers. // Put each argument in memory, collect pointers.
let mut argvs = Vec::<Immediate<Provenance>>::with_capacity(config.args.len()); let mut argvs = Vec::<Immediate<Provenance>>::with_capacity(config.args.len());
@ -359,7 +363,7 @@ pub fn create_ecx<'tcx>(
ecx.write_immediate(arg, &place)?; ecx.write_immediate(arg, &place)?;
} }
ecx.mark_immutable(&argvs_place); ecx.mark_immutable(&argvs_place);
// Store `argc` and `argv` for macOS `_NSGetArg{c,v}`. // Store `argc` and `argv` for macOS `_NSGetArg{c,v}`, and for the GC to see them.
{ {
let argc_place = let argc_place =
ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?; ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
@ -374,7 +378,7 @@ pub fn create_ecx<'tcx>(
ecx.machine.argv = Some(argv_place.ptr()); ecx.machine.argv = Some(argv_place.ptr());
} }
// Store command line as UTF-16 for Windows `GetCommandLineW`. // Store command line as UTF-16 for Windows `GetCommandLineW`.
{ if tcx.sess.target.os == "windows" {
// Construct a command string with all the arguments. // Construct a command string with all the arguments.
let cmd_utf16: Vec<u16> = args_to_utf16_command_string(config.args.iter()); let cmd_utf16: Vec<u16> = args_to_utf16_command_string(config.args.iter());
@ -395,11 +399,43 @@ pub fn create_ecx<'tcx>(
ImmTy::from_immediate(imm, layout) ImmTy::from_immediate(imm, layout)
}; };
// Some parts of initialization require a full `InterpCx`.
MiriMachine::late_init(&mut ecx, config, {
let mut main_thread_state = MainThreadState::GlobalCtors {
entry_id,
entry_type,
argc,
argv,
ctor_state: global_ctor::GlobalCtorState::default(),
};
// Cannot capture anything GC-relevant here.
// `argc` and `argv` *are* GC_relevant, but they also get stored in `machine.argc` and
// `machine.argv` so we are good.
Box::new(move |m| main_thread_state.on_main_stack_empty(m))
})?;
interp_ok(ecx)
}
// Call the entry function.
fn call_main<'tcx>(
ecx: &mut MiriInterpCx<'tcx>,
entry_id: DefId,
entry_type: MiriEntryFnType,
argc: ImmTy<'tcx>,
argv: ImmTy<'tcx>,
) -> InterpResult<'tcx, ()> {
let tcx = ecx.tcx();
// Setup first stack frame.
let entry_instance = ty::Instance::mono(tcx, entry_id);
// Return place (in static memory so that it does not count as leak). // Return place (in static memory so that it does not count as leak).
let ret_place = ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?; let ret_place = ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?;
ecx.machine.main_fn_ret_place = Some(ret_place.clone()); ecx.machine.main_fn_ret_place = Some(ret_place.clone());
// Call start function.
// Call start function.
match entry_type { match entry_type {
MiriEntryFnType::Rustc(EntryFnType::Main { .. }) => { MiriEntryFnType::Rustc(EntryFnType::Main { .. }) => {
let start_id = tcx.lang_items().start_fn().unwrap_or_else(|| { let start_id = tcx.lang_items().start_fn().unwrap_or_else(|| {
@ -409,7 +445,7 @@ pub fn create_ecx<'tcx>(
let main_ret_ty = main_ret_ty.no_bound_vars().unwrap(); let main_ret_ty = main_ret_ty.no_bound_vars().unwrap();
let start_instance = ty::Instance::try_resolve( let start_instance = ty::Instance::try_resolve(
tcx, tcx,
typing_env, ecx.typing_env(),
start_id, start_id,
tcx.mk_args(&[ty::GenericArg::from(main_ret_ty)]), tcx.mk_args(&[ty::GenericArg::from(main_ret_ty)]),
) )
@ -427,7 +463,7 @@ pub fn create_ecx<'tcx>(
ExternAbi::Rust, ExternAbi::Rust,
&[ &[
ImmTy::from_scalar( ImmTy::from_scalar(
Scalar::from_pointer(main_ptr, &ecx), Scalar::from_pointer(main_ptr, ecx),
// FIXME use a proper fn ptr type // FIXME use a proper fn ptr type
ecx.machine.layouts.const_raw_ptr, ecx.machine.layouts.const_raw_ptr,
), ),
@ -450,7 +486,7 @@ pub fn create_ecx<'tcx>(
} }
} }
interp_ok(ecx) interp_ok(())
} }
/// Evaluates the entry function specified by `entry_id`. /// Evaluates the entry function specified by `entry_id`.

View file

@ -1235,8 +1235,11 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
interp_ok(()) interp_ok(())
} }
/// Lookup an array of immediates stored as a linker section of name `name`. /// Lookup an array of immediates from any linker sections matching the provided predicate.
fn lookup_link_section(&mut self, name: &str) -> InterpResult<'tcx, Vec<ImmTy<'tcx>>> { fn lookup_link_section(
&mut self,
include_name: impl Fn(&str) -> bool,
) -> InterpResult<'tcx, Vec<ImmTy<'tcx>>> {
let this = self.eval_context_mut(); let this = self.eval_context_mut();
let tcx = this.tcx.tcx; let tcx = this.tcx.tcx;
@ -1247,7 +1250,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
let Some(link_section) = attrs.link_section else { let Some(link_section) = attrs.link_section else {
return interp_ok(()); return interp_ok(());
}; };
if link_section.as_str() == name { if include_name(link_section.as_str()) {
let instance = ty::Instance::mono(tcx, def_id); let instance = ty::Instance::mono(tcx, def_id);
let const_val = this.eval_global(instance).unwrap_or_else(|err| { let const_val = this.eval_global(instance).unwrap_or_else(|err| {
panic!( panic!(
@ -1431,3 +1434,44 @@ impl ToU64 for usize {
self.try_into().unwrap() self.try_into().unwrap()
} }
} }
/// This struct is needed to enforce `#[must_use]` on values produced by [enter_trace_span] even
/// when the "tracing" feature is not enabled.
#[must_use]
pub struct MaybeEnteredTraceSpan {
#[cfg(feature = "tracing")]
pub _entered_span: tracing::span::EnteredSpan,
}
/// Enters a [tracing::info_span] only if the "tracing" feature is enabled, otherwise does nothing.
/// This is like [rustc_const_eval::enter_trace_span] except that it does not depend on the
/// [Machine] trait to check if tracing is enabled, because from the Miri codebase we can directly
/// check whether the "tracing" feature is enabled, unlike from the rustc_const_eval codebase.
///
/// In addition to the syntax accepted by [tracing::span!], this macro optionally allows passing
/// the span name (i.e. the first macro argument) in the form `NAME::SUBNAME` (without quotes) to
/// indicate that the span has name "NAME" (usually the name of the component) and has an additional
/// more specific name "SUBNAME" (usually the function name). The latter is passed to the [tracing]
/// infrastructure as a span field with the name "NAME". This allows not being distracted by
/// subnames when looking at the trace in <https://ui.perfetto.dev>, but when deeper introspection
/// is needed within a component, it's still possible to view the subnames directly in the UI by
/// selecting a span, clicking on the "NAME" argument on the right, and clicking on "Visualize
/// argument values".
/// ```rust
/// // for example, the first will expand to the second
/// enter_trace_span!(borrow_tracker::on_stack_pop, /* ... */)
/// enter_trace_span!("borrow_tracker", borrow_tracker = "on_stack_pop", /* ... */)
/// ```
#[macro_export]
macro_rules! enter_trace_span {
($name:ident :: $subname:ident $($tt:tt)*) => {{
enter_trace_span!(stringify!($name), $name = %stringify!(subname) $($tt)*)
}};
($($tt:tt)*) => {
$crate::MaybeEnteredTraceSpan {
#[cfg(feature = "tracing")]
_entered_span: tracing::info_span!($($tt)*).entered()
}
};
}

View file

@ -15,6 +15,7 @@
#![feature(unqualified_local_imports)] #![feature(unqualified_local_imports)]
#![feature(derive_coerce_pointee)] #![feature(derive_coerce_pointee)]
#![feature(arbitrary_self_types)] #![feature(arbitrary_self_types)]
#![feature(iter_advance_by)]
// Configure clippy and other lints // Configure clippy and other lints
#![allow( #![allow(
clippy::collapsible_else_if, clippy::collapsible_else_if,
@ -75,16 +76,15 @@ mod alloc_addresses;
mod borrow_tracker; mod borrow_tracker;
mod clock; mod clock;
mod concurrency; mod concurrency;
mod data_structures;
mod diagnostics; mod diagnostics;
mod eval; mod eval;
mod helpers; mod helpers;
mod intrinsics; mod intrinsics;
mod machine; mod machine;
mod math; mod math;
mod mono_hash_map;
mod operator; mod operator;
mod provenance_gc; mod provenance_gc;
mod range_map;
mod shims; mod shims;
// Establish a "crate-wide prelude": we often import `crate::*`. // Establish a "crate-wide prelude": we often import `crate::*`.
@ -97,10 +97,10 @@ pub use rustc_const_eval::interpret::{self, AllocMap, Provenance as _};
use rustc_middle::{bug, span_bug}; use rustc_middle::{bug, span_bug};
use tracing::{info, trace}; use tracing::{info, trace};
//#[cfg(target_os = "linux")] #[cfg(target_os = "linux")]
//pub mod native_lib { pub mod native_lib {
// pub use crate::shims::{init_sv, register_retcode_sv}; pub use crate::shims::{init_sv, register_retcode_sv};
//} }
// Type aliases that set the provenance parameter. // Type aliases that set the provenance parameter.
pub type Pointer = interpret::Pointer<Option<machine::Provenance>>; pub type Pointer = interpret::Pointer<Option<machine::Provenance>>;
@ -132,6 +132,8 @@ pub use crate::concurrency::thread::{
ThreadManager, TimeoutAnchor, TimeoutClock, UnblockKind, ThreadManager, TimeoutAnchor, TimeoutClock, UnblockKind,
}; };
pub use crate::concurrency::{GenmcConfig, GenmcCtx}; pub use crate::concurrency::{GenmcConfig, GenmcCtx};
pub use crate::data_structures::dedup_range_map::DedupRangeMap;
pub use crate::data_structures::mono_hash_map::MonoHashMap;
pub use crate::diagnostics::{ pub use crate::diagnostics::{
EvalContextExt as _, NonHaltingDiagnostic, TerminationInfo, report_error, EvalContextExt as _, NonHaltingDiagnostic, TerminationInfo, report_error,
}; };
@ -139,24 +141,25 @@ pub use crate::eval::{
AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, MiriEntryFnType, RejectOpWith, AlignmentCheck, BacktraceStyle, IsolatedOp, MiriConfig, MiriEntryFnType, RejectOpWith,
ValidationMode, create_ecx, eval_entry, ValidationMode, create_ecx, eval_entry,
}; };
pub use crate::helpers::{AccessKind, EvalContextExt as _, ToU64 as _, ToUsize as _}; pub use crate::helpers::{
AccessKind, EvalContextExt as _, MaybeEnteredTraceSpan, ToU64 as _, ToUsize as _,
};
pub use crate::intrinsics::EvalContextExt as _; pub use crate::intrinsics::EvalContextExt as _;
pub use crate::machine::{ pub use crate::machine::{
AllocExtra, DynMachineCallback, FrameExtra, MachineCallback, MemoryKind, MiriInterpCx, AllocExtra, DynMachineCallback, FrameExtra, MachineCallback, MemoryKind, MiriInterpCx,
MiriInterpCxExt, MiriMachine, MiriMemoryKind, PrimitiveLayouts, Provenance, ProvenanceExtra, MiriInterpCxExt, MiriMachine, MiriMemoryKind, PrimitiveLayouts, Provenance, ProvenanceExtra,
}; };
pub use crate::mono_hash_map::MonoHashMap;
pub use crate::operator::EvalContextExt as _; pub use crate::operator::EvalContextExt as _;
pub use crate::provenance_gc::{EvalContextExt as _, LiveAllocs, VisitProvenance, VisitWith}; pub use crate::provenance_gc::{EvalContextExt as _, LiveAllocs, VisitProvenance, VisitWith};
pub use crate::range_map::RangeMap;
pub use crate::shims::EmulateItemResult; pub use crate::shims::EmulateItemResult;
pub use crate::shims::env::{EnvVars, EvalContextExt as _}; pub use crate::shims::env::{EnvVars, EvalContextExt as _};
pub use crate::shims::foreign_items::{DynSym, EvalContextExt as _}; pub use crate::shims::foreign_items::{DynSym, EvalContextExt as _};
pub use crate::shims::io_error::{EvalContextExt as _, IoError, LibcError}; pub use crate::shims::io_error::{EvalContextExt as _, IoError, LibcError};
pub use crate::shims::os_str::EvalContextExt as _; pub use crate::shims::os_str::EvalContextExt as _;
pub use crate::shims::panic::{CatchUnwindData, EvalContextExt as _}; pub use crate::shims::panic::EvalContextExt as _;
pub use crate::shims::time::EvalContextExt as _; pub use crate::shims::time::EvalContextExt as _;
pub use crate::shims::tls::TlsData; pub use crate::shims::tls::TlsData;
pub use crate::shims::unwind::{CatchUnwindData, EvalContextExt as _};
/// Insert rustc arguments at the beginning of the argument list that Miri wants to be /// Insert rustc arguments at the beginning of the argument list that Miri wants to be
/// set per default, for maximal validation power. /// set per default, for maximal validation power.

View file

@ -1828,8 +1828,11 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
fn enter_trace_span(span: impl FnOnce() -> tracing::Span) -> impl EnteredTraceSpan { fn enter_trace_span(span: impl FnOnce() -> tracing::Span) -> impl EnteredTraceSpan {
#[cfg(feature = "tracing")] #[cfg(feature = "tracing")]
{ span().entered() } {
span().entered()
}
#[cfg(not(feature = "tracing"))] #[cfg(not(feature = "tracing"))]
#[expect(clippy::unused_unit)]
{ {
let _ = span; // so we avoid the "unused variable" warning let _ = span; // so we avoid the "unused variable" warning
() ()

View file

@ -0,0 +1,98 @@
//! Implement global constructors.
use std::task::Poll;
use rustc_abi::ExternAbi;
use rustc_target::spec::BinaryFormat;
use crate::*;
#[derive(Debug, Default)]
pub struct GlobalCtorState<'tcx>(GlobalCtorStatePriv<'tcx>);
#[derive(Debug, Default)]
enum GlobalCtorStatePriv<'tcx> {
#[default]
Init,
/// The list of constructor functions that we still have to call.
Ctors(Vec<ImmTy<'tcx>>),
Done,
}
impl<'tcx> GlobalCtorState<'tcx> {
pub fn on_stack_empty(
&mut self,
this: &mut MiriInterpCx<'tcx>,
) -> InterpResult<'tcx, Poll<()>> {
use GlobalCtorStatePriv::*;
let new_state = 'new_state: {
match &mut self.0 {
Init => {
let this = this.eval_context_mut();
// Lookup constructors from the relevant magic link section.
let ctors = match this.tcx.sess.target.binary_format {
// Read the CRT library section on Windows.
BinaryFormat::Coff =>
this.lookup_link_section(|section| section == ".CRT$XCU")?,
// Read the `__mod_init_func` section on macOS.
BinaryFormat::MachO =>
this.lookup_link_section(|section| {
let mut parts = section.splitn(3, ',');
let (segment_name, section_name, section_type) =
(parts.next(), parts.next(), parts.next());
segment_name == Some("__DATA")
&& section_name == Some("__mod_init_func")
// The `mod_init_funcs` directive ensures that the
// `S_MOD_INIT_FUNC_POINTERS` flag is set on the section. LLVM
// adds this automatically so we currently do not require it.
// FIXME: is this guaranteed LLVM behavior? If not, we shouldn't
// implicitly add it here. Also see
// <https://github.com/rust-lang/miri/pull/4459#discussion_r2200115629>.
&& matches!(section_type, None | Some("mod_init_funcs"))
})?,
// Read the standard `.init_array` section on platforms that use ELF, or WASM,
// which supports the same linker directive.
// FIXME: Add support for `.init_array.N` and `.ctors`?
BinaryFormat::Elf | BinaryFormat::Wasm =>
this.lookup_link_section(|section| section == ".init_array")?,
// Other platforms have no global ctor support.
_ => break 'new_state Done,
};
break 'new_state Ctors(ctors);
}
Ctors(ctors) => {
if let Some(ctor) = ctors.pop() {
let this = this.eval_context_mut();
let ctor = ctor.to_scalar().to_pointer(this)?;
let thread_callback = this.get_ptr_fn(ctor)?.as_instance()?;
// The signature of this function is `unsafe extern "C" fn()`.
this.call_function(
thread_callback,
ExternAbi::C { unwind: false },
&[],
None,
ReturnContinuation::Stop { cleanup: true },
)?;
return interp_ok(Poll::Pending); // we stay in this state (but `ctors` got shorter)
}
// No more constructors to run.
break 'new_state Done;
}
Done => return interp_ok(Poll::Ready(())),
}
};
self.0 = new_state;
interp_ok(Poll::Pending)
}
}

View file

@ -14,15 +14,17 @@ mod x86;
pub mod env; pub mod env;
pub mod extern_static; pub mod extern_static;
pub mod foreign_items; pub mod foreign_items;
pub mod global_ctor;
pub mod io_error; pub mod io_error;
pub mod os_str; pub mod os_str;
pub mod panic; pub mod panic;
pub mod time; pub mod time;
pub mod tls; pub mod tls;
pub mod unwind;
pub use self::files::FdTable; pub use self::files::FdTable;
//#[cfg(target_os = "linux")] #[cfg(target_os = "linux")]
//pub use self::native_lib::trace::{init_sv, register_retcode_sv}; pub use self::native_lib::trace::{init_sv, register_retcode_sv};
pub use self::unix::{DirTable, EpollInterestTable}; pub use self::unix::{DirTable, EpollInterestTable};
/// What needs to be done after emulating an item (a shim or an intrinsic) is done. /// What needs to be done after emulating an item (a shim or an intrinsic) is done.

View file

@ -1,9 +1,5 @@
//! Implements calling functions from a native library. //! Implements calling functions from a native library.
// FIXME: disabled since it fails to build on many targets.
//#[cfg(target_os = "linux")]
//pub mod trace;
use std::ops::Deref; use std::ops::Deref;
use libffi::high::call as ffi; use libffi::high::call as ffi;
@ -13,14 +9,68 @@ use rustc_middle::mir::interpret::Pointer;
use rustc_middle::ty::{self as ty, IntTy, UintTy}; use rustc_middle::ty::{self as ty, IntTy, UintTy};
use rustc_span::Symbol; use rustc_span::Symbol;
//#[cfg(target_os = "linux")] #[cfg_attr(
//use self::trace::Supervisor; not(all(
target_os = "linux",
target_env = "gnu",
any(target_arch = "x86", target_arch = "x86_64")
)),
path = "trace/stub.rs"
)]
pub mod trace;
use crate::*; use crate::*;
//#[cfg(target_os = "linux")] /// The final results of an FFI trace, containing every relevant event detected
//type CallResult<'tcx> = InterpResult<'tcx, (ImmTy<'tcx>, Option<self::trace::messages::MemEvents>)>; /// by the tracer.
//#[cfg(not(target_os = "linux"))] #[allow(dead_code)]
type CallResult<'tcx> = InterpResult<'tcx, (ImmTy<'tcx>, Option<!>)>; #[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug)]
pub struct MemEvents {
/// An list of memory accesses that occurred, in the order they occurred in.
pub acc_events: Vec<AccessEvent>,
}
/// A single memory access.
#[allow(dead_code)]
#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Debug)]
pub enum AccessEvent {
/// A read occurred on this memory range.
Read(AccessRange),
/// A write may have occurred on this memory range.
/// Some instructions *may* write memory without *always* doing that,
/// so this can be an over-approximation.
/// The range info, however, is reliable if the access did happen.
/// If the second field is true, the access definitely happened.
Write(AccessRange, bool),
}
impl AccessEvent {
fn get_range(&self) -> AccessRange {
match self {
AccessEvent::Read(access_range) => access_range.clone(),
AccessEvent::Write(access_range, _) => access_range.clone(),
}
}
}
/// The memory touched by a given access.
#[allow(dead_code)]
#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Debug)]
pub struct AccessRange {
/// The base address in memory where an access occurred.
pub addr: usize,
/// The number of bytes affected from the base.
pub size: usize,
}
impl AccessRange {
fn end(&self) -> usize {
self.addr.strict_add(self.size)
}
}
impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {} impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
@ -31,18 +81,17 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
dest: &MPlaceTy<'tcx>, dest: &MPlaceTy<'tcx>,
ptr: CodePtr, ptr: CodePtr,
libffi_args: Vec<libffi::high::Arg<'a>>, libffi_args: Vec<libffi::high::Arg<'a>>,
) -> CallResult<'tcx> { ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<MemEvents>)> {
let this = self.eval_context_mut(); let this = self.eval_context_mut();
//#[cfg(target_os = "linux")] #[cfg(target_os = "linux")]
//let alloc = this.machine.allocator.as_ref().unwrap(); let alloc = this.machine.allocator.as_ref().unwrap();
#[cfg(not(target_os = "linux"))]
// Placeholder value.
let alloc = ();
// SAFETY: We don't touch the machine memory past this point. trace::Supervisor::do_ffi(alloc, || {
//#[cfg(target_os = "linux")] // Call the function (`ptr`) with arguments `libffi_args`, and obtain the return value
//let (guard, stack_ptr) = unsafe { Supervisor::start_ffi(alloc) }; // as the specified primitive integer type
// Call the function (`ptr`) with arguments `libffi_args`, and obtain the return value
// as the specified primitive integer type
let res = 'res: {
let scalar = match dest.layout.ty.kind() { let scalar = match dest.layout.ty.kind() {
// ints // ints
ty::Int(IntTy::I8) => { ty::Int(IntTy::I8) => {
@ -93,7 +142,7 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
// have the output_type `Tuple([])`. // have the output_type `Tuple([])`.
ty::Tuple(t_list) if (*t_list).deref().is_empty() => { ty::Tuple(t_list) if (*t_list).deref().is_empty() => {
unsafe { ffi::call::<()>(ptr, libffi_args.as_slice()) }; unsafe { ffi::call::<()>(ptr, libffi_args.as_slice()) };
break 'res interp_ok(ImmTy::uninit(dest.layout)); return interp_ok(ImmTy::uninit(dest.layout));
} }
ty::RawPtr(..) => { ty::RawPtr(..) => {
let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args.as_slice()) }; let x = unsafe { ffi::call::<*const ()>(ptr, libffi_args.as_slice()) };
@ -101,23 +150,14 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
Scalar::from_pointer(ptr, this) Scalar::from_pointer(ptr, this)
} }
_ => _ =>
break 'res Err(err_unsup_format!( return Err(err_unsup_format!(
"unsupported return type for native call: {:?}", "unsupported return type for native call: {:?}",
link_name link_name
)) ))
.into(), .into(),
}; };
interp_ok(ImmTy::from_scalar(scalar, dest.layout)) interp_ok(ImmTy::from_scalar(scalar, dest.layout))
}; })
// SAFETY: We got the guard and stack pointer from start_ffi, and
// the allocator is the same
//#[cfg(target_os = "linux")]
//let events = unsafe { Supervisor::end_ffi(alloc, guard, stack_ptr) };
//#[cfg(not(target_os = "linux"))]
let events = None;
interp_ok((res?, events))
} }
/// Get the pointer to the function of the specified name in the shared object file, /// Get the pointer to the function of the specified name in the shared object file,
@ -169,6 +209,73 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
} }
None None
} }
/// Applies the `events` to Miri's internal state. The event vector must be
/// ordered sequentially by when the accesses happened, and the sizes are
/// assumed to be exact.
fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
for evt in events.acc_events {
let evt_rg = evt.get_range();
// LLVM at least permits vectorising accesses to adjacent allocations,
// so we cannot assume 1 access = 1 allocation. :(
let mut rg = evt_rg.addr..evt_rg.end();
while let Some(curr) = rg.next() {
let Some(alloc_id) = this.alloc_id_from_addr(
curr.to_u64(),
rg.len().try_into().unwrap(),
/* only_exposed_allocations */ true,
) else {
throw_ub_format!("Foreign code did an out-of-bounds access!")
};
let alloc = this.get_alloc_raw(alloc_id)?;
// The logical and physical address of the allocation coincide, so we can use
// this instead of `addr_from_alloc_id`.
let alloc_addr = alloc.get_bytes_unchecked_raw().addr();
// Determine the range inside the allocation that this access covers. This range is
// in terms of offsets from the start of `alloc`. The start of the overlap range
// will be `curr`; the end will be the minimum of the end of the allocation and the
// end of the access' range.
let overlap = curr.strict_sub(alloc_addr)
..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr));
// Skip forward however many bytes of the access are contained in the current
// allocation, subtracting 1 since the overlap range includes the current addr
// that was already popped off of the range.
rg.advance_by(overlap.len().strict_sub(1)).unwrap();
match evt {
AccessEvent::Read(_) => {
// FIXME: ProvenanceMap should have something like get_range().
let p_map = alloc.provenance();
for idx in overlap {
// If a provenance was read by the foreign code, expose it.
if let Some(prov) = p_map.get(Size::from_bytes(idx), this) {
this.expose_provenance(prov)?;
}
}
}
AccessEvent::Write(_, certain) => {
// Sometimes we aren't certain if a write happened, in which case we
// only initialise that data if the allocation is mutable.
if certain || alloc.mutability.is_mut() {
let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
alloc.process_native_write(
&cx.tcx,
Some(AllocRange {
start: Size::from_bytes(overlap.start),
size: Size::from_bytes(overlap.len()),
}),
)
}
}
}
}
}
interp_ok(())
}
} }
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
@ -194,6 +301,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
} }
}; };
// Do we have ptrace?
let tracing = trace::Supervisor::is_enabled();
// Get the function arguments, and convert them to `libffi`-compatible form. // Get the function arguments, and convert them to `libffi`-compatible form.
let mut libffi_args = Vec::<CArg>::with_capacity(args.len()); let mut libffi_args = Vec::<CArg>::with_capacity(args.len());
for arg in args.iter() { for arg in args.iter() {
@ -213,12 +323,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
// The first time this happens, print a warning. // The first time this happens, print a warning.
if !this.machine.native_call_mem_warned.replace(true) { if !this.machine.native_call_mem_warned.replace(true) {
// Newly set, so first time we get here. // Newly set, so first time we get here.
this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing });
//#[cfg(target_os = "linux")]
//tracing: self::trace::Supervisor::is_enabled(),
//#[cfg(not(target_os = "linux"))]
tracing: false,
});
} }
this.expose_provenance(prov)?; this.expose_provenance(prov)?;
@ -245,15 +350,23 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
// be read by FFI. The `black_box` is defensive programming as LLVM likes // be read by FFI. The `black_box` is defensive programming as LLVM likes
// to (incorrectly) optimize away ptr2int casts whose result is unused. // to (incorrectly) optimize away ptr2int casts whose result is unused.
std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance()); std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
// Expose all provenances in this allocation, since the native code can do $whatever.
for prov in alloc.provenance().provenances() { if !tracing {
this.expose_provenance(prov)?; // Expose all provenances in this allocation, since the native code can do $whatever.
// Can be skipped when tracing; in that case we'll expose just the actually-read parts later.
for prov in alloc.provenance().provenances() {
this.expose_provenance(prov)?;
}
} }
// Prepare for possible write from native code if mutable. // Prepare for possible write from native code if mutable.
if info.mutbl.is_mut() { if info.mutbl.is_mut() {
let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?; let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
alloc.process_native_write(&cx.tcx, None); // These writes could initialize everything and wreck havoc with the pointers.
// We can skip that when tracing; in that case we'll later do that only for the memory that got actually written.
if !tracing {
alloc.process_native_write(&cx.tcx, None);
}
// Also expose *mutable* provenance for the interpreter-level allocation. // Also expose *mutable* provenance for the interpreter-level allocation.
std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance()); std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance());
} }
@ -265,10 +378,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
let (ret, maybe_memevents) = let (ret, maybe_memevents) =
this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?; this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;
if cfg!(target_os = "linux") if tracing {
&& let Some(events) = maybe_memevents this.tracing_apply_accesses(maybe_memevents.unwrap())?;
{
trace!("Registered FFI events:\n{events:#0x?}");
} }
this.write_immediate(*ret, dest)?; this.write_immediate(*ret, dest)?;

View file

@ -4,12 +4,22 @@ use std::rc::Rc;
use ipc_channel::ipc; use ipc_channel::ipc;
use nix::sys::{ptrace, signal}; use nix::sys::{ptrace, signal};
use nix::unistd; use nix::unistd;
use rustc_const_eval::interpret::InterpResult;
use super::CALLBACK_STACK_SIZE; use super::CALLBACK_STACK_SIZE;
use super::messages::{Confirmation, MemEvents, StartFfiInfo, TraceRequest}; use super::messages::{Confirmation, StartFfiInfo, TraceRequest};
use super::parent::{ChildListener, sv_loop}; use super::parent::{ChildListener, sv_loop};
use crate::alloc::isolated_alloc::IsolatedAlloc; use crate::alloc::isolated_alloc::IsolatedAlloc;
use crate::shims::native_lib::MemEvents;
/// A handle to the single, shared supervisor process across all `MiriMachine`s.
/// Since it would be very difficult to trace multiple FFI calls in parallel, we
/// need to ensure that either (a) only one `MiriMachine` is performing an FFI call
/// at any given time, or (b) there are distinct supervisor and child processes for
/// each machine. The former was chosen here.
///
/// This should only contain a `None` if the supervisor has not (yet) been initialised;
/// otherwise, if `init_sv` was called and did not error, this will always be nonempty.
static SUPERVISOR: std::sync::Mutex<Option<Supervisor>> = std::sync::Mutex::new(None); static SUPERVISOR: std::sync::Mutex<Option<Supervisor>> = std::sync::Mutex::new(None);
/// The main means of communication between the child and parent process, /// The main means of communication between the child and parent process,
@ -34,32 +44,23 @@ impl Supervisor {
SUPERVISOR.lock().unwrap().is_some() SUPERVISOR.lock().unwrap().is_some()
} }
/// Begins preparations for doing an FFI call. This should be called at /// Performs an arbitrary FFI call, enabling tracing from the supervisor.
/// the last possible moment before entering said call. `alloc` points to
/// the allocator which handed out the memory used for this machine.
///
/// As this locks the supervisor via a mutex, no other threads may enter FFI /// As this locks the supervisor via a mutex, no other threads may enter FFI
/// until this one returns and its guard is dropped via `end_ffi`. The /// until this function returns.
/// pointer returned should be passed to `end_ffi` to avoid a memory leak. pub fn do_ffi<'tcx>(
///
/// SAFETY: The resulting guard must be dropped *via `end_ffi`* immediately
/// after the desired call has concluded.
pub unsafe fn start_ffi(
alloc: &Rc<RefCell<IsolatedAlloc>>, alloc: &Rc<RefCell<IsolatedAlloc>>,
) -> (std::sync::MutexGuard<'static, Option<Supervisor>>, Option<*mut [u8; CALLBACK_STACK_SIZE]>) f: impl FnOnce() -> InterpResult<'tcx, crate::ImmTy<'tcx>>,
{ ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<MemEvents>)> {
let mut sv_guard = SUPERVISOR.lock().unwrap(); let mut sv_guard = SUPERVISOR.lock().unwrap();
// If the supervisor is not initialised for whatever reason, fast-fail. // If the supervisor is not initialised for whatever reason, fast-return.
// This might be desired behaviour, as even on platforms where ptracing // As a side-effect, even on platforms where ptracing
// is not implemented it enables us to enforce that only one FFI call // is not implemented, we enforce that only one FFI call
// happens at a time. // happens at a time.
let Some(sv) = sv_guard.take() else { let Some(sv) = sv_guard.as_mut() else { return f().map(|v| (v, None)) };
return (sv_guard, None);
};
// Get pointers to all the pages the supervisor must allow accesses in // Get pointers to all the pages the supervisor must allow accesses in
// and prepare the callback stack. // and prepare the callback stack.
let page_ptrs = alloc.borrow().pages(); let page_ptrs = alloc.borrow().pages().collect();
let raw_stack_ptr: *mut [u8; CALLBACK_STACK_SIZE] = let raw_stack_ptr: *mut [u8; CALLBACK_STACK_SIZE] =
Box::leak(Box::new([0u8; CALLBACK_STACK_SIZE])).as_mut_ptr().cast(); Box::leak(Box::new([0u8; CALLBACK_STACK_SIZE])).as_mut_ptr().cast();
let stack_ptr = raw_stack_ptr.expose_provenance(); let stack_ptr = raw_stack_ptr.expose_provenance();
@ -68,9 +69,9 @@ impl Supervisor {
// SAFETY: We do not access machine memory past this point until the // SAFETY: We do not access machine memory past this point until the
// supervisor is ready to allow it. // supervisor is ready to allow it.
unsafe { unsafe {
if alloc.borrow_mut().prepare_ffi().is_err() { if alloc.borrow_mut().start_ffi().is_err() {
// Don't mess up unwinding by maybe leaving the memory partly protected // Don't mess up unwinding by maybe leaving the memory partly protected
alloc.borrow_mut().unprep_ffi(); alloc.borrow_mut().end_ffi();
panic!("Cannot protect memory for FFI call!"); panic!("Cannot protect memory for FFI call!");
} }
} }
@ -82,27 +83,13 @@ impl Supervisor {
// enforce an ordering for these events. // enforce an ordering for these events.
sv.message_tx.send(TraceRequest::StartFfi(start_info)).unwrap(); sv.message_tx.send(TraceRequest::StartFfi(start_info)).unwrap();
sv.confirm_rx.recv().unwrap(); sv.confirm_rx.recv().unwrap();
*sv_guard = Some(sv);
// We need to be stopped for the supervisor to be able to make certain // We need to be stopped for the supervisor to be able to make certain
// modifications to our memory - simply waiting on the recv() doesn't // modifications to our memory - simply waiting on the recv() doesn't
// count. // count.
signal::raise(signal::SIGSTOP).unwrap(); signal::raise(signal::SIGSTOP).unwrap();
(sv_guard, Some(raw_stack_ptr))
}
/// Undoes FFI-related preparations, allowing Miri to continue as normal, then let res = f();
/// gets the memory accesses and changes performed during the FFI call. Note
/// that this may include some spurious accesses done by `libffi` itself in
/// the process of executing the function call.
///
/// SAFETY: The `sv_guard` and `raw_stack_ptr` passed must be the same ones
/// received by a prior call to `start_ffi`, and the allocator must be the
/// one passed to it also.
pub unsafe fn end_ffi(
alloc: &Rc<RefCell<IsolatedAlloc>>,
mut sv_guard: std::sync::MutexGuard<'static, Option<Supervisor>>,
raw_stack_ptr: Option<*mut [u8; CALLBACK_STACK_SIZE]>,
) -> Option<MemEvents> {
// We can't use IPC channels here to signal that FFI mode has ended, // We can't use IPC channels here to signal that FFI mode has ended,
// since they might allocate memory which could get us stuck in a SIGTRAP // since they might allocate memory which could get us stuck in a SIGTRAP
// with no easy way out! While this could be worked around, it is much // with no easy way out! While this could be worked around, it is much
@ -113,42 +100,40 @@ impl Supervisor {
signal::raise(signal::SIGUSR1).unwrap(); signal::raise(signal::SIGUSR1).unwrap();
// This is safe! It just sets memory to normal expected permissions. // This is safe! It just sets memory to normal expected permissions.
alloc.borrow_mut().unprep_ffi(); alloc.borrow_mut().end_ffi();
// If this is `None`, then `raw_stack_ptr` is None and does not need to
// be deallocated (and there's no need to worry about the guard, since
// it contains nothing).
let sv = sv_guard.take()?;
// SAFETY: Caller upholds that this pointer was allocated as a box with // SAFETY: Caller upholds that this pointer was allocated as a box with
// this type. // this type.
unsafe { unsafe {
drop(Box::from_raw(raw_stack_ptr.unwrap())); drop(Box::from_raw(raw_stack_ptr));
} }
// On the off-chance something really weird happens, don't block forever. // On the off-chance something really weird happens, don't block forever.
let ret = sv let events = sv
.event_rx .event_rx
.try_recv_timeout(std::time::Duration::from_secs(5)) .try_recv_timeout(std::time::Duration::from_secs(5))
.map_err(|e| { .map_err(|e| {
match e { match e {
ipc::TryRecvError::IpcError(_) => (), ipc::TryRecvError::IpcError(_) => (),
ipc::TryRecvError::Empty => ipc::TryRecvError::Empty =>
eprintln!("Waiting for accesses from supervisor timed out!"), panic!("Waiting for accesses from supervisor timed out!"),
} }
}) })
.ok(); .ok();
// Do *not* leave the supervisor empty, or else we might get another fork...
*sv_guard = Some(sv); res.map(|v| (v, events))
ret
} }
} }
/// Initialises the supervisor process. If this function errors, then the /// Initialises the supervisor process. If this function errors, then the
/// supervisor process could not be created successfully; else, the caller /// supervisor process could not be created successfully; else, the caller
/// is now the child process and can communicate via `start_ffi`/`end_ffi`, /// is now the child process and can communicate via `do_ffi`, receiving back
/// receiving back events through `get_events`. /// events at the end.
/// ///
/// # Safety /// # Safety
/// The invariants for `fork()` must be upheld by the caller. /// The invariants for `fork()` must be upheld by the caller, namely either:
/// - Other threads do not exist, or;
/// - If they do exist, either those threads or the resulting child process
/// only ever act in [async-signal-safe](https://www.man7.org/linux/man-pages/man7/signal-safety.7.html) ways.
pub unsafe fn init_sv() -> Result<(), SvInitError> { pub unsafe fn init_sv() -> Result<(), SvInitError> {
// FIXME: Much of this could be reimplemented via the mitosis crate if we upstream the // FIXME: Much of this could be reimplemented via the mitosis crate if we upstream the
// relevant missing bits. // relevant missing bits.
@ -191,8 +176,7 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
// The child process is free to unwind, so we won't to avoid doubly freeing // The child process is free to unwind, so we won't to avoid doubly freeing
// system resources. // system resources.
let init = std::panic::catch_unwind(|| { let init = std::panic::catch_unwind(|| {
let listener = let listener = ChildListener::new(message_rx, confirm_tx.clone());
ChildListener { message_rx, attached: false, override_retcode: None };
// Trace as many things as possible, to be able to handle them as needed. // Trace as many things as possible, to be able to handle them as needed.
let options = ptrace::Options::PTRACE_O_TRACESYSGOOD let options = ptrace::Options::PTRACE_O_TRACESYSGOOD
| ptrace::Options::PTRACE_O_TRACECLONE | ptrace::Options::PTRACE_O_TRACECLONE
@ -218,7 +202,9 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
// The "Ok" case means that we couldn't ptrace. // The "Ok" case means that we couldn't ptrace.
Ok(e) => return Err(e), Ok(e) => return Err(e),
Err(p) => { Err(p) => {
eprintln!("Supervisor process panicked!\n{p:?}"); eprintln!(
"Supervisor process panicked!\n{p:?}\n\nTry running again without using the native-lib tracer."
);
std::process::exit(1); std::process::exit(1);
} }
} }
@ -239,13 +225,11 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
} }
/// Instruct the supervisor process to return a particular code. Useful if for /// Instruct the supervisor process to return a particular code. Useful if for
/// whatever reason this code fails to be intercepted normally. In the case of /// whatever reason this code fails to be intercepted normally.
/// `abort_if_errors()` used in `bin/miri.rs`, the return code is erroneously
/// given as a 0 if this is not used.
pub fn register_retcode_sv(code: i32) { pub fn register_retcode_sv(code: i32) {
let mut sv_guard = SUPERVISOR.lock().unwrap(); let mut sv_guard = SUPERVISOR.lock().unwrap();
if let Some(sv) = sv_guard.take() { if let Some(sv) = sv_guard.as_mut() {
sv.message_tx.send(TraceRequest::OverrideRetcode(code)).unwrap(); sv.message_tx.send(TraceRequest::OverrideRetcode(code)).unwrap();
*sv_guard = Some(sv); sv.confirm_rx.recv().unwrap();
} }
} }

View file

@ -1,25 +1,28 @@
//! Houses the types that are directly sent across the IPC channels. //! Houses the types that are directly sent across the IPC channels.
//! //!
//! The overall structure of a traced FFI call, from the child process's POV, is //! When forking to initialise the supervisor during `init_sv`, the child raises
//! as follows: //! a `SIGSTOP`; if the parent successfully ptraces the child, it will allow it
//! to resume. Else, the child will be killed by the parent.
//!
//! After initialisation is done, the overall structure of a traced FFI call from
//! the child process's POV is as follows:
//! ``` //! ```
//! message_tx.send(TraceRequest::StartFfi); //! message_tx.send(TraceRequest::StartFfi);
//! confirm_rx.recv(); //! confirm_rx.recv(); // receives a `Confirmation`
//! raise(SIGSTOP); //! raise(SIGSTOP);
//! /* do ffi call */ //! /* do ffi call */
//! raise(SIGUSR1); // morally equivalent to some kind of "TraceRequest::EndFfi" //! raise(SIGUSR1); // morally equivalent to some kind of "TraceRequest::EndFfi"
//! let events = event_rx.recv(); //! let events = event_rx.recv(); // receives a `MemEvents`
//! ``` //! ```
//! `TraceRequest::OverrideRetcode` can be sent at any point in the above, including //! `TraceRequest::OverrideRetcode` can be sent at any point in the above, including
//! before or after all of them. //! before or after all of them. `confirm_rx.recv()` is to be called after, to ensure
//! that the child does not exit before the supervisor has registered the return code.
//! //!
//! NB: sending these events out of order, skipping steps, etc. will result in //! NB: sending these events out of order, skipping steps, etc. will result in
//! unspecified behaviour from the supervisor process, so use the abstractions //! unspecified behaviour from the supervisor process, so use the abstractions
//! in `super::child` (namely `start_ffi()` and `end_ffi()`) to handle this. It is //! in `super::child` (namely `do_ffi()`) to handle this. It is
//! trivially easy to cause a deadlock or crash by messing this up! //! trivially easy to cause a deadlock or crash by messing this up!
use std::ops::Range;
/// An IPC request sent by the child process to the parent. /// An IPC request sent by the child process to the parent.
/// ///
/// The sender for this channel should live on the child process. /// The sender for this channel should live on the child process.
@ -34,6 +37,8 @@ pub enum TraceRequest {
StartFfi(StartFfiInfo), StartFfi(StartFfiInfo),
/// Manually overrides the code that the supervisor will return upon exiting. /// Manually overrides the code that the supervisor will return upon exiting.
/// Once set, it is permanent. This can be called again to change the value. /// Once set, it is permanent. This can be called again to change the value.
///
/// After sending this, the child must wait to receive a `Confirmation`.
OverrideRetcode(i32), OverrideRetcode(i32),
} }
@ -41,7 +46,7 @@ pub enum TraceRequest {
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)] #[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
pub struct StartFfiInfo { pub struct StartFfiInfo {
/// A vector of page addresses. These should have been automatically obtained /// A vector of page addresses. These should have been automatically obtained
/// with `IsolatedAlloc::pages` and prepared with `IsolatedAlloc::prepare_ffi`. /// with `IsolatedAlloc::pages` and prepared with `IsolatedAlloc::start_ffi`.
pub page_ptrs: Vec<usize>, pub page_ptrs: Vec<usize>,
/// The address of an allocation that can serve as a temporary stack. /// The address of an allocation that can serve as a temporary stack.
/// This should be a leaked `Box<[u8; CALLBACK_STACK_SIZE]>` cast to an int. /// This should be a leaked `Box<[u8; CALLBACK_STACK_SIZE]>` cast to an int.
@ -54,27 +59,3 @@ pub struct StartFfiInfo {
/// The sender for this channel should live on the parent process. /// The sender for this channel should live on the parent process.
#[derive(serde::Serialize, serde::Deserialize, Debug)] #[derive(serde::Serialize, serde::Deserialize, Debug)]
pub struct Confirmation; pub struct Confirmation;
/// The final results of an FFI trace, containing every relevant event detected
/// by the tracer. Sent by the supervisor after receiving a `SIGUSR1` signal.
///
/// The sender for this channel should live on the parent process.
#[derive(serde::Serialize, serde::Deserialize, Debug)]
pub struct MemEvents {
/// An ordered list of memory accesses that occurred. These should be assumed
/// to be overcautious; that is, if the size of an access is uncertain it is
/// pessimistically rounded up, and if the type (read/write/both) is uncertain
/// it is reported as whatever would be safest to assume; i.e. a read + maybe-write
/// becomes a read + write, etc.
pub acc_events: Vec<AccessEvent>,
}
/// A single memory access, conservatively overestimated
/// in case of ambiguity.
#[derive(serde::Serialize, serde::Deserialize, Debug)]
pub enum AccessEvent {
/// A read may have occurred on no more than the specified address range.
Read(Range<usize>),
/// A write may have occurred on no more than the specified address range.
Write(Range<usize>),
}

View file

@ -5,4 +5,6 @@ mod parent;
pub use self::child::{Supervisor, init_sv, register_retcode_sv}; pub use self::child::{Supervisor, init_sv, register_retcode_sv};
/// The size of the temporary stack we use for callbacks that the server executes in the client. /// The size of the temporary stack we use for callbacks that the server executes in the client.
/// This should be big enough that `mempr_on` and `mempr_off` can safely be jumped into with the
/// stack pointer pointing to a "stack" of this size without overflowing it.
const CALLBACK_STACK_SIZE: usize = 1024; const CALLBACK_STACK_SIZE: usize = 1024;

View file

@ -5,26 +5,17 @@ use nix::sys::{ptrace, signal, wait};
use nix::unistd; use nix::unistd;
use super::CALLBACK_STACK_SIZE; use super::CALLBACK_STACK_SIZE;
use super::messages::{AccessEvent, Confirmation, MemEvents, StartFfiInfo, TraceRequest}; use super::messages::{Confirmation, StartFfiInfo, TraceRequest};
use crate::shims::native_lib::{AccessEvent, AccessRange, MemEvents};
/// The flags to use when calling `waitid()`. /// The flags to use when calling `waitid()`.
/// Since bitwise or on the nix version of these flags is implemented as a trait,
/// this cannot be const directly so we do it this way.
const WAIT_FLAGS: wait::WaitPidFlag = const WAIT_FLAGS: wait::WaitPidFlag =
wait::WaitPidFlag::from_bits_truncate(libc::WUNTRACED | libc::WEXITED); wait::WaitPidFlag::WUNTRACED.union(wait::WaitPidFlag::WEXITED);
/// Arch-specific maximum size a single access might perform. x86 value is set
/// assuming nothing bigger than AVX-512 is available.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
const ARCH_MAX_ACCESS_SIZE: usize = 64;
/// The largest arm64 simd instruction operates on 16 bytes.
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
const ARCH_MAX_ACCESS_SIZE: usize = 16;
/// The default word size on a given platform, in bytes. /// The default word size on a given platform, in bytes.
#[cfg(any(target_arch = "x86", target_arch = "arm"))] #[cfg(target_arch = "x86")]
const ARCH_WORD_SIZE: usize = 4; const ARCH_WORD_SIZE: usize = 4;
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] #[cfg(target_arch = "x86_64")]
const ARCH_WORD_SIZE: usize = 8; const ARCH_WORD_SIZE: usize = 8;
/// The address of the page set to be edited, initialised to a sentinel null /// The address of the page set to be edited, initialised to a sentinel null
@ -53,39 +44,25 @@ trait ArchIndependentRegs {
// It's fine / desirable behaviour for values to wrap here, we care about just // It's fine / desirable behaviour for values to wrap here, we care about just
// preserving the bit pattern. // preserving the bit pattern.
#[cfg(target_arch = "x86_64")] #[cfg(target_arch = "x86_64")]
#[expect(clippy::as_conversions)]
#[rustfmt::skip] #[rustfmt::skip]
impl ArchIndependentRegs for libc::user_regs_struct { impl ArchIndependentRegs for libc::user_regs_struct {
#[inline] #[inline]
fn ip(&self) -> usize { self.rip as _ } fn ip(&self) -> usize { self.rip.try_into().unwrap() }
#[inline] #[inline]
fn set_ip(&mut self, ip: usize) { self.rip = ip as _ } fn set_ip(&mut self, ip: usize) { self.rip = ip.try_into().unwrap() }
#[inline] #[inline]
fn set_sp(&mut self, sp: usize) { self.rsp = sp as _ } fn set_sp(&mut self, sp: usize) { self.rsp = sp.try_into().unwrap() }
} }
#[cfg(target_arch = "x86")] #[cfg(target_arch = "x86")]
#[expect(clippy::as_conversions)]
#[rustfmt::skip] #[rustfmt::skip]
impl ArchIndependentRegs for libc::user_regs_struct { impl ArchIndependentRegs for libc::user_regs_struct {
#[inline] #[inline]
fn ip(&self) -> usize { self.eip as _ } fn ip(&self) -> usize { self.eip.cast_unsigned().try_into().unwrap() }
#[inline] #[inline]
fn set_ip(&mut self, ip: usize) { self.eip = ip as _ } fn set_ip(&mut self, ip: usize) { self.eip = ip.cast_signed().try_into().unwrap() }
#[inline] #[inline]
fn set_sp(&mut self, sp: usize) { self.esp = sp as _ } fn set_sp(&mut self, sp: usize) { self.esp = sp.cast_signed().try_into().unwrap() }
}
#[cfg(target_arch = "aarch64")]
#[expect(clippy::as_conversions)]
#[rustfmt::skip]
impl ArchIndependentRegs for libc::user_regs_struct {
#[inline]
fn ip(&self) -> usize { self.pc as _ }
#[inline]
fn set_ip(&mut self, ip: usize) { self.pc = ip as _ }
#[inline]
fn set_sp(&mut self, sp: usize) { self.sp = sp as _ }
} }
/// A unified event representing something happening on the child process. Wraps /// A unified event representing something happening on the child process. Wraps
@ -109,11 +86,24 @@ pub enum ExecEvent {
/// A listener for the FFI start info channel along with relevant state. /// A listener for the FFI start info channel along with relevant state.
pub struct ChildListener { pub struct ChildListener {
/// The matching channel for the child's `Supervisor` struct. /// The matching channel for the child's `Supervisor` struct.
pub message_rx: ipc::IpcReceiver<TraceRequest>, message_rx: ipc::IpcReceiver<TraceRequest>,
/// ...
confirm_tx: ipc::IpcSender<Confirmation>,
/// Whether an FFI call is currently ongoing. /// Whether an FFI call is currently ongoing.
pub attached: bool, attached: bool,
/// If `Some`, overrides the return code with the given value. /// If `Some`, overrides the return code with the given value.
pub override_retcode: Option<i32>, override_retcode: Option<i32>,
/// Last code obtained from a child exiting.
last_code: Option<i32>,
}
impl ChildListener {
pub fn new(
message_rx: ipc::IpcReceiver<TraceRequest>,
confirm_tx: ipc::IpcSender<Confirmation>,
) -> Self {
Self { message_rx, confirm_tx, attached: false, override_retcode: None, last_code: None }
}
} }
impl Iterator for ChildListener { impl Iterator for ChildListener {
@ -133,16 +123,10 @@ impl Iterator for ChildListener {
Ok(stat) => Ok(stat) =>
match stat { match stat {
// Child exited normally with a specific code set. // Child exited normally with a specific code set.
wait::WaitStatus::Exited(_, code) => { wait::WaitStatus::Exited(_, code) => self.last_code = Some(code),
let code = self.override_retcode.unwrap_or(code);
return Some(ExecEvent::Died(Some(code)));
}
// Child was killed by a signal, without giving a code. // Child was killed by a signal, without giving a code.
wait::WaitStatus::Signaled(_, _, _) => wait::WaitStatus::Signaled(_, _, _) => self.last_code = None,
return Some(ExecEvent::Died(self.override_retcode)), // Child entered or exited a syscall.
// Child entered a syscall. Since we're always technically
// tracing, only pass this along if we're actively
// monitoring the child.
wait::WaitStatus::PtraceSyscall(pid) => wait::WaitStatus::PtraceSyscall(pid) =>
if self.attached { if self.attached {
return Some(ExecEvent::Syscall(pid)); return Some(ExecEvent::Syscall(pid));
@ -179,10 +163,8 @@ impl Iterator for ChildListener {
}, },
_ => (), _ => (),
}, },
// This case should only trigger if all children died and we // This case should only trigger when all children died.
// somehow missed that, but it's best we not allow any room Err(_) => return Some(ExecEvent::Died(self.override_retcode.or(self.last_code))),
// for deadlocks.
Err(_) => return Some(ExecEvent::Died(None)),
} }
// Similarly, do a non-blocking poll of the IPC channel. // Similarly, do a non-blocking poll of the IPC channel.
@ -196,7 +178,10 @@ impl Iterator for ChildListener {
self.attached = true; self.attached = true;
return Some(ExecEvent::Start(info)); return Some(ExecEvent::Start(info));
}, },
TraceRequest::OverrideRetcode(code) => self.override_retcode = Some(code), TraceRequest::OverrideRetcode(code) => {
self.override_retcode = Some(code);
self.confirm_tx.send(Confirmation).unwrap();
}
} }
} }
@ -211,6 +196,12 @@ impl Iterator for ChildListener {
#[derive(Debug)] #[derive(Debug)]
pub struct ExecEnd(pub Option<i32>); pub struct ExecEnd(pub Option<i32>);
/// Whether to call `ptrace::cont()` immediately. Used exclusively by `wait_for_signal`.
enum InitialCont {
Yes,
No,
}
/// This is the main loop of the supervisor process. It runs in a separate /// This is the main loop of the supervisor process. It runs in a separate
/// process from the rest of Miri (but because we fork, addresses for anything /// process from the rest of Miri (but because we fork, addresses for anything
/// created before the fork - like statics - are the same). /// created before the fork - like statics - are the same).
@ -239,12 +230,12 @@ pub fn sv_loop(
let mut curr_pid = init_pid; let mut curr_pid = init_pid;
// There's an initial sigstop we need to deal with. // There's an initial sigstop we need to deal with.
wait_for_signal(Some(curr_pid), signal::SIGSTOP, false)?; wait_for_signal(Some(curr_pid), signal::SIGSTOP, InitialCont::No)?;
ptrace::cont(curr_pid, None).unwrap(); ptrace::cont(curr_pid, None).unwrap();
for evt in listener { for evt in listener {
match evt { match evt {
// start_ffi was called by the child, so prep memory. // Child started ffi, so prep memory.
ExecEvent::Start(ch_info) => { ExecEvent::Start(ch_info) => {
// All the pages that the child process is "allowed to" access. // All the pages that the child process is "allowed to" access.
ch_pages = ch_info.page_ptrs; ch_pages = ch_info.page_ptrs;
@ -252,17 +243,17 @@ pub fn sv_loop(
ch_stack = Some(ch_info.stack_ptr); ch_stack = Some(ch_info.stack_ptr);
// We received the signal and are no longer in the main listener loop, // We received the signal and are no longer in the main listener loop,
// so we can let the child move on to the end of start_ffi where it will // so we can let the child move on to the end of the ffi prep where it will
// raise a SIGSTOP. We need it to be signal-stopped *and waited for* in // raise a SIGSTOP. We need it to be signal-stopped *and waited for* in
// order to do most ptrace operations! // order to do most ptrace operations!
confirm_tx.send(Confirmation).unwrap(); confirm_tx.send(Confirmation).unwrap();
// We can't trust simply calling `Pid::this()` in the child process to give the right // We can't trust simply calling `Pid::this()` in the child process to give the right
// PID for us, so we get it this way. // PID for us, so we get it this way.
curr_pid = wait_for_signal(None, signal::SIGSTOP, false).unwrap(); curr_pid = wait_for_signal(None, signal::SIGSTOP, InitialCont::No).unwrap();
ptrace::syscall(curr_pid, None).unwrap(); ptrace::syscall(curr_pid, None).unwrap();
} }
// end_ffi was called by the child. // Child wants to end tracing.
ExecEvent::End => { ExecEvent::End => {
// Hand over the access info we traced. // Hand over the access info we traced.
event_tx.send(MemEvents { acc_events }).unwrap(); event_tx.send(MemEvents { acc_events }).unwrap();
@ -322,10 +313,6 @@ fn get_disasm() -> capstone::Capstone {
{cs_pre.x86().mode(arch::x86::ArchMode::Mode64)} {cs_pre.x86().mode(arch::x86::ArchMode::Mode64)}
#[cfg(target_arch = "x86")] #[cfg(target_arch = "x86")]
{cs_pre.x86().mode(arch::x86::ArchMode::Mode32)} {cs_pre.x86().mode(arch::x86::ArchMode::Mode32)}
#[cfg(target_arch = "aarch64")]
{cs_pre.arm64().mode(arch::arm64::ArchMode::Arm)}
#[cfg(target_arch = "arm")]
{cs_pre.arm().mode(arch::arm::ArchMode::Arm)}
} }
.detail(true) .detail(true)
.build() .build()
@ -339,9 +326,9 @@ fn get_disasm() -> capstone::Capstone {
fn wait_for_signal( fn wait_for_signal(
pid: Option<unistd::Pid>, pid: Option<unistd::Pid>,
wait_signal: signal::Signal, wait_signal: signal::Signal,
init_cont: bool, init_cont: InitialCont,
) -> Result<unistd::Pid, ExecEnd> { ) -> Result<unistd::Pid, ExecEnd> {
if init_cont { if matches!(init_cont, InitialCont::Yes) {
ptrace::cont(pid.unwrap(), None).unwrap(); ptrace::cont(pid.unwrap(), None).unwrap();
} }
// Repeatedly call `waitid` until we get the signal we want, or the process dies. // Repeatedly call `waitid` until we get the signal we want, or the process dies.
@ -374,6 +361,84 @@ fn wait_for_signal(
} }
} }
/// Add the memory events from `op` being executed while there is a memory access at `addr` to
/// `acc_events`. Return whether this was a memory operand.
fn capstone_find_events(
addr: usize,
op: &capstone::arch::ArchOperand,
acc_events: &mut Vec<AccessEvent>,
) -> bool {
use capstone::prelude::*;
match op {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
arch::ArchOperand::X86Operand(x86_operand) => {
match x86_operand.op_type {
// We only care about memory accesses
arch::x86::X86OperandType::Mem(_) => {
let push = AccessRange { addr, size: x86_operand.size.into() };
// It's called a "RegAccessType" but it also applies to memory
let acc_ty = x86_operand.access.unwrap();
// The same instruction might do both reads and writes, so potentially add both.
// We do not know the order in which they happened, but writing and then reading
// makes little sense so we put the read first. That is also the more
// conservative choice.
if acc_ty.is_readable() {
acc_events.push(AccessEvent::Read(push.clone()));
}
if acc_ty.is_writable() {
// FIXME: This could be made certain; either determine all cases where
// only reads happen, or have an intermediate mempr_* function to first
// map the page(s) as readonly and check if a segfault occurred.
// Per https://docs.rs/iced-x86/latest/iced_x86/enum.OpAccess.html,
// we know that the possible access types are Read, CondRead, Write,
// CondWrite, ReadWrite, and ReadCondWrite. Since we got a segfault
// we know some kind of access happened so Cond{Read, Write}s are
// certain reads and writes; the only uncertainty is with an RW op
// as it might be a ReadCondWrite with the write condition unmet.
acc_events.push(AccessEvent::Write(push, !acc_ty.is_readable()));
}
return true;
}
_ => (),
}
}
// FIXME: arm64
_ => unimplemented!(),
}
false
}
/// Extract the events from the given instruction.
fn capstone_disassemble(
instr: &[u8],
addr: usize,
cs: &capstone::Capstone,
acc_events: &mut Vec<AccessEvent>,
) -> capstone::CsResult<()> {
// The arch_detail is what we care about, but it relies on these temporaries
// that we can't drop. 0x1000 is the default base address for Captsone, and
// we're expecting 1 instruction.
let insns = cs.disasm_count(instr, 0x1000, 1)?;
let ins_detail = cs.insn_detail(&insns[0])?;
let arch_detail = ins_detail.arch_detail();
let mut found_mem_op = false;
for op in arch_detail.operands() {
if capstone_find_events(addr, &op, acc_events) {
if found_mem_op {
panic!("more than one memory operand found; we don't know which one accessed what");
}
found_mem_op = true;
}
}
Ok(())
}
/// Grabs the access that caused a segfault and logs it down if it's to our memory, /// Grabs the access that caused a segfault and logs it down if it's to our memory,
/// or kills the child and returns the appropriate error otherwise. /// or kills the child and returns the appropriate error otherwise.
fn handle_segfault( fn handle_segfault(
@ -384,116 +449,10 @@ fn handle_segfault(
cs: &capstone::Capstone, cs: &capstone::Capstone,
acc_events: &mut Vec<AccessEvent>, acc_events: &mut Vec<AccessEvent>,
) -> Result<(), ExecEnd> { ) -> Result<(), ExecEnd> {
/// This is just here to not pollute the main namespace with `capstone::prelude::*`.
#[inline]
fn capstone_disassemble(
instr: &[u8],
addr: usize,
cs: &capstone::Capstone,
acc_events: &mut Vec<AccessEvent>,
) -> capstone::CsResult<()> {
use capstone::prelude::*;
// The arch_detail is what we care about, but it relies on these temporaries
// that we can't drop. 0x1000 is the default base address for Captsone, and
// we're expecting 1 instruction.
let insns = cs.disasm_count(instr, 0x1000, 1)?;
let ins_detail = cs.insn_detail(&insns[0])?;
let arch_detail = ins_detail.arch_detail();
for op in arch_detail.operands() {
match op {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
arch::ArchOperand::X86Operand(x86_operand) => {
match x86_operand.op_type {
// We only care about memory accesses
arch::x86::X86OperandType::Mem(_) => {
let push = addr..addr.strict_add(usize::from(x86_operand.size));
// It's called a "RegAccessType" but it also applies to memory
let acc_ty = x86_operand.access.unwrap();
if acc_ty.is_readable() {
acc_events.push(AccessEvent::Read(push.clone()));
}
if acc_ty.is_writable() {
acc_events.push(AccessEvent::Write(push));
}
}
_ => (),
}
}
#[cfg(target_arch = "aarch64")]
arch::ArchOperand::Arm64Operand(arm64_operand) => {
// Annoyingly, we don't always get the size here, so just be pessimistic for now.
match arm64_operand.op_type {
arch::arm64::Arm64OperandType::Mem(_) => {
// B = 1 byte, H = 2 bytes, S = 4 bytes, D = 8 bytes, Q = 16 bytes.
let size = match arm64_operand.vas {
// Not an fp/simd instruction.
arch::arm64::Arm64Vas::ARM64_VAS_INVALID => ARCH_WORD_SIZE,
// 1 byte.
arch::arm64::Arm64Vas::ARM64_VAS_1B => 1,
// 2 bytes.
arch::arm64::Arm64Vas::ARM64_VAS_1H => 2,
// 4 bytes.
arch::arm64::Arm64Vas::ARM64_VAS_4B
| arch::arm64::Arm64Vas::ARM64_VAS_2H
| arch::arm64::Arm64Vas::ARM64_VAS_1S => 4,
// 8 bytes.
arch::arm64::Arm64Vas::ARM64_VAS_8B
| arch::arm64::Arm64Vas::ARM64_VAS_4H
| arch::arm64::Arm64Vas::ARM64_VAS_2S
| arch::arm64::Arm64Vas::ARM64_VAS_1D => 8,
// 16 bytes.
arch::arm64::Arm64Vas::ARM64_VAS_16B
| arch::arm64::Arm64Vas::ARM64_VAS_8H
| arch::arm64::Arm64Vas::ARM64_VAS_4S
| arch::arm64::Arm64Vas::ARM64_VAS_2D
| arch::arm64::Arm64Vas::ARM64_VAS_1Q => 16,
};
let push = addr..addr.strict_add(size);
// FIXME: This now has access type info in the latest
// git version of capstone because this pissed me off
// and I added it. Change this when it updates.
acc_events.push(AccessEvent::Read(push.clone()));
acc_events.push(AccessEvent::Write(push));
}
_ => (),
}
}
#[cfg(target_arch = "arm")]
arch::ArchOperand::ArmOperand(arm_operand) =>
match arm_operand.op_type {
arch::arm::ArmOperandType::Mem(_) => {
// We don't get info on the size of the access, but
// we're at least told if it's a vector instruction.
let size = if arm_operand.vector_index.is_some() {
ARCH_MAX_ACCESS_SIZE
} else {
ARCH_WORD_SIZE
};
let push = addr..addr.strict_add(size);
let acc_ty = arm_operand.access.unwrap();
if acc_ty.is_readable() {
acc_events.push(AccessEvent::Read(push.clone()));
}
if acc_ty.is_writable() {
acc_events.push(AccessEvent::Write(push));
}
}
_ => (),
},
_ => unimplemented!(),
}
}
Ok(())
}
// Get information on what caused the segfault. This contains the address // Get information on what caused the segfault. This contains the address
// that triggered it. // that triggered it.
let siginfo = ptrace::getsiginfo(pid).unwrap(); let siginfo = ptrace::getsiginfo(pid).unwrap();
// All x86, ARM, etc. instructions only have at most one memory operand // All x86 instructions only have at most one memory operand (thankfully!)
// (thankfully!)
// SAFETY: si_addr is safe to call. // SAFETY: si_addr is safe to call.
let addr = unsafe { siginfo.si_addr().addr() }; let addr = unsafe { siginfo.si_addr().addr() };
let page_addr = addr.strict_sub(addr.strict_rem(page_size)); let page_addr = addr.strict_sub(addr.strict_rem(page_size));
@ -515,7 +474,7 @@ fn handle_segfault(
// global atomic variables. This is what we use the temporary callback stack for. // global atomic variables. This is what we use the temporary callback stack for.
// - Step 1 instruction // - Step 1 instruction
// - Parse executed code to estimate size & type of access // - Parse executed code to estimate size & type of access
// - Reprotect the memory by executing `mempr_on` in the child. // - Reprotect the memory by executing `mempr_on` in the child, using the callback stack again.
// - Continue // - Continue
// Ensure the stack is properly zeroed out! // Ensure the stack is properly zeroed out!
@ -540,7 +499,7 @@ fn handle_segfault(
ptrace::write( ptrace::write(
pid, pid,
(&raw const PAGE_ADDR).cast_mut().cast(), (&raw const PAGE_ADDR).cast_mut().cast(),
libc::c_long::try_from(page_addr).unwrap(), libc::c_long::try_from(page_addr.cast_signed()).unwrap(),
) )
.unwrap(); .unwrap();
@ -552,7 +511,7 @@ fn handle_segfault(
ptrace::setregs(pid, new_regs).unwrap(); ptrace::setregs(pid, new_regs).unwrap();
// Our mempr_* functions end with a raise(SIGSTOP). // Our mempr_* functions end with a raise(SIGSTOP).
wait_for_signal(Some(pid), signal::SIGSTOP, true)?; wait_for_signal(Some(pid), signal::SIGSTOP, InitialCont::Yes)?;
// Step 1 instruction. // Step 1 instruction.
ptrace::setregs(pid, regs_bak).unwrap(); ptrace::setregs(pid, regs_bak).unwrap();
@ -573,6 +532,12 @@ fn handle_segfault(
let regs_bak = ptrace::getregs(pid).unwrap(); let regs_bak = ptrace::getregs(pid).unwrap();
new_regs = regs_bak; new_regs = regs_bak;
let ip_poststep = regs_bak.ip(); let ip_poststep = regs_bak.ip();
// Ensure that we've actually gone forwards.
assert!(ip_poststep > ip_prestep);
// But not by too much. 64 bytes should be "big enough" on ~any architecture.
assert!(ip_prestep.strict_add(64) > ip_poststep);
// We need to do reads/writes in word-sized chunks. // We need to do reads/writes in word-sized chunks.
let diff = (ip_poststep.strict_sub(ip_prestep)).div_ceil(ARCH_WORD_SIZE); let diff = (ip_poststep.strict_sub(ip_prestep)).div_ceil(ARCH_WORD_SIZE);
let instr = (ip_prestep..ip_prestep.strict_add(diff)).fold(vec![], |mut ret, ip| { let instr = (ip_prestep..ip_prestep.strict_add(diff)).fold(vec![], |mut ret, ip| {
@ -587,20 +552,14 @@ fn handle_segfault(
}); });
// Now figure out the size + type of access and log it down. // Now figure out the size + type of access and log it down.
// This will mark down e.g. the same area being read multiple times, capstone_disassemble(&instr, addr, cs, acc_events).expect("Failed to disassemble instruction");
// since it's more efficient to compress the accesses at the end.
if capstone_disassemble(&instr, addr, cs, acc_events).is_err() {
// Read goes first because we need to be pessimistic.
acc_events.push(AccessEvent::Read(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE)));
acc_events.push(AccessEvent::Write(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE)));
}
// Reprotect everything and continue. // Reprotect everything and continue.
#[expect(clippy::as_conversions)] #[expect(clippy::as_conversions)]
new_regs.set_ip(mempr_on as usize); new_regs.set_ip(mempr_on as usize);
new_regs.set_sp(stack_ptr); new_regs.set_sp(stack_ptr);
ptrace::setregs(pid, new_regs).unwrap(); ptrace::setregs(pid, new_regs).unwrap();
wait_for_signal(Some(pid), signal::SIGSTOP, true)?; wait_for_signal(Some(pid), signal::SIGSTOP, InitialCont::Yes)?;
ptrace::setregs(pid, regs_bak).unwrap(); ptrace::setregs(pid, regs_bak).unwrap();
ptrace::syscall(pid, None).unwrap(); ptrace::syscall(pid, None).unwrap();

View file

@ -0,0 +1,34 @@
use rustc_const_eval::interpret::InterpResult;
// Global lock used to serialize FFI calls: `Supervisor::do_ffi` holds this
// mutex for the duration of each native call so no two run concurrently.
static SUPERVISOR: std::sync::Mutex<()> = std::sync::Mutex::new(());

// Dummy supervisor for platforms where native-access tracing is unsupported;
// see `Supervisor::is_enabled`, which always returns `false` here.
pub struct Supervisor;

// Error type returned by `init_sv` (which always fails in this dummy build).
#[derive(Debug)]
pub struct SvInitError;
impl Supervisor {
    /// Reports whether the tracing supervisor is active.
    /// In this dummy implementation, tracing is never available.
    #[inline(always)]
    pub fn is_enabled() -> bool {
        false
    }

    /// Runs the FFI closure `f` while holding the global FFI lock, so that no
    /// two FFI calls can ever execute concurrently. Since this is the dummy
    /// supervisor, no memory events are recorded (hence the `None`).
    pub fn do_ffi<'tcx, T>(
        _: T,
        f: impl FnOnce() -> InterpResult<'tcx, crate::ImmTy<'tcx>>,
    ) -> InterpResult<'tcx, (crate::ImmTy<'tcx>, Option<super::MemEvents>)> {
        // Serialize FFI calls: keep the guard alive for the whole call.
        let _guard = SUPERVISOR.lock().unwrap();
        let result = f();
        result.map(|value| (value, None))
    }
}
/// Initializes the supervisor process used for tracing native accesses.
/// This dummy implementation always fails with [`SvInitError`], since tracing
/// is not supported on this platform/configuration.
///
/// # Safety
/// The body performs no unsafe operations; the function is `unsafe fn`
/// presumably to mirror the real supervisor's API — TODO confirm against the
/// non-dummy implementation.
#[inline(always)]
#[allow(dead_code, clippy::missing_safety_doc)]
pub unsafe fn init_sv() -> Result<!, SvInitError> {
    Err(SvInitError)
}
/// Registers a return code with the supervisor. This dummy implementation
/// discards the argument and does nothing.
#[inline(always)]
#[allow(dead_code)]
pub fn register_retcode_sv<T>(_: T) {}

View file

@ -1,162 +1,12 @@
//! Panic runtime for Miri. //! Helper functions for causing panics.
//!
//! The core pieces of the runtime are:
//! - An implementation of `__rust_maybe_catch_panic` that pushes the invoked stack frame with
//! some extra metadata derived from the panic-catching arguments of `__rust_maybe_catch_panic`.
//! - A hack in `libpanic_unwind` that calls the `miri_start_unwind` intrinsic instead of the
//! target-native panic runtime. (This lives in the rustc repo.)
//! - An implementation of `miri_start_unwind` that stores its argument (the panic payload), and then
//! immediately returns, but on the *unwind* edge (not the normal return edge), thus initiating unwinding.
//! - A hook executed each time a frame is popped, such that if the frame pushed by `__rust_maybe_catch_panic`
//! gets popped *during unwinding*, we take the panic payload and store it according to the extra
//! metadata we remembered when pushing said frame.
use rustc_abi::ExternAbi; use rustc_abi::ExternAbi;
use rustc_middle::{mir, ty}; use rustc_middle::{mir, ty};
use rustc_target::spec::PanicStrategy;
use self::helpers::check_intrinsic_arg_count;
use crate::*; use crate::*;
/// Holds all of the relevant data for when unwinding hits a `try` frame.
#[derive(Debug)]
pub struct CatchUnwindData<'tcx> {
/// The `catch_fn` callback to call in case of a panic.
catch_fn: Pointer,
/// The `data` argument for that callback.
data: ImmTy<'tcx>,
/// The return place from the original call to `try`.
dest: MPlaceTy<'tcx>,
/// The return block from the original call to `try`.
ret: Option<mir::BasicBlock>,
}
impl VisitProvenance for CatchUnwindData<'_> {
fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
let CatchUnwindData { catch_fn, data, dest, ret: _ } = self;
catch_fn.visit_provenance(visit);
data.visit_provenance(visit);
dest.visit_provenance(visit);
}
}
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
/// Handles the special `miri_start_unwind` intrinsic, which is called
/// by libpanic_unwind to delegate the actual unwinding process to Miri.
fn handle_miri_start_unwind(&mut self, payload: &OpTy<'tcx>) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
trace!("miri_start_unwind: {:?}", this.frame().instance());
let payload = this.read_immediate(payload)?;
let thread = this.active_thread_mut();
thread.panic_payloads.push(payload);
interp_ok(())
}
/// Handles the `catch_unwind` intrinsic.
fn handle_catch_unwind(
&mut self,
args: &[OpTy<'tcx>],
dest: &MPlaceTy<'tcx>,
ret: Option<mir::BasicBlock>,
) -> InterpResult<'tcx> {
let this = self.eval_context_mut();
// Signature:
// fn catch_unwind(try_fn: fn(*mut u8), data: *mut u8, catch_fn: fn(*mut u8, *mut u8)) -> i32
// Calls `try_fn` with `data` as argument. If that executes normally, returns 0.
// If that unwinds, calls `catch_fn` with the first argument being `data` and
// then second argument being a target-dependent `payload` (i.e. it is up to us to define
// what that is), and returns 1.
// The `payload` is passed (by libstd) to `__rust_panic_cleanup`, which is then expected to
// return a `Box<dyn Any + Send + 'static>`.
// In Miri, `miri_start_unwind` is passed exactly that type, so we make the `payload` simply
// a pointer to `Box<dyn Any + Send + 'static>`.
// Get all the arguments.
let [try_fn, data, catch_fn] = check_intrinsic_arg_count(args)?;
let try_fn = this.read_pointer(try_fn)?;
let data = this.read_immediate(data)?;
let catch_fn = this.read_pointer(catch_fn)?;
// Now we make a function call, and pass `data` as first and only argument.
let f_instance = this.get_ptr_fn(try_fn)?.as_instance()?;
trace!("try_fn: {:?}", f_instance);
#[allow(clippy::cloned_ref_to_slice_refs)] // the code is clearer as-is
this.call_function(
f_instance,
ExternAbi::Rust,
&[data.clone()],
None,
// Directly return to caller.
ReturnContinuation::Goto { ret, unwind: mir::UnwindAction::Continue },
)?;
// We ourselves will return `0`, eventually (will be overwritten if we catch a panic).
this.write_null(dest)?;
// In unwind mode, we tag this frame with the extra data needed to catch unwinding.
// This lets `handle_stack_pop` (below) know that we should stop unwinding
// when we pop this frame.
if this.tcx.sess.panic_strategy() == PanicStrategy::Unwind {
this.frame_mut().extra.catch_unwind =
Some(CatchUnwindData { catch_fn, data, dest: dest.clone(), ret });
}
interp_ok(())
}
fn handle_stack_pop_unwind(
&mut self,
mut extra: FrameExtra<'tcx>,
unwinding: bool,
) -> InterpResult<'tcx, ReturnAction> {
let this = self.eval_context_mut();
trace!("handle_stack_pop_unwind(extra = {:?}, unwinding = {})", extra, unwinding);
// We only care about `catch_panic` if we're unwinding - if we're doing a normal
// return, then we don't need to do anything special.
if let (true, Some(catch_unwind)) = (unwinding, extra.catch_unwind.take()) {
// We've just popped a frame that was pushed by `catch_unwind`,
// and we are unwinding, so we should catch that.
trace!(
"unwinding: found catch_panic frame during unwinding: {:?}",
this.frame().instance()
);
// We set the return value of `catch_unwind` to 1, since there was a panic.
this.write_scalar(Scalar::from_i32(1), &catch_unwind.dest)?;
// The Thread's `panic_payload` holds what was passed to `miri_start_unwind`.
// This is exactly the second argument we need to pass to `catch_fn`.
let payload = this.active_thread_mut().panic_payloads.pop().unwrap();
// Push the `catch_fn` stackframe.
let f_instance = this.get_ptr_fn(catch_unwind.catch_fn)?.as_instance()?;
trace!("catch_fn: {:?}", f_instance);
this.call_function(
f_instance,
ExternAbi::Rust,
&[catch_unwind.data, payload],
None,
// Directly return to caller of `catch_unwind`.
ReturnContinuation::Goto {
ret: catch_unwind.ret,
// `catch_fn` must not unwind.
unwind: mir::UnwindAction::Unreachable,
},
)?;
// We pushed a new stack frame, the engine should not do any jumping now!
interp_ok(ReturnAction::NoJump)
} else {
interp_ok(ReturnAction::Normal)
}
}
/// Start a panic in the interpreter with the given message as payload. /// Start a panic in the interpreter with the given message as payload.
fn start_panic(&mut self, msg: &str, unwind: mir::UnwindAction) -> InterpResult<'tcx> { fn start_panic(&mut self, msg: &str, unwind: mir::UnwindAction) -> InterpResult<'tcx> {
let this = self.eval_context_mut(); let this = self.eval_context_mut();

View file

@ -302,7 +302,7 @@ trait EvalContextPrivExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
// Windows has a special magic linker section that is run on certain events. // Windows has a special magic linker section that is run on certain events.
// We don't support most of that, but just enough to make thread-local dtors in `std` work. // We don't support most of that, but just enough to make thread-local dtors in `std` work.
interp_ok(this.lookup_link_section(".CRT$XLB")?) interp_ok(this.lookup_link_section(|section| section == ".CRT$XLB")?)
} }
fn schedule_windows_tls_dtor(&mut self, dtor: ImmTy<'tcx>) -> InterpResult<'tcx> { fn schedule_windows_tls_dtor(&mut self, dtor: ImmTy<'tcx>) -> InterpResult<'tcx> {

View file

@ -132,12 +132,12 @@ trait EvalContextExtPrivate<'tcx>: crate::MiriInterpCxExt<'tcx> {
let buf = this.deref_pointer_as(buf_op, this.libc_ty_layout("stat"))?; let buf = this.deref_pointer_as(buf_op, this.libc_ty_layout("stat"))?;
this.write_int_fields_named( this.write_int_fields_named(
&[ &[
("st_dev", 0), ("st_dev", metadata.dev.into()),
("st_mode", mode.try_into().unwrap()), ("st_mode", mode.try_into().unwrap()),
("st_nlink", 0), ("st_nlink", 0),
("st_ino", 0), ("st_ino", 0),
("st_uid", 0), ("st_uid", metadata.uid.into()),
("st_gid", 0), ("st_gid", metadata.gid.into()),
("st_rdev", 0), ("st_rdev", 0),
("st_atime", access_sec.into()), ("st_atime", access_sec.into()),
("st_mtime", modified_sec.into()), ("st_mtime", modified_sec.into()),
@ -1544,6 +1544,9 @@ struct FileMetadata {
created: Option<(u64, u32)>, created: Option<(u64, u32)>,
accessed: Option<(u64, u32)>, accessed: Option<(u64, u32)>,
modified: Option<(u64, u32)>, modified: Option<(u64, u32)>,
dev: u64,
uid: u32,
gid: u32,
} }
impl FileMetadata { impl FileMetadata {
@ -1601,6 +1604,21 @@ impl FileMetadata {
let modified = extract_sec_and_nsec(metadata.modified())?; let modified = extract_sec_and_nsec(metadata.modified())?;
// FIXME: Provide more fields using platform specific methods. // FIXME: Provide more fields using platform specific methods.
interp_ok(Ok(FileMetadata { mode, size, created, accessed, modified }))
cfg_select! {
unix => {
use std::os::unix::fs::MetadataExt;
let dev = metadata.dev();
let uid = metadata.uid();
let gid = metadata.gid();
}
_ => {
let dev = 0;
let uid = 0;
let gid = 0;
}
}
interp_ok(Ok(FileMetadata { mode, size, created, accessed, modified, dev, uid, gid }))
} }
} }

View file

@ -0,0 +1,160 @@
//! Unwinding runtime for Miri.
//!
//! The core pieces of the runtime are:
//! - An implementation of `catch_unwind` that pushes the invoked stack frame with
//! some extra metadata derived from the panic-catching arguments of `catch_unwind`.
//! - A hack in `libpanic_unwind` that calls the `miri_start_unwind` intrinsic instead of the
//! target-native panic runtime. (This lives in the rustc repo.)
//! - An implementation of `miri_start_unwind` that stores its argument (the panic payload), and
//! then immediately returns, but on the *unwind* edge (not the normal return edge), thus
//! initiating unwinding.
//! - A hook executed each time a frame is popped, such that if the frame pushed by `catch_unwind`
//! gets popped *during unwinding*, we take the panic payload and store it according to the extra
//! metadata we remembered when pushing said frame.
use rustc_abi::ExternAbi;
use rustc_middle::mir;
use rustc_target::spec::PanicStrategy;
use self::helpers::check_intrinsic_arg_count;
use crate::*;
/// Holds all of the relevant data for when unwinding hits a `try` frame.
/// Stored in the frame's `extra.catch_unwind` by `handle_catch_unwind` and
/// consumed by `handle_stack_pop_unwind` when that frame is popped.
#[derive(Debug)]
pub struct CatchUnwindData<'tcx> {
    /// The `catch_fn` callback to call in case of a panic.
    catch_fn: Pointer,
    /// The `data` argument for that callback.
    data: ImmTy<'tcx>,
    /// The return place from the original call to `try`.
    dest: MPlaceTy<'tcx>,
    /// The return block from the original call to `try`.
    ret: Option<mir::BasicBlock>,
}
impl VisitProvenance for CatchUnwindData<'_> {
    fn visit_provenance(&self, visit: &mut VisitWith<'_>) {
        // `ret` is just a basic-block index and carries no provenance, so it
        // is deliberately skipped here.
        let CatchUnwindData { catch_fn, data, dest, ret: _ } = self;
        catch_fn.visit_provenance(visit);
        data.visit_provenance(visit);
        dest.visit_provenance(visit);
    }
}
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Handles the special `miri_start_unwind` intrinsic, which is called
    /// by libpanic_unwind to delegate the actual unwinding process to Miri.
    /// Stores the panic payload on the active thread; it is retrieved later by
    /// `handle_stack_pop_unwind` when unwinding reaches a `catch_unwind` frame.
    fn handle_miri_start_unwind(&mut self, payload: &OpTy<'tcx>) -> InterpResult<'tcx> {
        let this = self.eval_context_mut();
        trace!("miri_start_unwind: {:?}", this.frame().instance());

        let payload = this.read_immediate(payload)?;
        let thread = this.active_thread_mut();
        // Pushed here, popped in `handle_stack_pop_unwind` below.
        thread.unwind_payloads.push(payload);

        interp_ok(())
    }

    /// Handles the `catch_unwind` intrinsic.
    fn handle_catch_unwind(
        &mut self,
        args: &[OpTy<'tcx>],
        dest: &MPlaceTy<'tcx>,
        ret: Option<mir::BasicBlock>,
    ) -> InterpResult<'tcx> {
        let this = self.eval_context_mut();

        // Signature:
        //   fn catch_unwind(try_fn: fn(*mut u8), data: *mut u8, catch_fn: fn(*mut u8, *mut u8)) -> i32
        // Calls `try_fn` with `data` as argument. If that executes normally, returns 0.
        // If that unwinds, calls `catch_fn` with the first argument being `data` and
        // then second argument being a target-dependent `payload` (i.e. it is up to us to define
        // what that is), and returns 1.
        // The `payload` is passed (by libstd) to `__rust_panic_cleanup`, which is then expected to
        // return a `Box<dyn Any + Send + 'static>`.
        // In Miri, `miri_start_unwind` is passed exactly that type, so we make the `payload` simply
        // a pointer to `Box<dyn Any + Send + 'static>`.

        // Get all the arguments.
        let [try_fn, data, catch_fn] = check_intrinsic_arg_count(args)?;
        let try_fn = this.read_pointer(try_fn)?;
        let data = this.read_immediate(data)?;
        let catch_fn = this.read_pointer(catch_fn)?;

        // Now we make a function call, and pass `data` as first and only argument.
        let f_instance = this.get_ptr_fn(try_fn)?.as_instance()?;
        trace!("try_fn: {:?}", f_instance);
        #[allow(clippy::cloned_ref_to_slice_refs)] // the code is clearer as-is
        this.call_function(
            f_instance,
            ExternAbi::Rust,
            &[data.clone()],
            None,
            // Directly return to caller.
            ReturnContinuation::Goto { ret, unwind: mir::UnwindAction::Continue },
        )?;

        // We ourselves will return `0`, eventually (will be overwritten if we catch a panic).
        this.write_null(dest)?;

        // In unwind mode, we tag this frame with the extra data needed to catch unwinding.
        // This lets `handle_stack_pop` (below) know that we should stop unwinding
        // when we pop this frame.
        if this.tcx.sess.panic_strategy() == PanicStrategy::Unwind {
            this.frame_mut().extra.catch_unwind =
                Some(CatchUnwindData { catch_fn, data, dest: dest.clone(), ret });
        }

        interp_ok(())
    }

    /// Hook executed whenever a frame is popped. If the popped frame was tagged
    /// by `handle_catch_unwind` and we are currently unwinding, this stops the
    /// unwind by invoking `catch_fn` with the saved payload. Returns
    /// `ReturnAction::NoJump` when a new frame was pushed (so the engine must
    /// not jump), or `ReturnAction::Normal` otherwise.
    fn handle_stack_pop_unwind(
        &mut self,
        mut extra: FrameExtra<'tcx>,
        unwinding: bool,
    ) -> InterpResult<'tcx, ReturnAction> {
        let this = self.eval_context_mut();
        trace!("handle_stack_pop_unwind(extra = {:?}, unwinding = {})", extra, unwinding);

        // We only care about `catch_panic` if we're unwinding - if we're doing a normal
        // return, then we don't need to do anything special.
        if let (true, Some(catch_unwind)) = (unwinding, extra.catch_unwind.take()) {
            // We've just popped a frame that was pushed by `catch_unwind`,
            // and we are unwinding, so we should catch that.
            trace!(
                "unwinding: found catch_panic frame during unwinding: {:?}",
                this.frame().instance()
            );

            // We set the return value of `catch_unwind` to 1, since there was a panic.
            this.write_scalar(Scalar::from_i32(1), &catch_unwind.dest)?;

            // The Thread's `panic_payload` holds what was passed to `miri_start_unwind`.
            // This is exactly the second argument we need to pass to `catch_fn`.
            let payload = this.active_thread_mut().unwind_payloads.pop().unwrap();

            // Push the `catch_fn` stackframe.
            let f_instance = this.get_ptr_fn(catch_unwind.catch_fn)?.as_instance()?;
            trace!("catch_fn: {:?}", f_instance);
            this.call_function(
                f_instance,
                ExternAbi::Rust,
                &[catch_unwind.data, payload],
                None,
                // Directly return to caller of `catch_unwind`.
                ReturnContinuation::Goto {
                    ret: catch_unwind.ret,
                    // `catch_fn` must not unwind.
                    unwind: mir::UnwindAction::Unreachable,
                },
            )?;

            // We pushed a new stack frame, the engine should not do any jumping now!
            interp_ok(ReturnAction::NoJump)
        } else {
            interp_ok(ReturnAction::Normal)
        }
    }
}

View file

@ -0,0 +1,25 @@
//@only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu
//@compile-flags: -Zmiri-native-lib-enable-tracing
extern "C" {
fn init_n(n: i32, ptr: *mut u8);
}
fn main() {
    // Single test case: partial initialization from native code.
    partial_init();
}
// Initialise the first 2 elements of the slice from native code, and check
// that the 3rd is correctly deemed uninit.
fn partial_init() {
    // Backing storage for 3 bytes, all initially uninitialized.
    let mut slice = std::mem::MaybeUninit::<[u8; 3]>::uninit();
    let slice_ptr = slice.as_mut_ptr().cast::<u8>();
    unsafe {
        // Initialize the first two elements.
        init_n(2, slice_ptr);
        assert!(*slice_ptr == 0);
        assert!(*slice_ptr.offset(1) == 0);
        // Reading the third is UB!
        let _val = *slice_ptr.offset(2); //~ ERROR: Undefined Behavior: using uninitialized data
    }
}

View file

@ -0,0 +1,39 @@
warning: sharing memory with a native function called via FFI
--> tests/native-lib/fail/tracing/partial_init.rs:LL:CC
|
LL | init_n(2, slice_ptr);
| ^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function
|
= help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis
= help: in particular, Miri assumes that the native call initializes all memory it has written to
= help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory
= help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free
= help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here
= note: BACKTRACE:
= note: inside `partial_init` at tests/native-lib/fail/tracing/partial_init.rs:LL:CC
note: inside `main`
--> tests/native-lib/fail/tracing/partial_init.rs:LL:CC
|
LL | partial_init();
| ^^^^^^^^^^^^^^
error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
--> tests/native-lib/fail/tracing/partial_init.rs:LL:CC
|
LL | let _val = *slice_ptr.offset(2);
| ^^^^^^^^^^^^^^^^^^^^ Undefined Behavior occurred here
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `partial_init` at tests/native-lib/fail/tracing/partial_init.rs:LL:CC
note: inside `main`
--> tests/native-lib/fail/tracing/partial_init.rs:LL:CC
|
LL | partial_init();
| ^^^^^^^^^^^^^^
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error; 1 warning emitted

View file

@ -0,0 +1,29 @@
//@only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu
//@compile-flags: -Zmiri-permissive-provenance -Zmiri-native-lib-enable-tracing
extern "C" {
fn do_one_deref(ptr: *const *const *const i32) -> usize;
}
fn main() {
    // Single test case: exposure tracking through a chain of pointers.
    unexposed_reachable_alloc();
}
// Expose 2 pointers by virtue of doing a native read and assert that the 3rd in
// the chain remains properly unexposed.
fn unexposed_reachable_alloc() {
    // Build a three-level pointer chain: exposed -> intermediate_b -> intermediate_a -> inner.
    let inner = 42;
    let intermediate_a = &raw const inner;
    let intermediate_b = &raw const intermediate_a;
    let exposed = &raw const intermediate_b;
    // Discard the return value; it's just there so the access in C doesn't get optimised away.
    unsafe { do_one_deref(exposed) };
    // Native read should have exposed the address of intermediate_b...
    let valid: *const i32 = std::ptr::with_exposed_provenance(intermediate_b.addr());
    // but not of intermediate_a.
    let invalid: *const i32 = std::ptr::with_exposed_provenance(intermediate_a.addr());
    unsafe {
        let _ok = *valid;
        let _not_ok = *invalid; //~ ERROR: Undefined Behavior: memory access failed: attempting to access
    }
}

View file

@ -0,0 +1,39 @@
warning: sharing memory with a native function called via FFI
--> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
|
LL | unsafe { do_one_deref(exposed) };
| ^^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function
|
= help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis
= help: in particular, Miri assumes that the native call initializes all memory it has written to
= help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory
= help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free
= help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here
= note: BACKTRACE:
= note: inside `unexposed_reachable_alloc` at tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
note: inside `main`
--> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
|
LL | unexposed_reachable_alloc();
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^
error: Undefined Behavior: memory access failed: attempting to access 4 bytes, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
--> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
|
LL | let _not_ok = *invalid;
| ^^^^^^^^ Undefined Behavior occurred here
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `unexposed_reachable_alloc` at tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
note: inside `main`
--> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
|
LL | unexposed_reachable_alloc();
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error; 1 warning emitted

View file

@ -1,3 +1,7 @@
//@revisions: trace notrace
//@[trace] only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu
//@[trace] compile-flags: -Zmiri-native-lib-enable-tracing
fn main() { fn main() {
test_access_pointer(); test_access_pointer();
test_access_simple(); test_access_simple();

View file

@ -0,0 +1,19 @@
warning: sharing memory with a native function called via FFI
--> tests/native-lib/pass/ptr_read_access.rs:LL:CC
|
LL | unsafe { print_pointer(&x) };
| ^^^^^^^^^^^^^^^^^ sharing memory with a native function
|
= help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis
= help: in particular, Miri assumes that the native call initializes all memory it has written to
= help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory
= help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free
= help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here
= note: BACKTRACE:
= note: inside `test_access_pointer` at tests/native-lib/pass/ptr_read_access.rs:LL:CC
note: inside `main`
--> tests/native-lib/pass/ptr_read_access.rs:LL:CC
|
LL | test_access_pointer();
| ^^^^^^^^^^^^^^^^^^^^^

View file

@ -0,0 +1 @@
printing pointer dereference from C: 42

View file

@ -1,3 +1,6 @@
//@revisions: trace notrace
//@[trace] only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu
//@[trace] compile-flags: -Zmiri-native-lib-enable-tracing
//@compile-flags: -Zmiri-permissive-provenance //@compile-flags: -Zmiri-permissive-provenance
#![feature(box_as_ptr)] #![feature(box_as_ptr)]

View file

@ -0,0 +1,19 @@
warning: sharing memory with a native function called via FFI
--> tests/native-lib/pass/ptr_write_access.rs:LL:CC
|
LL | unsafe { increment_int(&mut x) };
| ^^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function
|
= help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis
= help: in particular, Miri assumes that the native call initializes all memory it has written to
= help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory
= help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free
= help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here
= note: BACKTRACE:
= note: inside `test_increment_int` at tests/native-lib/pass/ptr_write_access.rs:LL:CC
note: inside `main`
--> tests/native-lib/pass/ptr_write_access.rs:LL:CC
|
LL | test_increment_int();
| ^^^^^^^^^^^^^^^^^^^^

View file

@ -49,3 +49,9 @@ typedef struct Static {
EXPORT int32_t access_static(const Static *s_ptr) { EXPORT int32_t access_static(const Static *s_ptr) {
return s_ptr->recurse->recurse->value; return s_ptr->recurse->recurse->value;
} }
/* Test: unexposed_reachable_alloc */

/* Performs exactly one dereference of the pointer chain and returns the loaded
 * pointer as an integer, so the read is observable and cannot be optimised away. */
EXPORT uintptr_t do_one_deref(const int32_t ***ptr) {
  return (uintptr_t)*ptr;
}

View file

@ -107,3 +107,11 @@ EXPORT void set_shared_mem(int32_t** ptr) {
EXPORT void init_ptr_stored_in_shared_mem(int32_t val) { EXPORT void init_ptr_stored_in_shared_mem(int32_t val) {
**shared_place = val; **shared_place = val;
} }
/* Test: partial_init */

/* Zeroes the first `n` bytes starting at `ptr` (no-op when n <= 0). */
EXPORT void init_n(int32_t n, char* ptr) {
  int32_t remaining = n;
  char* cursor = ptr;
  while (remaining > 0) {
    *cursor = 0;
    cursor++;
    remaining--;
  }
}

View file

@ -41,7 +41,15 @@ fn static_atomic_bool(val: bool) -> &'static AtomicBool {
} }
/// Spins until it acquires a pre-determined value. /// Spins until it acquires a pre-determined value.
fn loads_value(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 { fn spin_until_i32(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 {
while loc.load(ord) != val {
std::hint::spin_loop();
}
val
}
/// Spins until it acquires a pre-determined boolean.
fn spin_until_bool(loc: &AtomicBool, ord: Ordering, val: bool) -> bool {
while loc.load(ord) != val { while loc.load(ord) != val {
std::hint::spin_loop(); std::hint::spin_loop();
} }
@ -65,7 +73,7 @@ fn test_corr() {
}); // | | }); // | |
#[rustfmt::skip] // |synchronizes-with |happens-before #[rustfmt::skip] // |synchronizes-with |happens-before
let j3 = spawn(move || { // | | let j3 = spawn(move || { // | |
loads_value(&y, Acquire, 1); // <------------+ | spin_until_i32(&y, Acquire, 1); // <---------+ |
x.load(Relaxed) // <----------------------------------------------+ x.load(Relaxed) // <----------------------------------------------+
// The two reads on x are ordered by hb, so they cannot observe values // The two reads on x are ordered by hb, so they cannot observe values
// differently from the modification order. If the first read observed // differently from the modification order. If the first read observed
@ -90,12 +98,12 @@ fn test_wrc() {
}); // | | }); // | |
#[rustfmt::skip] // |synchronizes-with | #[rustfmt::skip] // |synchronizes-with |
let j2 = spawn(move || { // | | let j2 = spawn(move || { // | |
loads_value(&x, Acquire, 1); // <------------+ | spin_until_i32(&x, Acquire, 1); // <---------+ |
y.store(1, Release); // ---------------------+ |happens-before y.store(1, Release); // ---------------------+ |happens-before
}); // | | }); // | |
#[rustfmt::skip] // |synchronizes-with | #[rustfmt::skip] // |synchronizes-with |
let j3 = spawn(move || { // | | let j3 = spawn(move || { // | |
loads_value(&y, Acquire, 1); // <------------+ | spin_until_i32(&y, Acquire, 1); // <---------+ |
x.load(Relaxed) // <-----------------------------------------------+ x.load(Relaxed) // <-----------------------------------------------+
}); });
@ -121,7 +129,7 @@ fn test_message_passing() {
#[rustfmt::skip] // |synchronizes-with | happens-before #[rustfmt::skip] // |synchronizes-with | happens-before
let j2 = spawn(move || { // | | let j2 = spawn(move || { // | |
let x = x; // avoid field capturing | | let x = x; // avoid field capturing | |
loads_value(&y, Acquire, 1); // <------------+ | spin_until_i32(&y, Acquire, 1); // <---------+ |
unsafe { *x.0 } // <---------------------------------------------+ unsafe { *x.0 } // <---------------------------------------------+
}); });
@ -216,12 +224,12 @@ fn test_sync_through_rmw_and_fences() {
let go = static_atomic_bool(false); let go = static_atomic_bool(false);
let t1 = spawn(move || { let t1 = spawn(move || {
while !go.load(Relaxed) {} spin_until_bool(go, Relaxed, true);
rdmw(y, x, z) rdmw(y, x, z)
}); });
let t2 = spawn(move || { let t2 = spawn(move || {
while !go.load(Relaxed) {} spin_until_bool(go, Relaxed, true);
rdmw(z, x, y) rdmw(z, x, y)
}); });

View file

@ -20,7 +20,15 @@ fn static_atomic_bool(val: bool) -> &'static AtomicBool {
} }
/// Spins until it acquires a pre-determined value. /// Spins until it acquires a pre-determined value.
fn loads_value(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 { fn spin_until_i32(loc: &AtomicI32, ord: Ordering, val: i32) -> i32 {
while loc.load(ord) != val {
std::hint::spin_loop();
}
val
}
/// Spins until it acquires a pre-determined boolean.
fn spin_until_bool(loc: &AtomicBool, ord: Ordering, val: bool) -> bool {
while loc.load(ord) != val { while loc.load(ord) != val {
std::hint::spin_loop(); std::hint::spin_loop();
} }
@ -60,11 +68,11 @@ fn test_iriw_sc_rlx() {
let a = spawn(move || x.store(true, Relaxed)); let a = spawn(move || x.store(true, Relaxed));
let b = spawn(move || y.store(true, Relaxed)); let b = spawn(move || y.store(true, Relaxed));
let c = spawn(move || { let c = spawn(move || {
while !x.load(SeqCst) {} spin_until_bool(x, SeqCst, true);
y.load(SeqCst) y.load(SeqCst)
}); });
let d = spawn(move || { let d = spawn(move || {
while !y.load(SeqCst) {} spin_until_bool(y, SeqCst, true);
x.load(SeqCst) x.load(SeqCst)
}); });
@ -136,7 +144,7 @@ fn test_cpp20_rwc_syncs() {
}); });
let j2 = spawn(move || { let j2 = spawn(move || {
loads_value(&x, Relaxed, 1); spin_until_i32(&x, Relaxed, 1);
fence(SeqCst); fence(SeqCst);
y.load(Relaxed) y.load(Relaxed)
}); });

View file

@ -1,7 +1,7 @@
#![no_std] #![no_std]
#![no_main] #![no_main]
//@compile-flags: -Zmiri-track-alloc-id=20 -Zmiri-track-alloc-accesses -Cpanic=abort //@compile-flags: -Zmiri-track-alloc-id=19 -Zmiri-track-alloc-accesses -Cpanic=abort
//@normalize-stderr-test: "id 20" -> "id $$ALLOC" //@normalize-stderr-test: "id 19" -> "id $$ALLOC"
//@only-target: linux # alloc IDs differ between OSes (due to extern static allocations) //@only-target: linux # alloc IDs differ between OSes (due to extern static allocations)
extern "Rust" { extern "Rust" {

View file

@ -0,0 +1,46 @@
use std::sync::atomic::{AtomicUsize, Ordering};
static COUNT: AtomicUsize = AtomicUsize::new(0);
unsafe extern "C" fn ctor() {
COUNT.fetch_add(1, Ordering::Relaxed);
}
#[rustfmt::skip]
macro_rules! ctor {
($ident:ident = $ctor:ident) => {
#[cfg_attr(
all(any(
target_os = "linux",
target_os = "android",
target_os = "dragonfly",
target_os = "freebsd",
target_os = "haiku",
target_os = "illumos",
target_os = "netbsd",
target_os = "openbsd",
target_os = "solaris",
target_os = "none",
target_family = "wasm",
)),
link_section = ".init_array"
)]
#[cfg_attr(windows, link_section = ".CRT$XCU")]
#[cfg_attr(
any(target_os = "macos", target_os = "ios"),
// We do not set the `mod_init_funcs` flag here since ctor/inventory also do not do
// that. See <https://github.com/rust-lang/miri/pull/4459#discussion_r2200115629>.
link_section = "__DATA,__mod_init_func"
)]
#[used]
static $ident: unsafe extern "C" fn() = $ctor;
};
}
ctor! { CTOR1 = ctor }
ctor! { CTOR2 = ctor }
ctor! { CTOR3 = ctor }
fn main() {
assert_eq!(COUNT.load(Ordering::Relaxed), 3, "ctors did not run");
}

View file

@ -24,7 +24,7 @@ fn static_atomic(val: usize) -> &'static AtomicUsize {
} }
// Spins until it reads the given value // Spins until it reads the given value
fn reads_value(loc: &AtomicUsize, val: usize) -> usize { fn spin_until(loc: &AtomicUsize, val: usize) -> usize {
while loc.load(Relaxed) != val { while loc.load(Relaxed) != val {
std::hint::spin_loop(); std::hint::spin_loop();
} }
@ -85,7 +85,7 @@ fn initialization_write(add_fence: bool) -> bool {
}); });
let j2 = spawn(move || { let j2 = spawn(move || {
reads_value(wait, 1); spin_until(wait, 1);
if add_fence { if add_fence {
fence(AcqRel); fence(AcqRel);
} }
@ -119,12 +119,12 @@ fn faa_replaced_by_load() -> bool {
let go = static_atomic(0); let go = static_atomic(0);
let t1 = spawn(move || { let t1 = spawn(move || {
while go.load(Relaxed) == 0 {} spin_until(go, 1);
rdmw(y, x, z) rdmw(y, x, z)
}); });
let t2 = spawn(move || { let t2 = spawn(move || {
while go.load(Relaxed) == 0 {} spin_until(go, 1);
rdmw(z, x, y) rdmw(z, x, y)
}); });