//! Main evaluator loop and setting up the initial stack frame. use std::ffi::{OsStr, OsString}; use std::iter; use std::panic::{self, AssertUnwindSafe}; use std::path::PathBuf; use std::thread; use log::info; use rustc_data_structures::fx::FxHashSet; use rustc_hir::def_id::DefId; use rustc_middle::ty::{ self, layout::{LayoutCx, LayoutOf}, TyCtxt, }; use rustc_target::spec::abi::Abi; use rustc_session::config::EntryFnType; use crate::*; #[derive(Copy, Clone, Debug, PartialEq)] pub enum AlignmentCheck { /// Do not check alignment. None, /// Check alignment "symbolically", i.e., using only the requested alignment for an allocation and not its real base address. Symbolic, /// Check alignment on the actual physical integer address. Int, } #[derive(Copy, Clone, Debug, PartialEq)] pub enum RejectOpWith { /// Isolated op is rejected with an abort of the machine. Abort, /// If not Abort, miri returns an error for an isolated op. /// Following options determine if user should be warned about such error. /// Do not print warning about rejected isolated op. NoWarning, /// Print a warning about rejected isolated op, with backtrace. Warning, /// Print a warning about rejected isolated op, without backtrace. WarningWithoutBacktrace, } #[derive(Copy, Clone, Debug, PartialEq)] pub enum IsolatedOp { /// Reject an op requiring communication with the host. By /// default, miri rejects the op with an abort. If not, it returns /// an error code, and prints a warning about it. Warning levels /// are controlled by `RejectOpWith` enum. Reject(RejectOpWith), /// Execute op requiring communication with the host, i.e. disable isolation. Allow, } #[derive(Copy, Clone, PartialEq, Eq)] pub enum BacktraceStyle { /// Prints a terser backtrace which ideally only contains relevant information. Short, /// Prints a backtrace with all possible information. Full, /// Prints only the frame that the error occurs in. Off, } /// Configuration needed to spawn a Miri instance. 
#[derive(Clone)] pub struct MiriConfig { /// The host environment snapshot to use as basis for what is provided to the interpreted program. /// (This is still subject to isolation as well as `excluded_env_vars` and `forwarded_env_vars`.) pub env: Vec<(OsString, OsString)>, /// Determine if validity checking is enabled. pub validate: bool, /// Determines if Stacked Borrows is enabled. pub stacked_borrows: bool, /// Controls alignment checking. pub check_alignment: AlignmentCheck, /// Controls function [ABI](Abi) checking. pub check_abi: bool, /// Action for an op requiring communication with the host. pub isolated_op: IsolatedOp, /// Determines if memory leaks should be ignored. pub ignore_leaks: bool, /// Environment variables that should always be isolated from the host. pub excluded_env_vars: Vec, /// Environment variables that should always be forwarded from the host. pub forwarded_env_vars: Vec, /// Command-line arguments passed to the interpreted program. pub args: Vec, /// The seed to use when non-determinism or randomness are required (e.g. ptr-to-int cast, `getrandom()`). pub seed: Option, /// The stacked borrows pointer ids to report about pub tracked_pointer_tags: FxHashSet, /// The stacked borrows call IDs to report about pub tracked_call_ids: FxHashSet, /// The allocation ids to report about. pub tracked_alloc_ids: FxHashSet, /// Determine if data race detection should be enabled pub data_race_detector: bool, /// Determine if weak memory emulation should be enabled. Requires data race detection to be enabled pub weak_memory_emulation: bool, /// Track when an outdated (weak memory) load happens. pub track_outdated_loads: bool, /// Rate of spurious failures for compare_exchange_weak atomic operations, /// between 0.0 and 1.0, defaulting to 0.8 (80% chance of failure). pub cmpxchg_weak_failure_rate: f64, /// If `Some`, enable the `measureme` profiler, writing results to a file /// with the specified prefix. 
pub measureme_out: Option, /// Panic when unsupported functionality is encountered. pub panic_on_unsupported: bool, /// Which style to use for printing backtraces. pub backtrace_style: BacktraceStyle, /// Which provenance to use for int2ptr casts pub provenance_mode: ProvenanceMode, /// Whether to ignore any output by the program. This is helpful when debugging miri /// as its messages don't get intermingled with the program messages. pub mute_stdout_stderr: bool, /// The probability of the active thread being preempted at the end of each basic block. pub preemption_rate: f64, /// Report the current instruction being executed every N basic blocks. pub report_progress: Option, /// Whether Stacked Borrows retagging should recurse into fields of datatypes. pub retag_fields: bool, /// The location of a shared object file to load when calling external functions /// FIXME! consider allowing users to specify paths to multiple SO files, or to a directory pub external_so_file: Option, /// Run a garbage collector for SbTags every N basic blocks. 
pub gc_interval: u32, } impl Default for MiriConfig { fn default() -> MiriConfig { MiriConfig { env: vec![], validate: true, stacked_borrows: true, check_alignment: AlignmentCheck::Int, check_abi: true, isolated_op: IsolatedOp::Reject(RejectOpWith::Abort), ignore_leaks: false, excluded_env_vars: vec![], forwarded_env_vars: vec![], args: vec![], seed: None, tracked_pointer_tags: FxHashSet::default(), tracked_call_ids: FxHashSet::default(), tracked_alloc_ids: FxHashSet::default(), data_race_detector: true, weak_memory_emulation: true, track_outdated_loads: false, cmpxchg_weak_failure_rate: 0.8, // 80% measureme_out: None, panic_on_unsupported: false, backtrace_style: BacktraceStyle::Short, provenance_mode: ProvenanceMode::Default, mute_stdout_stderr: false, preemption_rate: 0.01, // 1% report_progress: None, retag_fields: false, external_so_file: None, gc_interval: 10_000, } } } /// Returns a freshly created `InterpCx`, along with an `MPlaceTy` representing /// the location where the return value of the `start` function will be /// written to. /// Public because this is also used by `priroda`. pub fn create_ecx<'mir, 'tcx: 'mir>( tcx: TyCtxt<'tcx>, entry_id: DefId, entry_type: EntryFnType, config: &MiriConfig, ) -> InterpResult<'tcx, (InterpCx<'mir, 'tcx, MiriMachine<'mir, 'tcx>>, MPlaceTy<'tcx, Provenance>)> { let param_env = ty::ParamEnv::reveal_all(); let layout_cx = LayoutCx { tcx, param_env }; let mut ecx = InterpCx::new( tcx, rustc_span::source_map::DUMMY_SP, param_env, MiriMachine::new(config, layout_cx), ); // Some parts of initialization require a full `InterpCx`. MiriMachine::late_init(&mut ecx, config)?; // Make sure we have MIR. We check MIR for some stable monomorphic function in libcore. let sentinel = ecx.try_resolve_path(&["core", "ascii", "escape_default"]); if !matches!(sentinel, Some(s) if tcx.is_mir_available(s.def.def_id())) { tcx.sess.fatal( "the current sysroot was built without `-Zalways-encode-mir`, or libcore seems missing. 
\ Use `cargo miri setup` to prepare a sysroot that is suitable for Miri." ); } // Setup first stack frame. let entry_instance = ty::Instance::mono(tcx, entry_id); // First argument is constructed later, because it's skipped if the entry function uses #[start]. // Second argument (argc): length of `config.args`. let argc = Scalar::from_machine_usize(u64::try_from(config.args.len()).unwrap(), &ecx); // Third argument (`argv`): created from `config.args`. let argv = { // Put each argument in memory, collect pointers. let mut argvs = Vec::>::new(); for arg in config.args.iter() { // Make space for `0` terminator. let size = u64::try_from(arg.len()).unwrap().checked_add(1).unwrap(); let arg_type = tcx.mk_array(tcx.types.u8, size); let arg_place = ecx.allocate(ecx.layout_of(arg_type)?, MiriMemoryKind::Machine.into())?; ecx.write_os_str_to_c_str(OsStr::new(arg), arg_place.ptr, size)?; ecx.mark_immutable(&arg_place); argvs.push(arg_place.to_ref(&ecx)); } // Make an array with all these pointers, in the Miri memory. let argvs_layout = ecx.layout_of( tcx.mk_array(tcx.mk_imm_ptr(tcx.types.u8), u64::try_from(argvs.len()).unwrap()), )?; let argvs_place = ecx.allocate(argvs_layout, MiriMemoryKind::Machine.into())?; for (idx, arg) in argvs.into_iter().enumerate() { let place = ecx.mplace_field(&argvs_place, idx)?; ecx.write_immediate(arg, &place.into())?; } ecx.mark_immutable(&argvs_place); // A pointer to that place is the 3rd argument for main. let argv = argvs_place.to_ref(&ecx); // Store `argc` and `argv` for macOS `_NSGetArg{c,v}`. 
{ let argc_place = ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?; ecx.write_scalar(argc, &argc_place.into())?; ecx.mark_immutable(&argc_place); ecx.machine.argc = Some(*argc_place); let argv_place = ecx.allocate( ecx.layout_of(tcx.mk_imm_ptr(tcx.types.unit))?, MiriMemoryKind::Machine.into(), )?; ecx.write_immediate(argv, &argv_place.into())?; ecx.mark_immutable(&argv_place); ecx.machine.argv = Some(*argv_place); } // Store command line as UTF-16 for Windows `GetCommandLineW`. { // Construct a command string with all the arguments. let cmd_utf16: Vec = args_to_utf16_command_string(config.args.iter()); let cmd_type = tcx.mk_array(tcx.types.u16, u64::try_from(cmd_utf16.len()).unwrap()); let cmd_place = ecx.allocate(ecx.layout_of(cmd_type)?, MiriMemoryKind::Machine.into())?; ecx.machine.cmd_line = Some(*cmd_place); // Store the UTF-16 string. We just allocated so we know the bounds are fine. for (idx, &c) in cmd_utf16.iter().enumerate() { let place = ecx.mplace_field(&cmd_place, idx)?; ecx.write_scalar(Scalar::from_u16(c), &place.into())?; } ecx.mark_immutable(&cmd_place); } argv }; // Return place (in static memory so that it does not count as leak). let ret_place = ecx.allocate(ecx.machine.layouts.isize, MiriMemoryKind::Machine.into())?; // Call start function. match entry_type { EntryFnType::Main { .. } => { let start_id = tcx.lang_items().start_fn().unwrap(); let main_ret_ty = tcx.fn_sig(entry_id).output(); let main_ret_ty = main_ret_ty.no_bound_vars().unwrap(); let start_instance = ty::Instance::resolve( tcx, ty::ParamEnv::reveal_all(), start_id, tcx.mk_substs(::std::iter::once(ty::subst::GenericArg::from(main_ret_ty))), ) .unwrap() .unwrap(); let main_ptr = ecx.create_fn_alloc_ptr(FnVal::Instance(entry_instance)); // Inlining of `DEFAULT` from // https://github.com/rust-lang/rust/blob/master/compiler/rustc_session/src/config/sigpipe.rs. // Alaways using DEFAULT is okay since we don't support signals in Miri anyway. 
let sigpipe = 2; ecx.call_function( start_instance, Abi::Rust, &[ Scalar::from_pointer(main_ptr, &ecx).into(), argc.into(), argv, Scalar::from_u8(sigpipe).into(), ], Some(&ret_place.into()), StackPopCleanup::Root { cleanup: true }, )?; } EntryFnType::Start => { ecx.call_function( entry_instance, Abi::Rust, &[argc.into(), argv], Some(&ret_place.into()), StackPopCleanup::Root { cleanup: true }, )?; } } Ok((ecx, ret_place)) } /// Evaluates the entry function specified by `entry_id`. /// Returns `Some(return_code)` if program executed completed. /// Returns `None` if an evaluation error occurred. #[allow(clippy::needless_lifetimes)] pub fn eval_entry<'tcx>( tcx: TyCtxt<'tcx>, entry_id: DefId, entry_type: EntryFnType, config: MiriConfig, ) -> Option { // Copy setting before we move `config`. let ignore_leaks = config.ignore_leaks; let (mut ecx, ret_place) = match create_ecx(tcx, entry_id, entry_type, &config) { Ok(v) => v, Err(err) => { err.print_backtrace(); panic!("Miri initialization error: {}", err.kind()) } }; // Perform the main execution. let res: thread::Result> = panic::catch_unwind(AssertUnwindSafe(|| { // Main loop. loop { match ecx.schedule()? { SchedulingAction::ExecuteStep => { assert!(ecx.step()?, "a terminated thread was scheduled for execution"); } SchedulingAction::ExecuteTimeoutCallback => { ecx.run_timeout_callback()?; } SchedulingAction::ExecuteDtors => { // This will either enable the thread again (so we go back // to `ExecuteStep`), or determine that this thread is done // for good. ecx.schedule_next_tls_dtor_for_active_thread()?; } SchedulingAction::Stop => { break; } } } let return_code = ecx.read_scalar(&ret_place.into())?.to_machine_isize(&ecx)?; Ok(return_code) })); let res = res.unwrap_or_else(|panic_payload| { ecx.handle_ice(); panic::resume_unwind(panic_payload) }); // Machine cleanup. 
Only do this if all threads have terminated; threads that are still running // might cause Stacked Borrows errors (https://github.com/rust-lang/miri/issues/2396). if ecx.have_all_terminated() { // Even if all threads have terminated, we have to beware of data races since some threads // might not have joined the main thread (https://github.com/rust-lang/miri/issues/2020, // https://github.com/rust-lang/miri/issues/2508). ecx.allow_data_races_all_threads_done(); EnvVars::cleanup(&mut ecx).expect("error during env var cleanup"); } // Process the result. match res { Ok(return_code) => { if !ignore_leaks { // Check for thread leaks. if !ecx.have_all_terminated() { tcx.sess.err( "the main thread terminated without waiting for all remaining threads", ); tcx.sess.note_without_error("pass `-Zmiri-ignore-leaks` to disable this check"); return None; } // Check for memory leaks. info!("Additonal static roots: {:?}", ecx.machine.static_roots); let leaks = ecx.leak_report(&ecx.machine.static_roots); if leaks != 0 { tcx.sess.err("the evaluated program leaked memory"); tcx.sess.note_without_error("pass `-Zmiri-ignore-leaks` to disable this check"); // Ignore the provided return code - let the reported error // determine the return code. return None; } } Some(return_code) } Err(e) => report_error(&ecx, e), } } /// Turns an array of arguments into a Windows command line string. /// /// The string will be UTF-16 encoded and NUL terminated. /// /// Panics if the zeroth argument contains the `"` character because doublequotes /// in `argv[0]` cannot be encoded using the standard command line parsing rules. 
///
/// Also panics if any argument contains an interior NUL character, because the
/// resulting command line is itself NUL terminated.
///
/// With no arguments at all, the result is just the NUL terminator.
///
/// Further reading:
/// * [Parsing C++ command-line arguments](https://docs.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-160#parsing-c-command-line-arguments)
/// * [The C/C++ Parameter Parsing Rules](https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES)
fn args_to_utf16_command_string<I, T>(mut args: I) -> Vec<u16>
where
    I: Iterator<Item = T>,
    T: AsRef<str>,
{
    // Parse argv[0]. Slashes aren't escaped. Literal double quotes are not allowed.
    let mut cmd = match args.next() {
        // No arguments: the command line consists of the terminator only.
        None => return vec![0],
        Some(arg0) => {
            let arg0 = arg0.as_ref();
            if arg0.contains('"') {
                panic!("argv[0] cannot contain a doublequote (\") character");
            }
            // Always surround argv[0] with quotes.
            let mut s = String::with_capacity(arg0.len() + 2);
            s.push('"');
            s.push_str(arg0);
            s.push('"');
            s
        }
    };

    // Build the other arguments.
    for arg in args {
        let arg = arg.as_ref();
        cmd.push(' ');
        if arg.is_empty() {
            // An empty argument must be spelled as an empty quoted block.
            cmd.push_str("\"\"");
        } else if !arg.bytes().any(|c| matches!(c, b'"' | b'\t' | b' ')) {
            // No quote, tab, or space -- no escaping required.
            cmd.push_str(arg);
        } else {
            // Spaces and tabs are escaped by surrounding them in quotes.
            // Quotes are themselves escaped by using backslashes when in a
            // quoted block.
            // Backslashes only need to be escaped when one or more are directly
            // followed by a quote. Otherwise they are taken literally.

            cmd.push('"');
            let mut chars = arg.chars().peekable();
            loop {
                // Count a run of backslashes; how it is emitted depends on what follows.
                let mut nslashes = 0;
                while let Some(&'\\') = chars.peek() {
                    chars.next();
                    nslashes += 1;
                }

                match chars.next() {
                    Some('"') => {
                        // Double the run, plus one more backslash to escape the quote.
                        cmd.extend(iter::repeat('\\').take(nslashes * 2 + 1));
                        cmd.push('"');
                    }
                    Some(c) => {
                        // Backslashes not followed by a quote are literal.
                        cmd.extend(iter::repeat('\\').take(nslashes));
                        cmd.push(c);
                    }
                    None => {
                        // The run precedes the closing quote, so it must be doubled.
                        cmd.extend(iter::repeat('\\').take(nslashes * 2));
                        break;
                    }
                }
            }
            cmd.push('"');
        }
    }

    if cmd.contains('\0') {
        panic!("interior null in command line arguments");
    }
    cmd.encode_utf16().chain(iter::once(0)).collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    #[should_panic(expected = "argv[0] cannot contain a doublequote (\") character")]
    fn windows_argv0_panic_on_quote() {
        args_to_utf16_command_string(["\""].iter());
    }

    #[test]
    fn windows_argv0_no_escape() {
        // Ensure that a trailing backslash in argv[0] is not escaped.
        let cmd = String::from_utf16_lossy(&args_to_utf16_command_string(
            [r"C:\Program Files\", "arg1", "arg 2", "arg \" 3"].iter(),
        ));
        assert_eq!(cmd.trim_end_matches('\0'), r#""C:\Program Files\" arg1 "arg 2" "arg \" 3""#);
    }
}