Merge pull request #4405 from nia-e/standalone-ptrace
trace: implement supervisor components for tracing
This commit is contained in:
commit
7f0dbba263
6 changed files with 619 additions and 112 deletions
|
|
@ -121,6 +121,26 @@ dependencies = [
|
|||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "capstone"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "015ef5d5ca1743e3f94af9509ba6bd2886523cfee46e48d15c2ef5216fd4ac9a"
|
||||
dependencies = [
|
||||
"capstone-sys",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "capstone-sys"
|
||||
version = "0.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2267cb8d16a1e4197863ec4284ffd1aec26fe7e57c58af46b02590a0235809a0"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cargo-platform"
|
||||
version = "0.1.9"
|
||||
|
|
@ -591,6 +611,7 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"aes",
|
||||
"bitflags",
|
||||
"capstone",
|
||||
"chrono",
|
||||
"chrono-tz",
|
||||
"colored",
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ libloading = "0.8"
|
|||
nix = { version = "0.30.1", features = ["mman", "ptrace", "signal"] }
|
||||
ipc-channel = "0.19.0"
|
||||
serde = { version = "1.0.219", features = ["derive"] }
|
||||
capstone = "0.13"
|
||||
|
||||
[dev-dependencies]
|
||||
ui_test = "0.29.1"
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
use std::alloc::Layout;
|
||||
use std::ptr::NonNull;
|
||||
|
||||
use nix::sys::mman;
|
||||
use rustc_index::bit_set::DenseBitSet;
|
||||
|
||||
/// How many bytes of memory each bit in the bitset represents.
|
||||
|
|
@ -12,7 +14,7 @@ pub struct IsolatedAlloc {
|
|||
/// Pointers to page-aligned memory that has been claimed by the allocator.
|
||||
/// Every pointer here must point to a page-sized allocation claimed via
|
||||
/// mmap. These pointers are used for "small" allocations.
|
||||
page_ptrs: Vec<*mut u8>,
|
||||
page_ptrs: Vec<NonNull<u8>>,
|
||||
/// Metadata about which bytes have been allocated on each page. The length
|
||||
/// of this vector must be the same as that of `page_ptrs`, and the domain
|
||||
/// size of the bitset must be exactly `page_size / COMPRESSION_FACTOR`.
|
||||
|
|
@ -24,7 +26,7 @@ pub struct IsolatedAlloc {
|
|||
page_infos: Vec<DenseBitSet<usize>>,
|
||||
/// Pointers to multiple-page-sized allocations. These must also be page-aligned,
|
||||
/// with their size stored as the second element of each tuple.
|
||||
huge_ptrs: Vec<(*mut u8, usize)>,
|
||||
huge_ptrs: Vec<(NonNull<u8>, usize)>,
|
||||
/// The host (not emulated) page size.
|
||||
page_size: usize,
|
||||
}
|
||||
|
|
@ -137,7 +139,7 @@ impl IsolatedAlloc {
|
|||
unsafe fn alloc_small(
|
||||
page_size: usize,
|
||||
layout: Layout,
|
||||
page: *mut u8,
|
||||
page: NonNull<u8>,
|
||||
pinfo: &mut DenseBitSet<usize>,
|
||||
zeroed: bool,
|
||||
) -> Option<*mut u8> {
|
||||
|
|
@ -164,7 +166,7 @@ impl IsolatedAlloc {
|
|||
// zero out, even if we allocated more
|
||||
ptr.write_bytes(0, layout.size());
|
||||
}
|
||||
return Some(ptr);
|
||||
return Some(ptr.as_ptr());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -172,7 +174,7 @@ impl IsolatedAlloc {
|
|||
}
|
||||
|
||||
/// Expands the available memory pool by adding one page.
|
||||
fn add_page(&mut self) -> (*mut u8, &mut DenseBitSet<usize>) {
|
||||
fn add_page(&mut self) -> (NonNull<u8>, &mut DenseBitSet<usize>) {
|
||||
// SAFETY: mmap is always safe to call when requesting anonymous memory
|
||||
let page_ptr = unsafe {
|
||||
libc::mmap(
|
||||
|
|
@ -189,8 +191,8 @@ impl IsolatedAlloc {
|
|||
// `page_infos` has to have one bit for each `COMPRESSION_FACTOR`-sized chunk of bytes in the page.
|
||||
assert!(self.page_size % COMPRESSION_FACTOR == 0);
|
||||
self.page_infos.push(DenseBitSet::new_empty(self.page_size / COMPRESSION_FACTOR));
|
||||
self.page_ptrs.push(page_ptr);
|
||||
(page_ptr, self.page_infos.last_mut().unwrap())
|
||||
self.page_ptrs.push(NonNull::new(page_ptr).unwrap());
|
||||
(NonNull::new(page_ptr).unwrap(), self.page_infos.last_mut().unwrap())
|
||||
}
|
||||
|
||||
/// Allocates in multiples of one page on the host system.
|
||||
|
|
@ -212,7 +214,7 @@ impl IsolatedAlloc {
|
|||
.cast::<u8>()
|
||||
};
|
||||
assert_ne!(ret.addr(), usize::MAX, "mmap failed");
|
||||
self.huge_ptrs.push((ret, size));
|
||||
self.huge_ptrs.push((NonNull::new(ret).unwrap(), size));
|
||||
// huge_normalized_layout ensures that we've overallocated enough space
|
||||
// for this to be valid.
|
||||
ret.map_addr(|a| a.next_multiple_of(layout.align()))
|
||||
|
|
@ -246,7 +248,7 @@ impl IsolatedAlloc {
|
|||
// from us pointing to this page, and we know it was allocated
|
||||
// in add_page as exactly a single page.
|
||||
unsafe {
|
||||
assert_eq!(libc::munmap(page_ptr.cast(), self.page_size), 0);
|
||||
assert_eq!(libc::munmap(page_ptr.as_ptr().cast(), self.page_size), 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -265,7 +267,7 @@ impl IsolatedAlloc {
|
|||
// This could be made faster if the list was sorted -- the allocator isn't fully optimized at the moment.
|
||||
let pinfo = std::iter::zip(&mut self.page_ptrs, &mut self.page_infos)
|
||||
.enumerate()
|
||||
.find(|(_, (page, _))| page.addr() == page_addr);
|
||||
.find(|(_, (page, _))| page.addr().get() == page_addr);
|
||||
let Some((idx_of_pinfo, (_, pinfo))) = pinfo else {
|
||||
panic!("Freeing in an unallocated page: {ptr:?}\nHolding pages {:?}", self.page_ptrs)
|
||||
};
|
||||
|
|
@ -287,7 +289,7 @@ impl IsolatedAlloc {
|
|||
.huge_ptrs
|
||||
.iter()
|
||||
.position(|&(pg, size)| {
|
||||
pg.addr() <= ptr.addr() && ptr.addr() < pg.addr().strict_add(size)
|
||||
pg.addr().get() <= ptr.addr() && ptr.addr() < pg.addr().get().strict_add(size)
|
||||
})
|
||||
.expect("Freeing unallocated pages");
|
||||
// And kick it from the list
|
||||
|
|
@ -295,21 +297,58 @@ impl IsolatedAlloc {
|
|||
assert_eq!(size, size2, "got wrong layout in dealloc");
|
||||
// SAFETY: huge_ptrs contains allocations made with mmap with the size recorded there.
|
||||
unsafe {
|
||||
let ret = libc::munmap(un_offset_ptr.cast(), size);
|
||||
let ret = libc::munmap(un_offset_ptr.as_ptr().cast(), size);
|
||||
assert_eq!(ret, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a vector of page addresses managed by the allocator.
|
||||
pub fn pages(&self) -> Vec<usize> {
|
||||
let mut pages: Vec<_> =
|
||||
self.page_ptrs.clone().into_iter().map(|p| p.expose_provenance()).collect();
|
||||
for (ptr, size) in &self.huge_ptrs {
|
||||
let mut pages: Vec<usize> =
|
||||
self.page_ptrs.clone().into_iter().map(|p| p.expose_provenance().get()).collect();
|
||||
self.huge_ptrs.iter().for_each(|(ptr, size)| {
|
||||
for i in 0..size / self.page_size {
|
||||
pages.push(ptr.expose_provenance().strict_add(i * self.page_size));
|
||||
pages.push(ptr.expose_provenance().get().strict_add(i * self.page_size));
|
||||
}
|
||||
});
|
||||
pages
|
||||
}
|
||||
|
||||
/// Protects all owned memory as `PROT_NONE`, preventing accesses.
|
||||
///
|
||||
/// SAFETY: Accessing memory after this point will result in a segfault
|
||||
/// unless it is first unprotected.
|
||||
pub unsafe fn prepare_ffi(&mut self) -> Result<(), nix::errno::Errno> {
|
||||
let prot = mman::ProtFlags::PROT_NONE;
|
||||
unsafe { self.mprotect(prot) }
|
||||
}
|
||||
|
||||
/// Deprotects all owned memory by setting it to RW. Erroring here is very
|
||||
/// likely unrecoverable, so it may panic if applying those permissions
|
||||
/// fails.
|
||||
pub fn unprep_ffi(&mut self) {
|
||||
let prot = mman::ProtFlags::PROT_READ | mman::ProtFlags::PROT_WRITE;
|
||||
unsafe {
|
||||
self.mprotect(prot).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies `prot` to every page managed by the allocator.
|
||||
///
|
||||
/// SAFETY: Accessing memory in violation of the protection flags will
|
||||
/// trigger a segfault.
|
||||
unsafe fn mprotect(&mut self, prot: mman::ProtFlags) -> Result<(), nix::errno::Errno> {
|
||||
for &pg in &self.page_ptrs {
|
||||
unsafe {
|
||||
mman::mprotect(pg.cast(), self.page_size, prot)?;
|
||||
}
|
||||
}
|
||||
pages
|
||||
for &(hpg, size) in &self.huge_ptrs {
|
||||
unsafe {
|
||||
mman::mprotect(hpg.cast(), size.next_multiple_of(self.page_size), prot)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -229,7 +229,14 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
|
|||
.collect::<Vec<libffi::high::Arg<'_>>>();
|
||||
|
||||
// Call the function and store output, depending on return type in the function signature.
|
||||
let (ret, _) = this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;
|
||||
let (ret, maybe_memevents) =
|
||||
this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;
|
||||
|
||||
if cfg!(target_os = "linux")
|
||||
&& let Some(events) = maybe_memevents
|
||||
{
|
||||
trace!("Registered FFI events:\n{events:#0x?}");
|
||||
}
|
||||
|
||||
this.write_immediate(*ret, dest)?;
|
||||
interp_ok(true)
|
||||
|
|
@ -250,15 +257,15 @@ unsafe fn do_native_call<T: libffi::high::CType>(
|
|||
|
||||
unsafe {
|
||||
if let Some(alloc) = alloc {
|
||||
// SAFETY: We don't touch the machine memory past this point
|
||||
// SAFETY: We don't touch the machine memory past this point.
|
||||
let (guard, stack_ptr) = Supervisor::start_ffi(alloc.clone());
|
||||
// SAFETY: Upheld by caller
|
||||
// SAFETY: Upheld by caller.
|
||||
let ret = ffi::call(ptr, args);
|
||||
// SAFETY: We got the guard and stack pointer from start_ffi, and
|
||||
// the allocator is the same
|
||||
// the allocator is the same.
|
||||
(ret, Supervisor::end_ffi(guard, alloc, stack_ptr))
|
||||
} else {
|
||||
// SAFETY: Upheld by caller
|
||||
// SAFETY: Upheld by caller.
|
||||
(ffi::call(ptr, args), None)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,30 +52,40 @@ impl Supervisor {
|
|||
// If the supervisor is not initialised for whatever reason, fast-fail.
|
||||
// This might be desired behaviour, as even on platforms where ptracing
|
||||
// is not implemented it enables us to enforce that only one FFI call
|
||||
// happens at a time
|
||||
// happens at a time.
|
||||
let Some(sv) = sv_guard.take() else {
|
||||
return (sv_guard, None);
|
||||
};
|
||||
|
||||
// Get pointers to all the pages the supervisor must allow accesses in
|
||||
// and prepare the fake stack
|
||||
// and prepare the fake stack.
|
||||
let page_ptrs = alloc.borrow().pages();
|
||||
let raw_stack_ptr: *mut [u8; FAKE_STACK_SIZE] =
|
||||
Box::leak(Box::new([0u8; FAKE_STACK_SIZE])).as_mut_ptr().cast();
|
||||
let stack_ptr = raw_stack_ptr.expose_provenance();
|
||||
let start_info = StartFfiInfo { page_ptrs, stack_ptr };
|
||||
|
||||
// SAFETY: We do not access machine memory past this point until the
|
||||
// supervisor is ready to allow it.
|
||||
unsafe {
|
||||
if alloc.borrow_mut().prepare_ffi().is_err() {
|
||||
// Don't mess up unwinding by maybe leaving the memory partly protected
|
||||
alloc.borrow_mut().unprep_ffi();
|
||||
panic!("Cannot protect memory for FFI call!");
|
||||
}
|
||||
}
|
||||
|
||||
// Send over the info.
|
||||
// NB: if we do not wait to receive a blank confirmation response, it is
|
||||
// possible that the supervisor is alerted of the SIGSTOP *before* it has
|
||||
// actually received the start_info, thus deadlocking! This way, we can
|
||||
// enforce an ordering for these events
|
||||
// enforce an ordering for these events.
|
||||
sv.message_tx.send(TraceRequest::StartFfi(start_info)).unwrap();
|
||||
sv.confirm_rx.recv().unwrap();
|
||||
*sv_guard = Some(sv);
|
||||
// We need to be stopped for the supervisor to be able to make certain
|
||||
// modifications to our memory - simply waiting on the recv() doesn't
|
||||
// count
|
||||
// count.
|
||||
signal::raise(signal::SIGSTOP).unwrap();
|
||||
(sv_guard, Some(raw_stack_ptr))
|
||||
}
|
||||
|
|
@ -90,7 +100,7 @@ impl Supervisor {
|
|||
/// one passed to it also.
|
||||
pub unsafe fn end_ffi(
|
||||
mut sv_guard: std::sync::MutexGuard<'static, Option<Supervisor>>,
|
||||
_alloc: Rc<RefCell<IsolatedAlloc>>,
|
||||
alloc: Rc<RefCell<IsolatedAlloc>>,
|
||||
raw_stack_ptr: Option<*mut [u8; FAKE_STACK_SIZE]>,
|
||||
) -> Option<MemEvents> {
|
||||
// We can't use IPC channels here to signal that FFI mode has ended,
|
||||
|
|
@ -99,19 +109,22 @@ impl Supervisor {
|
|||
// simpler and more robust to simply use the signals which are left for
|
||||
// arbitrary usage. Since this will block until we are continued by the
|
||||
// supervisor, we can assume past this point that everything is back to
|
||||
// normal
|
||||
// normal.
|
||||
signal::raise(signal::SIGUSR1).unwrap();
|
||||
|
||||
// This is safe! It just sets memory to normal expected permissions.
|
||||
alloc.borrow_mut().unprep_ffi();
|
||||
|
||||
// If this is `None`, then `raw_stack_ptr` is None and does not need to
|
||||
// be deallocated (and there's no need to worry about the guard, since
|
||||
// it contains nothing)
|
||||
// it contains nothing).
|
||||
let sv = sv_guard.take()?;
|
||||
// SAFETY: Caller upholds that this pointer was allocated as a box with
|
||||
// this type
|
||||
// this type.
|
||||
unsafe {
|
||||
drop(Box::from_raw(raw_stack_ptr.unwrap()));
|
||||
}
|
||||
// On the off-chance something really weird happens, don't block forever
|
||||
// On the off-chance something really weird happens, don't block forever.
|
||||
let ret = sv
|
||||
.event_rx
|
||||
.try_recv_timeout(std::time::Duration::from_secs(5))
|
||||
|
|
@ -138,33 +151,34 @@ impl Supervisor {
|
|||
/// The invariants for `fork()` must be upheld by the caller.
|
||||
pub unsafe fn init_sv() -> Result<(), SvInitError> {
|
||||
// FIXME: Much of this could be reimplemented via the mitosis crate if we upstream the
|
||||
// relevant missing bits
|
||||
// relevant missing bits.
|
||||
|
||||
// On Linux, this will check whether ptrace is fully disabled by the Yama module.
|
||||
// If Yama isn't running or we're not on Linux, we'll still error later, but
|
||||
// this saves a very expensive fork call
|
||||
// this saves a very expensive fork call.
|
||||
let ptrace_status = std::fs::read_to_string("/proc/sys/kernel/yama/ptrace_scope");
|
||||
if let Ok(stat) = ptrace_status {
|
||||
if let Some(stat) = stat.chars().next() {
|
||||
// Fast-error if ptrace is fully disabled on the system
|
||||
// Fast-error if ptrace is fully disabled on the system.
|
||||
if stat == '3' {
|
||||
return Err(SvInitError);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialise the supervisor if it isn't already, placing it into SUPERVISOR
|
||||
// Initialise the supervisor if it isn't already, placing it into SUPERVISOR.
|
||||
let mut lock = SUPERVISOR.lock().unwrap();
|
||||
if lock.is_some() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Prepare the IPC channels we need
|
||||
// Prepare the IPC channels we need.
|
||||
let (message_tx, message_rx) = ipc::channel().unwrap();
|
||||
let (confirm_tx, confirm_rx) = ipc::channel().unwrap();
|
||||
let (event_tx, event_rx) = ipc::channel().unwrap();
|
||||
// SAFETY: Calling sysconf(_SC_PAGESIZE) is always safe and cannot error
|
||||
// SAFETY: Calling sysconf(_SC_PAGESIZE) is always safe and cannot error.
|
||||
let page_size = unsafe { libc::sysconf(libc::_SC_PAGESIZE) }.try_into().unwrap();
|
||||
super::parent::PAGE_SIZE.store(page_size, std::sync::atomic::Ordering::Relaxed);
|
||||
|
||||
unsafe {
|
||||
// TODO: Maybe use clone3() instead for better signalling of when the child exits?
|
||||
|
|
@ -172,37 +186,36 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
|
|||
match unistd::fork().unwrap() {
|
||||
unistd::ForkResult::Parent { child } => {
|
||||
// If somehow another thread does exist, prevent it from accessing the lock
|
||||
// and thus breaking our safety invariants
|
||||
// and thus breaking our safety invariants.
|
||||
std::mem::forget(lock);
|
||||
// The child process is free to unwind, so we want to avoid doubly freeing
|
||||
// system resources
|
||||
// system resources.
|
||||
let init = std::panic::catch_unwind(|| {
|
||||
let listener =
|
||||
ChildListener { message_rx, attached: false, override_retcode: None };
|
||||
// Trace as many things as possible, to be able to handle them as needed
|
||||
// Trace as many things as possible, to be able to handle them as needed.
|
||||
let options = ptrace::Options::PTRACE_O_TRACESYSGOOD
|
||||
| ptrace::Options::PTRACE_O_TRACECLONE
|
||||
| ptrace::Options::PTRACE_O_TRACEFORK;
|
||||
// Attach to the child process without stopping it
|
||||
// Attach to the child process without stopping it.
|
||||
match ptrace::seize(child, options) {
|
||||
// Ptrace works :D
|
||||
Ok(_) => {
|
||||
let code = sv_loop(listener, child, event_tx, confirm_tx, page_size)
|
||||
.unwrap_err();
|
||||
let code = sv_loop(listener, child, event_tx, confirm_tx).unwrap_err();
|
||||
// If a return code of 0 is not explicitly given, assume something went
|
||||
// wrong and return 1
|
||||
std::process::exit(code.unwrap_or(1))
|
||||
// wrong and return 1.
|
||||
std::process::exit(code.0.unwrap_or(1))
|
||||
}
|
||||
// Ptrace does not work and we failed to catch that
|
||||
// Ptrace does not work and we failed to catch that.
|
||||
Err(_) => {
|
||||
// If we can't ptrace, Miri continues being the parent
|
||||
// If we can't ptrace, Miri continues being the parent.
|
||||
signal::kill(child, signal::SIGKILL).unwrap();
|
||||
SvInitError
|
||||
}
|
||||
}
|
||||
});
|
||||
match init {
|
||||
// The "Ok" case means that we couldn't ptrace
|
||||
// The "Ok" case means that we couldn't ptrace.
|
||||
Ok(e) => return Err(e),
|
||||
Err(p) => {
|
||||
eprintln!("Supervisor process panicked!\n{p:?}");
|
||||
|
|
@ -212,12 +225,12 @@ pub unsafe fn init_sv() -> Result<(), SvInitError> {
|
|||
}
|
||||
unistd::ForkResult::Child => {
|
||||
// Make sure we never get orphaned and stuck in SIGSTOP or similar
|
||||
// SAFETY: prctl PR_SET_PDEATHSIG is always safe to call
|
||||
// SAFETY: prctl PR_SET_PDEATHSIG is always safe to call.
|
||||
let ret = libc::prctl(libc::PR_SET_PDEATHSIG, libc::SIGTERM);
|
||||
assert_eq!(ret, 0);
|
||||
// First make sure the parent succeeded with ptracing us!
|
||||
signal::raise(signal::SIGSTOP).unwrap();
|
||||
// If we're the child process, save the supervisor info
|
||||
// If we're the child process, save the supervisor info.
|
||||
*lock = Some(Supervisor { message_tx, confirm_rx, event_rx });
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,16 +1,108 @@
|
|||
use std::sync::atomic::{AtomicPtr, AtomicUsize};
|
||||
|
||||
use ipc_channel::ipc;
|
||||
use nix::sys::{ptrace, signal, wait};
|
||||
use nix::unistd;
|
||||
|
||||
use super::StartFfiInfo;
|
||||
use super::messages::{Confirmation, MemEvents, TraceRequest};
|
||||
use crate::shims::trace::messages::{Confirmation, MemEvents, TraceRequest};
|
||||
use crate::shims::trace::{AccessEvent, FAKE_STACK_SIZE, StartFfiInfo};
|
||||
|
||||
/// The flags to use when calling `waitid()`.
|
||||
/// Since bitwise OR on the nix version of these flags is implemented as a trait,
|
||||
/// we can't use them directly so we do it this way
|
||||
/// Since bitwise or on the nix version of these flags is implemented as a trait,
|
||||
/// this cannot be const directly so we do it this way.
|
||||
const WAIT_FLAGS: wait::WaitPidFlag =
|
||||
wait::WaitPidFlag::from_bits_truncate(libc::WUNTRACED | libc::WEXITED);
|
||||
|
||||
/// Arch-specific maximum size a single access might perform. x86 value is set
|
||||
/// assuming nothing bigger than AVX-512 is available.
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
const ARCH_MAX_ACCESS_SIZE: usize = 64;
|
||||
/// The largest arm64 simd instruction operates on 16 bytes.
|
||||
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
||||
const ARCH_MAX_ACCESS_SIZE: usize = 16;
|
||||
/// The max riscv vector instruction can access 8 consecutive 32-bit values.
|
||||
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
|
||||
const ARCH_MAX_ACCESS_SIZE: usize = 32;
|
||||
|
||||
/// The default word size on a given platform, in bytes.
|
||||
#[cfg(any(target_arch = "x86", target_arch = "arm", target_arch = "riscv32"))]
|
||||
const ARCH_WORD_SIZE: usize = 4;
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64", target_arch = "riscv64"))]
|
||||
const ARCH_WORD_SIZE: usize = 8;
|
||||
|
||||
/// The address of the page set to be edited, initialised to a sentinel null
|
||||
/// pointer.
|
||||
static PAGE_ADDR: AtomicPtr<u8> = AtomicPtr::new(std::ptr::null_mut());
|
||||
/// The host pagesize, initialised to a sentinel zero value.
|
||||
pub static PAGE_SIZE: AtomicUsize = AtomicUsize::new(0);
|
||||
/// How many consecutive pages to unprotect. 1 by default, unlikely to be set
|
||||
/// higher than 2.
|
||||
static PAGE_COUNT: AtomicUsize = AtomicUsize::new(1);
|
||||
|
||||
/// Allows us to get common arguments from the `user_regs_t` across architectures.
|
||||
/// Normally this would land us in ABI hell, but thankfully all of our usecases
|
||||
/// consist of functions with a small number of register-sized integer arguments.
|
||||
/// See <https://man7.org/linux/man-pages/man2/syscall.2.html> for sources.
|
||||
trait ArchIndependentRegs {
|
||||
/// Gets the value of the instruction pointer.
|
||||
fn ip(&self) -> usize;
|
||||
/// Set the instruction pointer; remember to also set the stack pointer, or
|
||||
/// else the stack might get messed up!
|
||||
fn set_ip(&mut self, ip: usize);
|
||||
/// Set the stack pointer, ideally to a zeroed-out area.
|
||||
fn set_sp(&mut self, sp: usize);
|
||||
}
|
||||
|
||||
// It's fine / desirable behaviour for values to wrap here, we care about just
|
||||
// preserving the bit pattern.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[expect(clippy::as_conversions)]
|
||||
#[rustfmt::skip]
|
||||
impl ArchIndependentRegs for libc::user_regs_struct {
|
||||
#[inline]
|
||||
fn ip(&self) -> usize { self.rip as _ }
|
||||
#[inline]
|
||||
fn set_ip(&mut self, ip: usize) { self.rip = ip as _ }
|
||||
#[inline]
|
||||
fn set_sp(&mut self, sp: usize) { self.rsp = sp as _ }
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86")]
|
||||
#[expect(clippy::as_conversions)]
|
||||
#[rustfmt::skip]
|
||||
impl ArchIndependentRegs for libc::user_regs_struct {
|
||||
#[inline]
|
||||
fn ip(&self) -> usize { self.eip as _ }
|
||||
#[inline]
|
||||
fn set_ip(&mut self, ip: usize) { self.eip = ip as _ }
|
||||
#[inline]
|
||||
fn set_sp(&mut self, sp: usize) { self.esp = sp as _ }
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[expect(clippy::as_conversions)]
|
||||
#[rustfmt::skip]
|
||||
impl ArchIndependentRegs for libc::user_regs_struct {
|
||||
#[inline]
|
||||
fn ip(&self) -> usize { self.pc as _ }
|
||||
#[inline]
|
||||
fn set_ip(&mut self, ip: usize) { self.pc = ip as _ }
|
||||
#[inline]
|
||||
fn set_sp(&mut self, sp: usize) { self.sp = sp as _ }
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
|
||||
#[expect(clippy::as_conversions)]
|
||||
#[rustfmt::skip]
|
||||
impl ArchIndependentRegs for libc::user_regs_struct {
|
||||
#[inline]
|
||||
fn ip(&self) -> usize { self.pc as _ }
|
||||
#[inline]
|
||||
fn set_ip(&mut self, ip: usize) { self.pc = ip as _ }
|
||||
#[inline]
|
||||
fn set_sp(&mut self, sp: usize) { self.sp = sp as _ }
|
||||
}
|
||||
|
||||
/// A unified event representing something happening on the child process. Wraps
|
||||
/// `nix`'s `WaitStatus` and our custom signals so it can all be done with one
|
||||
/// `match` statement.
|
||||
|
|
@ -22,7 +114,7 @@ pub enum ExecEvent {
|
|||
End,
|
||||
/// The child process with the specified pid was stopped by the given signal.
|
||||
Status(unistd::Pid, signal::Signal),
|
||||
/// The child process with the specified pid entered or exited a syscall.
|
||||
/// The child process with the specified pid entered or exited a syscall.
|
||||
Syscall(unistd::Pid),
|
||||
/// A child process exited or was killed; if we have a return code, it is
|
||||
/// specified.
|
||||
|
|
@ -42,10 +134,10 @@ pub struct ChildListener {
|
|||
impl Iterator for ChildListener {
|
||||
type Item = ExecEvent;
|
||||
|
||||
// Allows us to monitor the child process by just iterating over the listener
|
||||
// Allows us to monitor the child process by just iterating over the listener.
|
||||
// NB: This should never return None!
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
// Do not block if the child has nothing to report for `waitid`
|
||||
// Do not block if the child has nothing to report for `waitid`.
|
||||
let opts = WAIT_FLAGS | wait::WaitPidFlag::WNOHANG;
|
||||
loop {
|
||||
// Listen to any child, not just the main one. Important if we want
|
||||
|
|
@ -55,17 +147,17 @@ impl Iterator for ChildListener {
|
|||
match wait::waitid(wait::Id::All, opts) {
|
||||
Ok(stat) =>
|
||||
match stat {
|
||||
// Child exited normally with a specific code set
|
||||
// Child exited normally with a specific code set.
|
||||
wait::WaitStatus::Exited(_, code) => {
|
||||
let code = self.override_retcode.unwrap_or(code);
|
||||
return Some(ExecEvent::Died(Some(code)));
|
||||
}
|
||||
// Child was killed by a signal, without giving a code
|
||||
// Child was killed by a signal, without giving a code.
|
||||
wait::WaitStatus::Signaled(_, _, _) =>
|
||||
return Some(ExecEvent::Died(self.override_retcode)),
|
||||
// Child entered a syscall. Since we're always technically
|
||||
// tracing, only pass this along if we're actively
|
||||
// monitoring the child
|
||||
// monitoring the child.
|
||||
wait::WaitStatus::PtraceSyscall(pid) =>
|
||||
if self.attached {
|
||||
return Some(ExecEvent::Syscall(pid));
|
||||
|
|
@ -84,11 +176,11 @@ impl Iterator for ChildListener {
|
|||
return Some(ExecEvent::Status(pid, signal));
|
||||
}
|
||||
} else {
|
||||
// Just pass along the signal
|
||||
// Just pass along the signal.
|
||||
ptrace::cont(pid, signal).unwrap();
|
||||
},
|
||||
// Child was stopped at the given signal. Same logic as for
|
||||
// WaitStatus::PtraceEvent
|
||||
// WaitStatus::PtraceEvent.
|
||||
wait::WaitStatus::Stopped(pid, signal) =>
|
||||
if self.attached {
|
||||
if signal == signal::SIGUSR1 {
|
||||
|
|
@ -104,11 +196,11 @@ impl Iterator for ChildListener {
|
|||
},
|
||||
// This case should only trigger if all children died and we
|
||||
// somehow missed that, but it's best we not allow any room
|
||||
// for deadlocks
|
||||
// for deadlocks.
|
||||
Err(_) => return Some(ExecEvent::Died(None)),
|
||||
}
|
||||
|
||||
// Similarly, do a non-blocking poll of the IPC channel
|
||||
// Similarly, do a non-blocking poll of the IPC channel.
|
||||
if let Ok(req) = self.message_rx.try_recv() {
|
||||
match req {
|
||||
TraceRequest::StartFfi(info) =>
|
||||
|
|
@ -123,18 +215,16 @@ impl Iterator for ChildListener {
|
|||
}
|
||||
}
|
||||
|
||||
// Not ideal, but doing anything else might sacrifice performance
|
||||
// Not ideal, but doing anything else might sacrifice performance.
|
||||
std::thread::yield_now();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An error came up while waiting on the child process to do something.
|
||||
/// It likely died, with this return code if we have one.
|
||||
#[derive(Debug)]
|
||||
enum ExecError {
|
||||
/// The child process died with this return code, if we have one.
|
||||
Died(Option<i32>),
|
||||
}
|
||||
pub struct ExecEnd(pub Option<i32>);
|
||||
|
||||
/// This is the main loop of the supervisor process. It runs in a separate
|
||||
/// process from the rest of Miri (but because we fork, addresses for anything
|
||||
|
|
@ -144,35 +234,37 @@ pub fn sv_loop(
|
|||
init_pid: unistd::Pid,
|
||||
event_tx: ipc::IpcSender<MemEvents>,
|
||||
confirm_tx: ipc::IpcSender<Confirmation>,
|
||||
_page_size: usize,
|
||||
) -> Result<!, Option<i32>> {
|
||||
// Things that we return to the child process
|
||||
) -> Result<!, ExecEnd> {
|
||||
// Get the pagesize set and make sure it isn't still on the zero sentinel value!
|
||||
let page_size = PAGE_SIZE.load(std::sync::atomic::Ordering::Relaxed);
|
||||
assert_ne!(page_size, 0);
|
||||
|
||||
// Things that we return to the child process.
|
||||
let mut acc_events = Vec::new();
|
||||
|
||||
// Memory allocated on the MiriMachine
|
||||
let mut _ch_pages = Vec::new();
|
||||
let mut _ch_stack = None;
|
||||
// Memory allocated for the MiriMachine.
|
||||
let mut ch_pages = Vec::new();
|
||||
let mut ch_stack = None;
|
||||
|
||||
// An instance of the Capstone disassembler, so we don't spawn one on every access.
|
||||
let cs = get_disasm();
|
||||
|
||||
// The pid of the last process we interacted with, used by default if we don't have a
|
||||
// reason to use a different one
|
||||
// reason to use a different one.
|
||||
let mut curr_pid = init_pid;
|
||||
|
||||
// There's an initial sigstop we need to deal with
|
||||
wait_for_signal(Some(curr_pid), signal::SIGSTOP, false).map_err(|e| {
|
||||
match e {
|
||||
ExecError::Died(code) => code,
|
||||
}
|
||||
})?;
|
||||
// There's an initial sigstop we need to deal with.
|
||||
wait_for_signal(Some(curr_pid), signal::SIGSTOP, false)?;
|
||||
ptrace::cont(curr_pid, None).unwrap();
|
||||
|
||||
for evt in listener {
|
||||
match evt {
|
||||
// start_ffi was called by the child, so prep memory
|
||||
// start_ffi was called by the child, so prep memory.
|
||||
ExecEvent::Start(ch_info) => {
|
||||
// All the pages that the child process is "allowed to" access
|
||||
_ch_pages = ch_info.page_ptrs;
|
||||
// And the fake stack it allocated for us to use later
|
||||
_ch_stack = Some(ch_info.stack_ptr);
|
||||
// All the pages that the child process is "allowed to" access.
|
||||
ch_pages = ch_info.page_ptrs;
|
||||
// And the fake stack it allocated for us to use later.
|
||||
ch_stack = Some(ch_info.stack_ptr);
|
||||
|
||||
// We received the signal and are no longer in the main listener loop,
|
||||
// so we can let the child move on to the end of start_ffi where it will
|
||||
|
|
@ -180,39 +272,54 @@ pub fn sv_loop(
|
|||
// order to do most ptrace operations!
|
||||
confirm_tx.send(Confirmation).unwrap();
|
||||
// We can't trust simply calling `Pid::this()` in the child process to give the right
|
||||
// PID for us, so we get it this way
|
||||
// PID for us, so we get it this way.
|
||||
curr_pid = wait_for_signal(None, signal::SIGSTOP, false).unwrap();
|
||||
|
||||
ptrace::syscall(curr_pid, None).unwrap();
|
||||
}
|
||||
// end_ffi was called by the child
|
||||
// end_ffi was called by the child.
|
||||
ExecEvent::End => {
|
||||
// Hand over the access info we traced
|
||||
// Hand over the access info we traced.
|
||||
event_tx.send(MemEvents { acc_events }).unwrap();
|
||||
// And reset our values
|
||||
// And reset our values.
|
||||
acc_events = Vec::new();
|
||||
_ch_stack = None;
|
||||
ch_stack = None;
|
||||
|
||||
// No need to monitor syscalls anymore, they'd just be ignored
|
||||
// No need to monitor syscalls anymore, they'd just be ignored.
|
||||
ptrace::cont(curr_pid, None).unwrap();
|
||||
}
|
||||
// Child process was stopped by a signal
|
||||
ExecEvent::Status(pid, signal) => {
|
||||
eprintln!("Process unexpectedly got {signal}; continuing...");
|
||||
// In case we're not tracing
|
||||
if ptrace::syscall(pid, signal).is_err() {
|
||||
// If *this* fails too, something really weird happened
|
||||
// and it's probably best to just panic
|
||||
signal::kill(pid, signal::SIGCONT).unwrap();
|
||||
}
|
||||
}
|
||||
ExecEvent::Status(pid, signal) =>
|
||||
match signal {
|
||||
// If it was a segfault, check if it was an artificial one
|
||||
// caused by it trying to access the MiriMachine memory.
|
||||
signal::SIGSEGV =>
|
||||
handle_segfault(
|
||||
pid,
|
||||
&ch_pages,
|
||||
ch_stack.unwrap(),
|
||||
page_size,
|
||||
&cs,
|
||||
&mut acc_events,
|
||||
)?,
|
||||
// Something weird happened.
|
||||
_ => {
|
||||
eprintln!("Process unexpectedly got {signal}; continuing...");
|
||||
// In case we're not tracing
|
||||
if ptrace::syscall(pid, None).is_err() {
|
||||
// If *this* fails too, something really weird happened
|
||||
// and it's probably best to just panic.
|
||||
signal::kill(pid, signal::SIGCONT).unwrap();
|
||||
}
|
||||
}
|
||||
},
|
||||
// Child entered a syscall; we wait for exits inside of this, so it
|
||||
// should never trigger on return from a syscall we care about
|
||||
// should never trigger on return from a syscall we care about.
|
||||
ExecEvent::Syscall(pid) => {
|
||||
ptrace::syscall(pid, None).unwrap();
|
||||
}
|
||||
ExecEvent::Died(code) => {
|
||||
return Err(code);
|
||||
return Err(ExecEnd(code));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -220,6 +327,30 @@ pub fn sv_loop(
|
|||
unreachable!()
|
||||
}
|
||||
|
||||
/// Spawns a Capstone disassembler for the host architecture.
|
||||
#[rustfmt::skip]
|
||||
fn get_disasm() -> capstone::Capstone {
|
||||
use capstone::prelude::*;
|
||||
let cs_pre = Capstone::new();
|
||||
{
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{cs_pre.x86().mode(arch::x86::ArchMode::Mode64)}
|
||||
#[cfg(target_arch = "x86")]
|
||||
{cs_pre.x86().mode(arch::x86::ArchMode::Mode32)}
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
{cs_pre.arm64().mode(arch::arm64::ArchMode::Arm)}
|
||||
#[cfg(target_arch = "arm")]
|
||||
{cs_pre.arm().mode(arch::arm::ArchMode::Arm)}
|
||||
#[cfg(target_arch = "riscv64")]
|
||||
{cs_pre.riscv().mode(arch::riscv::ArchMode::RiscV64)}
|
||||
#[cfg(target_arch = "riscv32")]
|
||||
{cs_pre.riscv().mode(arch::riscv::ArchMode::RiscV32)}
|
||||
}
|
||||
.detail(true)
|
||||
.build()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
/// Waits for `wait_signal`. If `init_cont`, it will first do a `ptrace::cont`.
|
||||
/// We want to avoid that in some cases, like at the beginning of FFI.
|
||||
///
|
||||
|
|
@ -228,27 +359,27 @@ fn wait_for_signal(
|
|||
pid: Option<unistd::Pid>,
|
||||
wait_signal: signal::Signal,
|
||||
init_cont: bool,
|
||||
) -> Result<unistd::Pid, ExecError> {
|
||||
) -> Result<unistd::Pid, ExecEnd> {
|
||||
if init_cont {
|
||||
ptrace::cont(pid.unwrap(), None).unwrap();
|
||||
}
|
||||
// Repeatedly call `waitid` until we get the signal we want, or the process dies
|
||||
// Repeatedly call `waitid` until we get the signal we want, or the process dies.
|
||||
loop {
|
||||
let wait_id = match pid {
|
||||
Some(pid) => wait::Id::Pid(pid),
|
||||
None => wait::Id::All,
|
||||
};
|
||||
let stat = wait::waitid(wait_id, WAIT_FLAGS).map_err(|_| ExecError::Died(None))?;
|
||||
let stat = wait::waitid(wait_id, WAIT_FLAGS).map_err(|_| ExecEnd(None))?;
|
||||
let (signal, pid) = match stat {
|
||||
// Report the cause of death, if we know it
|
||||
// Report the cause of death, if we know it.
|
||||
wait::WaitStatus::Exited(_, code) => {
|
||||
return Err(ExecError::Died(Some(code)));
|
||||
return Err(ExecEnd(Some(code)));
|
||||
}
|
||||
wait::WaitStatus::Signaled(_, _, _) => return Err(ExecError::Died(None)),
|
||||
wait::WaitStatus::Signaled(_, _, _) => return Err(ExecEnd(None)),
|
||||
wait::WaitStatus::Stopped(pid, signal) => (signal, pid),
|
||||
wait::WaitStatus::PtraceEvent(pid, signal, _) => (signal, pid),
|
||||
// This covers PtraceSyscall and variants that are impossible with
|
||||
// the flags set (e.g. WaitStatus::StillAlive)
|
||||
// the flags set (e.g. WaitStatus::StillAlive).
|
||||
_ => {
|
||||
ptrace::cont(pid.unwrap(), None).unwrap();
|
||||
continue;
|
||||
|
|
@ -257,7 +388,302 @@ fn wait_for_signal(
|
|||
if signal == wait_signal {
|
||||
return Ok(pid);
|
||||
} else {
|
||||
ptrace::cont(pid, None).map_err(|_| ExecError::Died(None))?;
|
||||
ptrace::cont(pid, signal).map_err(|_| ExecEnd(None))?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Grabs the access that caused a segfault and logs it down if it's to our memory,
|
||||
/// or kills the child and returns the appropriate error otherwise.
|
||||
fn handle_segfault(
|
||||
pid: unistd::Pid,
|
||||
ch_pages: &[usize],
|
||||
ch_stack: usize,
|
||||
page_size: usize,
|
||||
cs: &capstone::Capstone,
|
||||
acc_events: &mut Vec<AccessEvent>,
|
||||
) -> Result<(), ExecEnd> {
|
||||
/// This is just here to not pollute the main namespace with `capstone::prelude::*`.
|
||||
#[inline]
|
||||
fn capstone_disassemble(
|
||||
instr: &[u8],
|
||||
addr: usize,
|
||||
cs: &capstone::Capstone,
|
||||
acc_events: &mut Vec<AccessEvent>,
|
||||
) -> capstone::CsResult<()> {
|
||||
use capstone::prelude::*;
|
||||
|
||||
// The arch_detail is what we care about, but it relies on these temporaries
|
||||
// that we can't drop. 0x1000 is the default base address for Captsone, and
|
||||
// we're expecting 1 instruction.
|
||||
let insns = cs.disasm_count(instr, 0x1000, 1)?;
|
||||
let ins_detail = cs.insn_detail(&insns[0])?;
|
||||
let arch_detail = ins_detail.arch_detail();
|
||||
|
||||
for op in arch_detail.operands() {
|
||||
match op {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
arch::ArchOperand::X86Operand(x86_operand) => {
|
||||
match x86_operand.op_type {
|
||||
// We only care about memory accesses
|
||||
arch::x86::X86OperandType::Mem(_) => {
|
||||
let push = addr..addr.strict_add(usize::from(x86_operand.size));
|
||||
// It's called a "RegAccessType" but it also applies to memory
|
||||
let acc_ty = x86_operand.access.unwrap();
|
||||
if acc_ty.is_readable() {
|
||||
acc_events.push(AccessEvent::Read(push.clone()));
|
||||
}
|
||||
if acc_ty.is_writable() {
|
||||
acc_events.push(AccessEvent::Write(push));
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
arch::ArchOperand::Arm64Operand(arm64_operand) => {
|
||||
// Annoyingly, we don't always get the size here, so just be pessimistic for now.
|
||||
match arm64_operand.op_type {
|
||||
arch::arm64::Arm64OperandType::Mem(_) => {
|
||||
// B = 1 byte, H = 2 bytes, S = 4 bytes, D = 8 bytes, Q = 16 bytes.
|
||||
let size = match arm64_operand.vas {
|
||||
// Not an fp/simd instruction.
|
||||
arch::arm64::Arm64Vas::ARM64_VAS_INVALID => ARCH_WORD_SIZE,
|
||||
// 1 byte.
|
||||
arch::arm64::Arm64Vas::ARM64_VAS_1B => 1,
|
||||
// 2 bytes.
|
||||
arch::arm64::Arm64Vas::ARM64_VAS_1H => 2,
|
||||
// 4 bytes.
|
||||
arch::arm64::Arm64Vas::ARM64_VAS_4B
|
||||
| arch::arm64::Arm64Vas::ARM64_VAS_2H
|
||||
| arch::arm64::Arm64Vas::ARM64_VAS_1S => 4,
|
||||
// 8 bytes.
|
||||
arch::arm64::Arm64Vas::ARM64_VAS_8B
|
||||
| arch::arm64::Arm64Vas::ARM64_VAS_4H
|
||||
| arch::arm64::Arm64Vas::ARM64_VAS_2S
|
||||
| arch::arm64::Arm64Vas::ARM64_VAS_1D => 8,
|
||||
// 16 bytes.
|
||||
arch::arm64::Arm64Vas::ARM64_VAS_16B
|
||||
| arch::arm64::Arm64Vas::ARM64_VAS_8H
|
||||
| arch::arm64::Arm64Vas::ARM64_VAS_4S
|
||||
| arch::arm64::Arm64Vas::ARM64_VAS_2D
|
||||
| arch::arm64::Arm64Vas::ARM64_VAS_1Q => 16,
|
||||
};
|
||||
let push = addr..addr.strict_add(size);
|
||||
// FIXME: This now has access type info in the latest
|
||||
// git version of capstone because this pissed me off
|
||||
// and I added it. Change this when it updates.
|
||||
acc_events.push(AccessEvent::Read(push.clone()));
|
||||
acc_events.push(AccessEvent::Write(push));
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
#[cfg(target_arch = "arm")]
|
||||
arch::ArchOperand::ArmOperand(arm_operand) =>
|
||||
match arm_operand.op_type {
|
||||
arch::arm::ArmOperandType::Mem(_) => {
|
||||
// We don't get info on the size of the access, but
|
||||
// we're at least told if it's a vector instruction.
|
||||
let size = if arm_operand.vector_index.is_some() {
|
||||
ARCH_MAX_ACCESS_SIZE
|
||||
} else {
|
||||
ARCH_WORD_SIZE
|
||||
};
|
||||
let push = addr..addr.strict_add(size);
|
||||
let acc_ty = arm_operand.access.unwrap();
|
||||
if acc_ty.is_readable() {
|
||||
acc_events.push(AccessEvent::Read(push.clone()));
|
||||
}
|
||||
if acc_ty.is_writable() {
|
||||
acc_events.push(AccessEvent::Write(push));
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
},
|
||||
#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
|
||||
arch::ArchOperand::RiscVOperand(risc_voperand) => {
|
||||
match risc_voperand {
|
||||
arch::riscv::RiscVOperand::Mem(_) => {
|
||||
// We get basically no info here.
|
||||
let push = addr..addr.strict_add(size);
|
||||
acc_events.push(AccessEvent::Read(push.clone()));
|
||||
acc_events.push(AccessEvent::Write(push));
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Get information on what caused the segfault. This contains the address
|
||||
// that triggered it.
|
||||
let siginfo = ptrace::getsiginfo(pid).unwrap();
|
||||
// All x86, ARM, etc. instructions only have at most one memory operand
|
||||
// (thankfully!)
|
||||
// SAFETY: si_addr is safe to call.
|
||||
let addr = unsafe { siginfo.si_addr().addr() };
|
||||
let page_addr = addr.strict_sub(addr.strict_rem(page_size));
|
||||
|
||||
if ch_pages.iter().any(|pg| (*pg..pg.strict_add(page_size)).contains(&addr)) {
|
||||
// Overall structure:
|
||||
// - Get the address that caused the segfault
|
||||
// - Unprotect the memory
|
||||
// - Step 1 instruction
|
||||
// - Parse executed code to estimate size & type of access
|
||||
// - Reprotect the memory
|
||||
// - Continue
|
||||
|
||||
// Ensure the stack is properly zeroed out!
|
||||
for a in (ch_stack..ch_stack.strict_add(FAKE_STACK_SIZE)).step_by(ARCH_WORD_SIZE) {
|
||||
ptrace::write(pid, std::ptr::with_exposed_provenance_mut(a), 0).unwrap();
|
||||
}
|
||||
|
||||
// Guard against both architectures with upwards and downwards-growing stacks.
|
||||
let stack_ptr = ch_stack.strict_add(FAKE_STACK_SIZE / 2);
|
||||
let regs_bak = ptrace::getregs(pid).unwrap();
|
||||
let mut new_regs = regs_bak;
|
||||
let ip_prestep = regs_bak.ip();
|
||||
|
||||
// Move the instr ptr into the deprotection code.
|
||||
#[expect(clippy::as_conversions)]
|
||||
new_regs.set_ip(mempr_off as usize);
|
||||
// Don't mess up the stack by accident!
|
||||
new_regs.set_sp(stack_ptr);
|
||||
|
||||
// Modify the PAGE_ADDR global on the child process to point to the page
|
||||
// that we want unprotected.
|
||||
ptrace::write(
|
||||
pid,
|
||||
(&raw const PAGE_ADDR).cast_mut().cast(),
|
||||
libc::c_long::try_from(page_addr).unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Check if we also own the next page, and if so unprotect it in case
|
||||
// the access spans the page boundary.
|
||||
let flag = if ch_pages.contains(&page_addr.strict_add(page_size)) { 2 } else { 1 };
|
||||
ptrace::write(pid, (&raw const PAGE_COUNT).cast_mut().cast(), flag).unwrap();
|
||||
|
||||
ptrace::setregs(pid, new_regs).unwrap();
|
||||
|
||||
// Our mempr_* functions end with a raise(SIGSTOP).
|
||||
wait_for_signal(Some(pid), signal::SIGSTOP, true)?;
|
||||
|
||||
// Step 1 instruction.
|
||||
ptrace::setregs(pid, regs_bak).unwrap();
|
||||
ptrace::step(pid, None).unwrap();
|
||||
// Don't use wait_for_signal here since 1 instruction doesn't give room
|
||||
// for any uncertainty + we don't want it `cont()`ing randomly by accident
|
||||
// Also, don't let it continue with unprotected memory if something errors!
|
||||
let _ = wait::waitid(wait::Id::Pid(pid), WAIT_FLAGS).map_err(|_| ExecEnd(None))?;
|
||||
|
||||
// Zero out again to be safe
|
||||
for a in (ch_stack..ch_stack.strict_add(FAKE_STACK_SIZE)).step_by(ARCH_WORD_SIZE) {
|
||||
ptrace::write(pid, std::ptr::with_exposed_provenance_mut(a), 0).unwrap();
|
||||
}
|
||||
|
||||
// Save registers and grab the bytes that were executed. This would
|
||||
// be really nasty if it was a jump or similar but those thankfully
|
||||
// won't do memory accesses and so can't trigger this!
|
||||
let regs_bak = ptrace::getregs(pid).unwrap();
|
||||
new_regs = regs_bak;
|
||||
let ip_poststep = regs_bak.ip();
|
||||
// We need to do reads/writes in word-sized chunks.
|
||||
let diff = (ip_poststep.strict_sub(ip_prestep)).div_ceil(ARCH_WORD_SIZE);
|
||||
let instr = (ip_prestep..ip_prestep.strict_add(diff)).fold(vec![], |mut ret, ip| {
|
||||
// This only needs to be a valid pointer in the child process, not ours.
|
||||
ret.append(
|
||||
&mut ptrace::read(pid, std::ptr::without_provenance_mut(ip))
|
||||
.unwrap()
|
||||
.to_ne_bytes()
|
||||
.to_vec(),
|
||||
);
|
||||
ret
|
||||
});
|
||||
|
||||
// Now figure out the size + type of access and log it down
|
||||
// This will mark down e.g. the same area being read multiple times,
|
||||
// since it's more efficient to compress the accesses at the end.
|
||||
if capstone_disassemble(&instr, addr, cs, acc_events).is_err() {
|
||||
// Read goes first because we need to be pessimistic.
|
||||
acc_events.push(AccessEvent::Read(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE)));
|
||||
acc_events.push(AccessEvent::Write(addr..addr.strict_add(ARCH_MAX_ACCESS_SIZE)));
|
||||
}
|
||||
|
||||
// Reprotect everything and continue.
|
||||
#[expect(clippy::as_conversions)]
|
||||
new_regs.set_ip(mempr_on as usize);
|
||||
new_regs.set_sp(stack_ptr);
|
||||
ptrace::setregs(pid, new_regs).unwrap();
|
||||
wait_for_signal(Some(pid), signal::SIGSTOP, true)?;
|
||||
|
||||
ptrace::setregs(pid, regs_bak).unwrap();
|
||||
ptrace::syscall(pid, None).unwrap();
|
||||
Ok(())
|
||||
} else {
|
||||
// This was a real segfault, so print some debug info and quit.
|
||||
let regs = ptrace::getregs(pid).unwrap();
|
||||
eprintln!("Segfault occurred during FFI at {addr:#018x}");
|
||||
eprintln!("Expected access on pages: {ch_pages:#018x?}");
|
||||
eprintln!("Register dump: {regs:#x?}");
|
||||
ptrace::kill(pid).unwrap();
|
||||
Err(ExecEnd(None))
|
||||
}
|
||||
}
|
||||
|
||||
// We only get dropped into these functions via offsetting the instr pointer
|
||||
// manually, so we *must not ever* unwind from them.
|
||||
|
||||
/// Disables protections on the page whose address is currently in `PAGE_ADDR`.
|
||||
///
|
||||
/// SAFETY: `PAGE_ADDR` should be set to a page-aligned pointer to an owned page,
|
||||
/// `PAGE_SIZE` should be the host pagesize, and the range from `PAGE_ADDR` to
|
||||
/// `PAGE_SIZE` * `PAGE_COUNT` must be owned and allocated memory. No other threads
|
||||
/// should be running.
|
||||
pub unsafe extern "C" fn mempr_off() {
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
// Again, cannot allow unwinds to happen here.
|
||||
let len = PAGE_SIZE.load(Ordering::Relaxed).saturating_mul(PAGE_COUNT.load(Ordering::Relaxed));
|
||||
// SAFETY: Upheld by "caller".
|
||||
unsafe {
|
||||
// It's up to the caller to make sure this doesn't actually overflow, but
|
||||
// we mustn't unwind from here, so...
|
||||
if libc::mprotect(
|
||||
PAGE_ADDR.load(Ordering::Relaxed).cast(),
|
||||
len,
|
||||
libc::PROT_READ | libc::PROT_WRITE,
|
||||
) != 0
|
||||
{
|
||||
// Can't return or unwind, but we can do this.
|
||||
std::process::exit(-1);
|
||||
}
|
||||
}
|
||||
// If this fails somehow we're doomed.
|
||||
if signal::raise(signal::SIGSTOP).is_err() {
|
||||
std::process::exit(-1);
|
||||
}
|
||||
}
|
||||
|
||||
/// Reenables protection on the page set by `PAGE_ADDR`.
|
||||
///
|
||||
/// SAFETY: See `mempr_off()`.
|
||||
pub unsafe extern "C" fn mempr_on() {
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
let len = PAGE_SIZE.load(Ordering::Relaxed).wrapping_mul(PAGE_COUNT.load(Ordering::Relaxed));
|
||||
// SAFETY: Upheld by "caller".
|
||||
unsafe {
|
||||
if libc::mprotect(PAGE_ADDR.load(Ordering::Relaxed).cast(), len, libc::PROT_NONE) != 0 {
|
||||
std::process::exit(-1);
|
||||
}
|
||||
}
|
||||
if signal::raise(signal::SIGSTOP).is_err() {
|
||||
std::process::exit(-1);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue