Merge pull request #4549 from RalfJung/mprotect

native-lib mode: avoid unsoundness due to mrpotect
This commit is contained in:
Ralf Jung 2025-08-30 13:27:16 +00:00 committed by GitHub
commit 1feabac1db
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 21 additions and 39 deletions

View file

@ -90,14 +90,6 @@ impl Supervisor {
// Unwinding might be messed up due to partly protected memory, so let's abort if something
// breaks inside here.
let res = std::panic::abort_unwind(|| {
// SAFETY: We do not access machine memory past this point until the
// supervisor is ready to allow it.
// FIXME: this is sketchy, as technically the memory is still in the Rust Abstract Machine,
// and the compiler would be allowed to reorder accesses below this block...
unsafe {
Self::protect_pages(alloc.pages(), mman::ProtFlags::PROT_NONE).unwrap();
}
// Send over the info.
// NB: if we do not wait to receive a blank confirmation response, it is
// possible that the supervisor is alerted of the SIGSTOP *before* it has
@ -110,16 +102,14 @@ impl Supervisor {
// count.
signal::raise(signal::SIGSTOP).unwrap();
let res = f();
// SAFETY: We have coordinated with the supervisor to ensure that this memory will keep
// working as normal, just with extra tracing. So even if the compiler moves memory
// accesses down to after the `mprotect`, they won't actually segfault.
unsafe {
Self::protect_pages(alloc.pages(), mman::ProtFlags::PROT_NONE).unwrap();
}
// We can't use IPC channels here to signal that FFI mode has ended,
// since they might allocate memory which could get us stuck in a SIGTRAP
// with no easy way out! While this could be worked around, it is much
// simpler and more robust to simply use the signals which are left for
// arbitrary usage. Since this will block until we are continued by the
// supervisor, we can assume past this point that everything is back to
// normal.
signal::raise(signal::SIGUSR1).unwrap();
let res = f();
// SAFETY: We set memory back to normal, so this is safe.
unsafe {
@ -130,6 +120,12 @@ impl Supervisor {
.unwrap();
}
// Signal the supervisor that we are done. Will block until the supervisor continues us.
// This will also shut down the segfault handler, so it's important that all memory is
// reset back to normal above. There must not be a window in time where accessing the
// pages we protected above actually causes the program to abort.
signal::raise(signal::SIGUSR1).unwrap();
res
});

View file

@ -132,10 +132,10 @@ impl Iterator for ChildListener {
return Some(ExecEvent::Syscall(pid));
},
// Child with the given pid was stopped by the given signal.
// It's somewhat dubious when this is returned instead of
// WaitStatus::Stopped, but for our purposes they are the
// same thing.
wait::WaitStatus::PtraceEvent(pid, signal, _) =>
// It's somewhat unclear when which of these two is returned;
// we just treat them the same.
wait::WaitStatus::Stopped(pid, signal)
| wait::WaitStatus::PtraceEvent(pid, signal, _) =>
if self.attached {
// This is our end-of-FFI signal!
if signal == signal::SIGUSR1 {
@ -148,19 +148,6 @@ impl Iterator for ChildListener {
// Just pass along the signal.
ptrace::cont(pid, signal).unwrap();
},
// Child was stopped at the given signal. Same logic as for
// WaitStatus::PtraceEvent.
wait::WaitStatus::Stopped(pid, signal) =>
if self.attached {
if signal == signal::SIGUSR1 {
self.attached = false;
return Some(ExecEvent::End);
} else {
return Some(ExecEvent::Status(pid, signal));
}
} else {
ptrace::cont(pid, signal).unwrap();
},
_ => (),
},
// This case should only trigger when all children died.
@ -250,7 +237,7 @@ pub fn sv_loop(
// We can't trust simply calling `Pid::this()` in the child process to give the right
// PID for us, so we get it this way.
curr_pid = wait_for_signal(None, signal::SIGSTOP, InitialCont::No).unwrap();
// Continue until next syscall.
ptrace::syscall(curr_pid, None).unwrap();
}
// Child wants to end tracing.
@ -289,8 +276,7 @@ pub fn sv_loop(
}
}
},
// Child entered a syscall; we wait for exits inside of this, so it
// should never trigger on return from a syscall we care about.
// Child entered or exited a syscall. For now we ignore this and just continue.
ExecEvent::Syscall(pid) => {
ptrace::syscall(pid, None).unwrap();
}
@ -344,8 +330,8 @@ fn wait_for_signal(
return Err(ExecEnd(Some(code)));
}
wait::WaitStatus::Signaled(_, _, _) => return Err(ExecEnd(None)),
wait::WaitStatus::Stopped(pid, signal) => (signal, pid),
wait::WaitStatus::PtraceEvent(pid, signal, _) => (signal, pid),
wait::WaitStatus::Stopped(pid, signal)
| wait::WaitStatus::PtraceEvent(pid, signal, _) => (signal, pid),
// This covers PtraceSyscall and variants that are impossible with
// the flags set (e.g. WaitStatus::StillAlive).
_ => {