Rollup merge of #150412 - the8472:pidfd-spawn, r=tgross35

use PIDFD_GET_INFO ioctl when available

This way, using pidfd_spawnp no longer has to rely on procfs, which avoids an unpleasant edge case where the child is spawned but we can't get its pid. In addition, pidfd.{try_}wait will be able to return the exit status even after a process has been reaped, at least on newer kernels.

Tracking issue: https://github.com/rust-lang/rust/issues/82971
This commit is contained in:
Jonathan Brouwer 2026-01-06 16:19:41 +01:00 committed by GitHub
commit 3b0d35f94b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 132 additions and 57 deletions

View file

@ -67,8 +67,10 @@ impl PidFd {
/// Waits for the child to exit completely, returning the status that it exited with. /// Waits for the child to exit completely, returning the status that it exited with.
/// ///
/// Unlike [`Child::wait`] it does not ensure that the stdin handle is closed. /// Unlike [`Child::wait`] it does not ensure that the stdin handle is closed.
/// Additionally it will not return an `ExitStatus` if the child ///
/// has already been reaped. Instead an error will be returned. /// Additionally on kernels prior to 6.15 only the first attempt to
/// reap a child will return an ExitStatus, further attempts
/// will return an Error.
/// ///
/// [`Child::wait`]: process::Child::wait /// [`Child::wait`]: process::Child::wait
pub fn wait(&self) -> Result<ExitStatus> { pub fn wait(&self) -> Result<ExitStatus> {
@ -77,8 +79,8 @@ impl PidFd {
/// Attempts to collect the exit status of the child if it has already exited. /// Attempts to collect the exit status of the child if it has already exited.
/// ///
/// Unlike [`Child::try_wait`] this method will return an Error /// On kernels prior to 6.15, and unlike [`Child::try_wait`], only the first attempt
/// if the child has already been reaped. /// to reap a child will return an ExitStatus, further attempts will return an Error.
/// ///
/// [`Child::try_wait`]: process::Child::try_wait /// [`Child::try_wait`]: process::Child::try_wait
pub fn try_wait(&self) -> Result<Option<ExitStatus>> { pub fn try_wait(&self) -> Result<Option<ExitStatus>> {

View file

@ -1,5 +1,5 @@
use crate::io; use crate::io;
use crate::os::fd::{AsRawFd, FromRawFd, RawFd}; use crate::os::fd::{AsRawFd, FromRawFd, IntoRawFd, RawFd};
use crate::sys::fd::FileDesc; use crate::sys::fd::FileDesc;
use crate::sys::process::ExitStatus; use crate::sys::process::ExitStatus;
use crate::sys::{AsInner, FromInner, IntoInner, cvt}; use crate::sys::{AsInner, FromInner, IntoInner, cvt};
@ -15,6 +15,73 @@ impl PidFd {
self.send_signal(libc::SIGKILL) self.send_signal(libc::SIGKILL)
} }
#[cfg(any(test, target_env = "gnu", target_env = "musl"))]
/// Opens a pidfd referring to the calling process via the raw `pidfd_open` syscall.
///
/// # Errors
///
/// Propagates the error produced by `cvt` when the syscall reports failure.
pub(crate) fn current_process() -> io::Result<PidFd> {
    let own_pid = crate::process::id();
    let raw = cvt(unsafe { libc::syscall(libc::SYS_pidfd_open, own_pid, 0) })?;
    // The successful return value is the new descriptor; wrap it so `PidFd` owns it.
    let owned = unsafe { PidFd::from_raw_fd(raw as RawFd) };
    Ok(owned)
}
#[cfg(any(test, target_env = "gnu", target_env = "musl"))]
/// Looks up the pid of the process this pidfd refers to.
///
/// The `PIDFD_GET_INFO` ioctl is tried first. If the kernel rejects the request as
/// unsupported, the weakly linked libc helper `pidfd_getpid` is used instead, provided
/// the libc exports it.
///
/// # Errors
///
/// Returns the ioctl error if the ioctl fails for any other reason, or if it is
/// unsupported and no `pidfd_getpid` symbol is available. Returns the helper's error if
/// the helper itself fails.
pub(crate) fn pid(&self) -> io::Result<u32> {
    use crate::sys::weak::weak;

    // since kernel 6.13
    // https://lore.kernel.org/all/20241010155401.2268522-1-luca.boccassi@gmail.com/
    let mut pidfd_info: libc::pidfd_info = unsafe { crate::mem::zeroed() };
    pidfd_info.mask = libc::PIDFD_INFO_PID as u64;
    match cvt(unsafe { libc::ioctl(self.0.as_raw_fd(), libc::PIDFD_GET_INFO, &mut pidfd_info) })
    {
        Ok(_) => Ok(pidfd_info.pid),
        // The kernel doesn't support that ioctl. Depending on the kernel version an unknown
        // pidfd ioctl is rejected with EINVAL or, when pidfds have no matching ioctl
        // handler, with ENOTTY. Treat both as "unsupported" and try the glibc helper that
        // looks at procfs.
        Err(e)
            if matches!(e.raw_os_error(), Some(libc::EINVAL) | Some(libc::ENOTTY)) =>
        {
            weak!(
                fn pidfd_getpid(pidfd: RawFd) -> libc::pid_t;
            );
            match pidfd_getpid.get() {
                Some(pidfd_getpid) => {
                    let pid: libc::c_int = cvt(unsafe { pidfd_getpid(self.0.as_raw_fd()) })?;
                    Ok(pid as u32)
                }
                // No helper either: report the original ioctl error.
                None => Err(e),
            }
        }
        Err(e) => Err(e),
    }
}
fn exit_for_reaped_child(&self) -> io::Result<ExitStatus> {
// since kernel 6.15
// https://lore.kernel.org/linux-fsdevel/20250305-work-pidfs-kill_on_last_close-v3-0-c8c3d8361705@kernel.org/T/
let mut pidfd_info: libc::pidfd_info = unsafe { crate::mem::zeroed() };
pidfd_info.mask = libc::PIDFD_INFO_EXIT as u64;
cvt(unsafe { libc::ioctl(self.0.as_raw_fd(), libc::PIDFD_GET_INFO, &mut pidfd_info) })?;
Ok(ExitStatus::new(pidfd_info.exit_code))
}
/// Calls `waitid` on this pidfd with the given `options`.
///
/// Returns `Ok(None)` when `waitid` succeeds but reports a zero `si_pid`, and
/// `Ok(Some(status))` when an exit status was obtained.
///
/// # Errors
///
/// Any `waitid` failure is returned as-is, except `ECHILD`: in that case the status is
/// looked up through `exit_for_reaped_child`, and the original `waitid` error is
/// returned only if that lookup fails as well.
fn waitid(&self, options: libc::c_int) -> io::Result<Option<ExitStatus>> {
    let mut info: libc::siginfo_t = unsafe { crate::mem::zeroed() };
    let outcome = cvt(unsafe {
        libc::waitid(libc::P_PIDFD, self.0.as_raw_fd() as u32, &mut info, options)
    });

    if let Err(wait_err) = outcome {
        if wait_err.raw_os_error() != Some(libc::ECHILD) {
            return Err(wait_err);
        }
        // already reaped
        return match self.exit_for_reaped_child() {
            Ok(status) => Ok(Some(status)),
            Err(_) => Err(wait_err),
        };
    }

    // A zero pid in the zero-initialised siginfo means nothing was collected.
    let nothing_collected = unsafe { info.si_pid() } == 0;
    if nothing_collected { Ok(None) } else { Ok(Some(ExitStatus::from_waitid_siginfo(info))) }
}
pub(crate) fn send_signal(&self, signal: i32) -> io::Result<()> { pub(crate) fn send_signal(&self, signal: i32) -> io::Result<()> {
cvt(unsafe { cvt(unsafe {
libc::syscall( libc::syscall(
@ -29,29 +96,15 @@ impl PidFd {
} }
pub fn wait(&self) -> io::Result<ExitStatus> { pub fn wait(&self) -> io::Result<ExitStatus> {
let mut siginfo: libc::siginfo_t = unsafe { crate::mem::zeroed() }; let r = self.waitid(libc::WEXITED)?;
cvt(unsafe { match r {
libc::waitid(libc::P_PIDFD, self.0.as_raw_fd() as u32, &mut siginfo, libc::WEXITED) Some(exit_status) => Ok(exit_status),
})?; None => unreachable!("waitid with WEXITED should not return None"),
Ok(ExitStatus::from_waitid_siginfo(siginfo)) }
} }
pub fn try_wait(&self) -> io::Result<Option<ExitStatus>> { pub fn try_wait(&self) -> io::Result<Option<ExitStatus>> {
let mut siginfo: libc::siginfo_t = unsafe { crate::mem::zeroed() }; self.waitid(libc::WEXITED | libc::WNOHANG)
cvt(unsafe {
libc::waitid(
libc::P_PIDFD,
self.0.as_raw_fd() as u32,
&mut siginfo,
libc::WEXITED | libc::WNOHANG,
)
})?;
if unsafe { siginfo.si_pid() } == 0 {
Ok(None)
} else {
Ok(Some(ExitStatus::from_waitid_siginfo(siginfo)))
}
} }
} }
@ -78,3 +131,9 @@ impl FromRawFd for PidFd {
Self(FileDesc::from_raw_fd(fd)) Self(FileDesc::from_raw_fd(fd))
} }
} }
impl IntoRawFd for PidFd {
fn into_raw_fd(self) -> RawFd {
self.0.into_raw_fd()
}
}

View file

@ -1,8 +1,11 @@
use super::PidFd as InternalPidFd;
use crate::assert_matches::assert_matches; use crate::assert_matches::assert_matches;
use crate::os::fd::{AsRawFd, RawFd}; use crate::io::ErrorKind;
use crate::os::fd::AsRawFd;
use crate::os::linux::process::{ChildExt, CommandExt as _}; use crate::os::linux::process::{ChildExt, CommandExt as _};
use crate::os::unix::process::{CommandExt as _, ExitStatusExt}; use crate::os::unix::process::{CommandExt as _, ExitStatusExt};
use crate::process::Command; use crate::process::Command;
use crate::sys::AsInner;
#[test] #[test]
fn test_command_pidfd() { fn test_command_pidfd() {
@ -48,11 +51,22 @@ fn test_command_pidfd() {
let mut cmd = Command::new("false"); let mut cmd = Command::new("false");
let mut child = unsafe { cmd.pre_exec(|| Ok(())) }.create_pidfd(true).spawn().unwrap(); let mut child = unsafe { cmd.pre_exec(|| Ok(())) }.create_pidfd(true).spawn().unwrap();
assert!(child.id() > 0 && child.id() < -1i32 as u32); let id = child.id();
assert!(id > 0 && id < -1i32 as u32, "spawning with pidfd still returns a sane pid");
if pidfd_open_available { if pidfd_open_available {
assert!(child.pidfd().is_ok()) assert!(child.pidfd().is_ok())
} }
if let Ok(pidfd) = child.pidfd() {
match pidfd.as_inner().pid() {
Ok(pid) => assert_eq!(pid, id),
Err(e) if e.kind() == ErrorKind::InvalidInput => { /* older kernel */ }
Err(e) => panic!("unexpected error getting pid from pidfd: {}", e),
}
}
child.wait().expect("error waiting on child"); child.wait().expect("error waiting on child");
} }
@ -77,9 +91,15 @@ fn test_pidfd() {
assert_eq!(status.signal(), Some(libc::SIGKILL)); assert_eq!(status.signal(), Some(libc::SIGKILL));
// Trying to wait again for a reaped child is safe since there's no pid-recycling race. // Trying to wait again for a reaped child is safe since there's no pid-recycling race.
// But doing so will return an error. // But doing so may return an error.
let res = fd.wait(); let res = fd.wait();
assert_matches!(res, Err(e) if e.raw_os_error() == Some(libc::ECHILD)); match res {
// older kernels
Err(e) if e.raw_os_error() == Some(libc::ECHILD) => {}
// 6.15+
Ok(exit) if exit.signal() == Some(libc::SIGKILL) => {}
other => panic!("expected ECHILD error, got {:?}", other),
}
// Ditto for additional attempts to kill an already-dead child. // Ditto for additional attempts to kill an already-dead child.
let res = fd.kill(); let res = fd.kill();
@ -87,13 +107,5 @@ fn test_pidfd() {
} }
fn probe_pidfd_support() -> bool { fn probe_pidfd_support() -> bool {
// pidfds require the pidfd_open syscall InternalPidFd::current_process().is_ok()
let our_pid = crate::process::id();
let pidfd = unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) };
if pidfd >= 0 {
unsafe { libc::close(pidfd as RawFd) };
true
} else {
false
}
} }

View file

@ -482,10 +482,6 @@ impl Command {
) -> libc::c_int; ) -> libc::c_int;
); );
weak!(
fn pidfd_getpid(pidfd: libc::c_int) -> libc::c_int;
);
static PIDFD_SUPPORTED: Atomic<u8> = AtomicU8::new(0); static PIDFD_SUPPORTED: Atomic<u8> = AtomicU8::new(0);
const UNKNOWN: u8 = 0; const UNKNOWN: u8 = 0;
const SPAWN: u8 = 1; const SPAWN: u8 = 1;
@ -502,24 +498,26 @@ impl Command {
} }
if support == UNKNOWN { if support == UNKNOWN {
support = NO; support = NO;
let our_pid = crate::process::id();
let pidfd = cvt(unsafe { libc::syscall(libc::SYS_pidfd_open, our_pid, 0) } as c_int); match PidFd::current_process() {
match pidfd {
Ok(pidfd) => { Ok(pidfd) => {
// if pidfd_open works then we at least know the fork path is available.
support = FORK_EXEC; support = FORK_EXEC;
if let Some(Ok(pid)) = pidfd_getpid.get().map(|f| cvt(unsafe { f(pidfd) } as i32)) { // but for the fast path we need both spawnp and the
if pidfd_spawnp.get().is_some() && pid as u32 == our_pid { // pidfd -> pid conversion to work.
support = SPAWN if pidfd_spawnp.get().is_some() && let Ok(pid) = pidfd.pid() {
} assert_eq!(pid, crate::process::id(), "sanity check");
support = SPAWN;
} }
unsafe { libc::close(pidfd) };
} }
Err(e) if e.raw_os_error() == Some(libc::EMFILE) => { Err(e) if e.raw_os_error() == Some(libc::EMFILE) => {
// We're temporarily(?) out of file descriptors. In this case obtaining a pidfd would also fail // We're temporarily(?) out of file descriptors. In this case pidfd_spawnp would also fail
// Don't update the support flag so we can probe again later. // Don't update the support flag so we can probe again later.
return Err(e) return Err(e)
} }
_ => {} _ => {
// pidfd_open not available? likely an old kernel without pidfd support.
}
} }
PIDFD_SUPPORTED.store(support, Ordering::Relaxed); PIDFD_SUPPORTED.store(support, Ordering::Relaxed);
if support == FORK_EXEC { if support == FORK_EXEC {
@ -791,13 +789,17 @@ impl Command {
} }
spawn_res?; spawn_res?;
let pid = match cvt(pidfd_getpid.get().unwrap()(pidfd)) { use crate::os::fd::{FromRawFd, IntoRawFd};
let pidfd = PidFd::from_raw_fd(pidfd);
let pid = match pidfd.pid() {
Ok(pid) => pid, Ok(pid) => pid,
Err(e) => { Err(e) => {
// The child has been spawned and we are holding its pidfd. // The child has been spawned and we are holding its pidfd.
// But we cannot obtain its pid even though pidfd_getpid support was verified earlier. // But we cannot obtain its pid even though pidfd_spawnp and getpid support
// This might happen if libc can't open procfs because the file descriptor limit has been reached. // was verified earlier.
libc::close(pidfd); // This is quite unlikely, but might happen if the ioctl is not supported,
// glibc tries to use procfs and we're out of file descriptors.
return Err(Error::new( return Err(Error::new(
e.kind(), e.kind(),
"pidfd_spawnp succeeded but the child's PID could not be obtained", "pidfd_spawnp succeeded but the child's PID could not be obtained",
@ -805,7 +807,7 @@ impl Command {
} }
}; };
return Ok(Some(Process::new(pid, pidfd))); return Ok(Some(Process::new(pid as i32, pidfd.into_raw_fd())));
} }
// Safety: -1 indicates we don't have a pidfd. // Safety: -1 indicates we don't have a pidfd.