Auto merge of #3698 - folkertdev:sched-setaffinity, r=RalfJung

implement `libc::sched_setaffinity` on linux

fixes https://github.com/rust-lang/miri/issues/2749

The implementation, like that of `libc::sched_getaffinity`, just always returns `EINVAL`, which in effect simulates a device with zero CPUs. I believe callers of this function only use it as an optimization, so they are likely to recover gracefully when it returns an error.
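
For illustration, a typical caller might look like the following sketch (hypothetical code, not part of this PR): it queries the affinity mask as an optimization and falls back to a conservative default when the call fails:

```rust
use std::mem;

/// Hypothetical caller: size a thread pool from the affinity mask,
/// but recover gracefully when `sched_getaffinity` reports an error.
fn usable_cpus() -> usize {
    let mut set: libc::cpu_set_t = unsafe { mem::zeroed() };
    let ret =
        unsafe { libc::sched_getaffinity(0, mem::size_of::<libc::cpu_set_t>(), &mut set) };
    if ret == 0 {
        unsafe { libc::CPU_COUNT(&set) as usize }
    } else {
        // e.g. the `EINVAL` returned by Miri's shim: act as if one CPU is available
        1
    }
}
```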

Based on the libc crate, these functions are also available on Android and FreeBSD (but not on macOS or Windows). Should the implementation of the `sched_*` functions just be copied to the Android and FreeBSD shims?
bors 2024-07-06 12:30:31 +00:00
commit 838b8d5898
13 changed files with 507 additions and 37 deletions

@@ -592,6 +592,9 @@ fn main() {
let num_cpus = param
.parse::<u32>()
.unwrap_or_else(|err| show_error!("-Zmiri-num-cpus requires a `u32`: {}", err));
if !(1..=miri::MAX_CPUS).contains(&usize::try_from(num_cpus).unwrap()) {
show_error!("-Zmiri-num-cpus must be in the range 1..={}", miri::MAX_CPUS);
}
miri_config.num_cpus = num_cpus;
} else if let Some(param) = arg.strip_prefix("-Zmiri-force-page-size=") {
let page_size = param.parse::<u64>().unwrap_or_else(|err| {

@@ -0,0 +1,90 @@
use rustc_middle::ty::layout::LayoutOf;
use rustc_target::abi::Endian;
use crate::*;
/// The maximum number of CPUs supported by miri.
///
/// This value is compatible with the libc `CPU_SETSIZE` constant and corresponds to the number
/// of CPUs that a `cpu_set_t` can contain.
///
/// Real machines can have more CPUs than this number, and there exist APIs to set their affinity,
/// but this is not currently supported by miri.
pub const MAX_CPUS: usize = 1024;
/// A thread's CPU affinity mask determines the set of CPUs on which it is eligible to run.
// the actual representation depends on the target's endianness and pointer width.
// See CpuAffinityMask::set for details
#[derive(Clone)]
pub(crate) struct CpuAffinityMask([u8; Self::CPU_MASK_BYTES]);
impl CpuAffinityMask {
pub(crate) const CPU_MASK_BYTES: usize = MAX_CPUS / 8;
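// That is 1024 / 8 = 128 bytes, the same size as glibc's `cpu_set_t`.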
pub fn new<'tcx>(cx: &impl LayoutOf<'tcx>, cpu_count: u32) -> Self {
let mut this = Self([0; Self::CPU_MASK_BYTES]);
// the default affinity mask includes only the available CPUs
for i in 0..cpu_count as usize {
this.set(cx, i);
}
this
}
pub fn chunk_size<'tcx>(cx: &impl LayoutOf<'tcx>) -> u64 {
// The actual representation of the CpuAffinityMask is [c_ulong; _].
let ulong = helpers::path_ty_layout(cx, &["core", "ffi", "c_ulong"]);
ulong.size.bytes()
}
fn set<'tcx>(&mut self, cx: &impl LayoutOf<'tcx>, cpu: usize) {
// we silently ignore CPUs that are out of bounds. This matches the behavior of
// `sched_setaffinity` with a mask that specifies more than `CPU_SETSIZE` CPUs.
if cpu >= MAX_CPUS {
return;
}
// The actual representation of the CpuAffinityMask is [c_ulong; _].
// Within the array elements, we need to use the endianness of the target.
let target = &cx.tcx().sess.target;
match Self::chunk_size(cx) {
4 => {
let start = cpu / 32 * 4; // first byte of the correct u32
let chunk = self.0[start..].first_chunk_mut::<4>().unwrap();
let offset = cpu % 32;
*chunk = match target.options.endian {
Endian::Little => (u32::from_le_bytes(*chunk) | 1 << offset).to_le_bytes(),
Endian::Big => (u32::from_be_bytes(*chunk) | 1 << offset).to_be_bytes(),
};
}
8 => {
let start = cpu / 64 * 8; // first byte of the correct u64
let chunk = self.0[start..].first_chunk_mut::<8>().unwrap();
let offset = cpu % 64;
*chunk = match target.options.endian {
Endian::Little => (u64::from_le_bytes(*chunk) | 1 << offset).to_le_bytes(),
Endian::Big => (u64::from_be_bytes(*chunk) | 1 << offset).to_be_bytes(),
};
}
other => bug!("chunk size not supported: {other}"),
};
}
pub fn as_slice(&self) -> &[u8] {
self.0.as_slice()
}
pub fn from_array<'tcx>(
cx: &impl LayoutOf<'tcx>,
cpu_count: u32,
bytes: [u8; Self::CPU_MASK_BYTES],
) -> Option<Self> {
// mask by what CPUs are actually available
let default = Self::new(cx, cpu_count);
let masked = std::array::from_fn(|i| bytes[i] & default.0[i]);
// at least one CPU must be set for the input to be valid
masked.iter().any(|b| *b != 0).then_some(Self(masked))
}
}
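
To make the chunk-and-offset arithmetic above concrete, here is a minimal standalone sketch of what `set` does on a little-endian target with 8-byte `c_ulong` chunks (the 64-bit Linux case); all names are illustrative:

```rust
const MAX_CPUS: usize = 1024;
const CPU_MASK_BYTES: usize = MAX_CPUS / 8;

fn set_cpu(mask: &mut [u8; CPU_MASK_BYTES], cpu: usize) {
    if cpu >= MAX_CPUS {
        return; // out-of-range CPUs are silently ignored, as in the shim
    }
    let start = cpu / 64 * 8; // first byte of the u64 chunk containing `cpu`
    let offset = cpu % 64; // bit position inside that chunk
    let bytes: [u8; 8] = mask[start..start + 8].try_into().unwrap();
    let word = u64::from_le_bytes(bytes) | 1 << offset;
    mask[start..start + 8].copy_from_slice(&word.to_le_bytes());
}

fn main() {
    let mut mask = [0u8; CPU_MASK_BYTES];
    set_cpu(&mut mask, 65); // CPU 65 is bit 1 of the second chunk
    assert_eq!(mask[8], 0b10);
    set_cpu(&mut mask, 0); // CPU 0 is bit 0 of the first chunk
    assert_eq!(mask[0], 0b01);
}
```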

@@ -1,3 +1,4 @@
pub mod cpu_affinity;
pub mod data_race;
pub mod init_once;
mod range_object_map;

@@ -936,6 +936,11 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
// After this all accesses will be treated as occurring in the new thread.
let old_thread_id = this.machine.threads.set_active_thread_id(new_thread_id);
// The child inherits its parent's cpu affinity.
if let Some(cpuset) = this.machine.thread_cpu_affinity.get(&old_thread_id).cloned() {
this.machine.thread_cpu_affinity.insert(new_thread_id, cpuset);
}
// Perform the function pointer load in the new thread frame.
let instance = this.get_ptr_fn(start_routine)?.as_instance()?;

@@ -282,7 +282,8 @@ pub fn create_ecx<'tcx>(
})?;
// Make sure we have MIR. We check MIR for some stable monomorphic function in libcore.
-let sentinel = ecx.try_resolve_path(&["core", "ascii", "escape_default"], Namespace::ValueNS);
+let sentinel =
+    helpers::try_resolve_path(tcx, &["core", "ascii", "escape_default"], Namespace::ValueNS);
if !matches!(sentinel, Some(s) if tcx.is_mir_available(s.def.def_id())) {
tcx.dcx().fatal(
"the current sysroot was built without `-Zalways-encode-mir`, or libcore seems missing. \

@@ -18,6 +18,7 @@ use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
use rustc_middle::middle::dependency_format::Linkage;
use rustc_middle::middle::exported_symbols::ExportedSymbol;
use rustc_middle::mir;
use rustc_middle::ty::layout::MaybeResult;
use rustc_middle::ty::{
self,
layout::{LayoutOf, TyAndLayout},
@@ -159,6 +160,35 @@ fn try_resolve_did(tcx: TyCtxt<'_>, path: &[&str], namespace: Option<Namespace>)
None
}
/// Gets an instance for a path; fails gracefully if the path does not exist.
pub fn try_resolve_path<'tcx>(
tcx: TyCtxt<'tcx>,
path: &[&str],
namespace: Namespace,
) -> Option<ty::Instance<'tcx>> {
let did = try_resolve_did(tcx, path, Some(namespace))?;
Some(ty::Instance::mono(tcx, did))
}
/// Gets an instance for a path.
#[track_caller]
pub fn resolve_path<'tcx>(
tcx: TyCtxt<'tcx>,
path: &[&str],
namespace: Namespace,
) -> ty::Instance<'tcx> {
try_resolve_path(tcx, path, namespace)
.unwrap_or_else(|| panic!("failed to find required Rust item: {path:?}"))
}
/// Gets the layout of a type at a path.
#[track_caller]
pub fn path_ty_layout<'tcx>(cx: &impl LayoutOf<'tcx>, path: &[&str]) -> TyAndLayout<'tcx> {
let ty =
resolve_path(cx.tcx(), path, Namespace::TypeNS).ty(cx.tcx(), ty::ParamEnv::reveal_all());
cx.layout_of(ty).to_result().ok().unwrap()
}
/// Call `f` for each exported symbol.
pub fn iter_exported_symbols<'tcx>(
tcx: TyCtxt<'tcx>,
@@ -259,23 +289,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
try_resolve_did(*self.eval_context_ref().tcx, path, None).is_some()
}
-/// Gets an instance for a path; fails gracefully if the path does not exist.
-fn try_resolve_path(&self, path: &[&str], namespace: Namespace) -> Option<ty::Instance<'tcx>> {
-    let tcx = self.eval_context_ref().tcx.tcx;
-    let did = try_resolve_did(tcx, path, Some(namespace))?;
-    Some(ty::Instance::mono(tcx, did))
-}
-/// Gets an instance for a path.
-fn resolve_path(&self, path: &[&str], namespace: Namespace) -> ty::Instance<'tcx> {
-    self.try_resolve_path(path, namespace)
-        .unwrap_or_else(|| panic!("failed to find required Rust item: {path:?}"))
-}
/// Evaluates the scalar at the specified path.
fn eval_path(&self, path: &[&str]) -> OpTy<'tcx> {
let this = self.eval_context_ref();
-let instance = this.resolve_path(path, Namespace::ValueNS);
+let instance = resolve_path(*this.tcx, path, Namespace::ValueNS);
// We don't give a span -- this isn't actually used directly by the program anyway.
let const_val = this.eval_global(instance).unwrap_or_else(|err| {
panic!("failed to evaluate required Rust item: {path:?}\n{err:?}")
@@ -344,19 +361,13 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
"`libc` crate is not reliably available on Windows targets; Miri should not use it there"
);
}
-let ty = this
-    .resolve_path(&["libc", name], Namespace::TypeNS)
-    .ty(*this.tcx, ty::ParamEnv::reveal_all());
-this.layout_of(ty).unwrap()
+path_ty_layout(this, &["libc", name])
}
/// Helper function to get the `TyAndLayout` of a `windows` type
fn windows_ty_layout(&self, name: &str) -> TyAndLayout<'tcx> {
let this = self.eval_context_ref();
-let ty = this
-    .resolve_path(&["std", "sys", "pal", "windows", "c", name], Namespace::TypeNS)
-    .ty(*this.tcx, ty::ParamEnv::reveal_all());
-this.layout_of(ty).unwrap()
+path_ty_layout(this, &["std", "sys", "pal", "windows", "c", name])
}
/// Project to the given *named* field (which must be a struct or union type).
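
As a usage note, shims can now obtain a type's layout through the free helper directly, along the lines of this hypothetical fragment (it needs an interpreter context `this`, so it is not runnable standalone):

```rust
// Hypothetical fragment inside a shim body:
let cpu_set_t_layout = path_ty_layout(this, &["libc", "cpu_set_t"]);
assert_eq!(cpu_set_t_layout.size.bytes(), 128); // glibc's cpu_set_t is 128 bytes
```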

@@ -129,6 +129,7 @@ pub use crate::borrow_tracker::{
};
pub use crate::clock::{Clock, Instant};
pub use crate::concurrency::{
cpu_affinity::MAX_CPUS,
data_race::{AtomicFenceOrd, AtomicReadOrd, AtomicRwOrd, AtomicWriteOrd, EvalContextExt as _},
init_once::{EvalContextExt as _, InitOnceId},
sync::{CondvarId, EvalContextExt as _, MutexId, RwLockId, SynchronizationObjects},

@@ -30,6 +30,7 @@ use rustc_target::spec::abi::Abi;
use crate::{
concurrency::{
cpu_affinity::{self, CpuAffinityMask},
data_race::{self, NaReadType, NaWriteType},
weak_memory,
},
@@ -471,6 +472,12 @@ pub struct MiriMachine<'tcx> {
/// The set of threads.
pub(crate) threads: ThreadManager<'tcx>,
/// Stores, for each thread, the set of CPUs on which it is eligible to run.
/// This has no effect at all; it is just tracked to produce the correct result
/// in `sched_getaffinity`.
pub(crate) thread_cpu_affinity: FxHashMap<ThreadId, CpuAffinityMask>,
/// The state of the primitive synchronization objects.
pub(crate) sync: SynchronizationObjects,
@@ -627,6 +634,18 @@ impl<'tcx> MiriMachine<'tcx> {
let stack_addr = if tcx.pointer_size().bits() < 32 { page_size } else { page_size * 32 };
let stack_size =
if tcx.pointer_size().bits() < 32 { page_size * 4 } else { page_size * 16 };
assert!(
usize::try_from(config.num_cpus).unwrap() <= cpu_affinity::MAX_CPUS,
"miri only supports up to {} CPUs, but {} were configured",
cpu_affinity::MAX_CPUS,
config.num_cpus
);
let threads = ThreadManager::default();
let mut thread_cpu_affinity = FxHashMap::default();
if matches!(&*tcx.sess.target.os, "linux" | "freebsd" | "android") {
thread_cpu_affinity
.insert(threads.active_thread(), CpuAffinityMask::new(&layout_cx, config.num_cpus));
}
MiriMachine {
tcx,
borrow_tracker,
@@ -644,7 +663,8 @@ impl<'tcx> MiriMachine<'tcx> {
fds: shims::FdTable::new(config.mute_stdout_stderr),
dirs: Default::default(),
layouts,
-threads: ThreadManager::default(),
+threads,
+thread_cpu_affinity,
sync: SynchronizationObjects::default(),
static_roots: Vec::new(),
profiler,
@@ -765,6 +785,7 @@ impl VisitProvenance for MiriMachine<'_> {
#[rustfmt::skip]
let MiriMachine {
threads,
thread_cpu_affinity: _,
sync: _,
tls,
env_vars,

@@ -3,8 +3,10 @@ use std::str;
use rustc_middle::ty::layout::LayoutOf;
use rustc_span::Symbol;
use rustc_target::abi::Size;
use rustc_target::spec::abi::Abi;
use crate::concurrency::cpu_affinity::CpuAffinityMask;
use crate::shims::alloc::EvalContextExt as _;
use crate::shims::unix::*;
use crate::*;
@@ -571,6 +573,99 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
let result = this.nanosleep(req, rem)?;
this.write_scalar(Scalar::from_i32(result), dest)?;
}
"sched_getaffinity" => {
// Currently this function does not exist on all Unixes, e.g. on macOS.
if !matches!(&*this.tcx.sess.target.os, "linux" | "freebsd" | "android") {
throw_unsup_format!(
"`sched_getaffinity` is not supported on {}",
this.tcx.sess.target.os
);
}
let [pid, cpusetsize, mask] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
let pid = this.read_scalar(pid)?.to_u32()?;
let cpusetsize = this.read_target_usize(cpusetsize)?;
let mask = this.read_pointer(mask)?;
// TODO: when https://github.com/rust-lang/miri/issues/3730 is fixed this should use its notion of tid/pid
let thread_id = match pid {
0 => this.active_thread(),
_ => throw_unsup_format!("`sched_getaffinity` is only supported with a pid of 0 (indicating the current thread)"),
};
// The mask is stored in chunks, and the size must be a whole number of chunks.
let chunk_size = CpuAffinityMask::chunk_size(this);
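// For example, on a 64-bit target `c_ulong` is 8 bytes, so `cpusetsize` values of
// 8, 16, 24, ... are accepted below, while 0 or 12 are rejected with EINVAL
// (on 32-bit targets the chunk size is 4 bytes).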
if this.ptr_is_null(mask)? {
let einval = this.eval_libc("EFAULT");
this.set_last_error(einval)?;
this.write_scalar(Scalar::from_i32(-1), dest)?;
} else if cpusetsize == 0 || cpusetsize.checked_rem(chunk_size).unwrap() != 0 {
// we only copy whole chunks of size_of::<c_ulong>()
let einval = this.eval_libc("EINVAL");
this.set_last_error(einval)?;
this.write_scalar(Scalar::from_i32(-1), dest)?;
} else if let Some(cpuset) = this.machine.thread_cpu_affinity.get(&thread_id) {
let cpuset = cpuset.clone();
// we only copy whole chunks of size_of::<c_ulong>()
let byte_count = Ord::min(cpuset.as_slice().len(), cpusetsize.try_into().unwrap());
this.write_bytes_ptr(mask, cpuset.as_slice()[..byte_count].iter().copied())?;
this.write_scalar(Scalar::from_i32(0), dest)?;
} else {
// The thread whose ID is pid could not be found
let einval = this.eval_libc("ESRCH");
this.set_last_error(einval)?;
this.write_scalar(Scalar::from_i32(-1), dest)?;
}
}
"sched_setaffinity" => {
// Currently this function does not exist on all Unixes, e.g. on macOS.
if !matches!(&*this.tcx.sess.target.os, "linux" | "freebsd" | "android") {
throw_unsup_format!(
"`sched_setaffinity` is not supported on {}",
this.tcx.sess.target.os
);
}
let [pid, cpusetsize, mask] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
let pid = this.read_scalar(pid)?.to_u32()?;
let cpusetsize = this.read_target_usize(cpusetsize)?;
let mask = this.read_pointer(mask)?;
// TODO: when https://github.com/rust-lang/miri/issues/3730 is fixed this should use its notion of tid/pid
let thread_id = match pid {
0 => this.active_thread(),
_ => throw_unsup_format!("`sched_setaffinity` is only supported with a pid of 0 (indicating the current thread)"),
};
if this.ptr_is_null(mask)? {
let einval = this.eval_libc("EFAULT");
this.set_last_error(einval)?;
this.write_scalar(Scalar::from_i32(-1), dest)?;
} else {
// NOTE: cpusetsize might be smaller than `CpuAffinityMask::CPU_MASK_BYTES`.
// Any unspecified bytes are treated as zero here (none of the CPUs are configured).
// This is not exactly documented, so we assume that this is the behavior in practice.
let bits_slice = this.read_bytes_ptr_strip_provenance(mask, Size::from_bytes(cpusetsize))?;
// This ignores the bytes beyond `CpuAffinityMask::CPU_MASK_BYTES`
let bits_array: [u8; CpuAffinityMask::CPU_MASK_BYTES] =
std::array::from_fn(|i| bits_slice.get(i).copied().unwrap_or(0));
match CpuAffinityMask::from_array(this, this.machine.num_cpus, bits_array) {
Some(cpuset) => {
this.machine.thread_cpu_affinity.insert(thread_id, cpuset);
this.write_scalar(Scalar::from_i32(0), dest)?;
}
None => {
// The intersection between the mask and the available CPUs was empty.
let einval = this.eval_libc("EINVAL");
this.set_last_error(einval)?;
this.write_scalar(Scalar::from_i32(-1), dest)?;
}
}
}
}
// Miscellaneous
"isatty" => {

@@ -178,19 +178,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
this.write_scalar(Scalar::from_i32(SIGRTMAX), dest)?;
}
"sched_getaffinity" => {
// This shim isn't useful, aside from the fact that it makes `num_cpus`
// fall back to `sysconf` where it will successfully determine the number of CPUs.
let [pid, cpusetsize, mask] =
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
this.read_scalar(pid)?.to_i32()?;
this.read_target_usize(cpusetsize)?;
this.deref_pointer_as(mask, this.libc_ty_layout("cpu_set_t"))?;
// FIXME: we just return an error.
let einval = this.eval_libc("EINVAL");
this.set_last_error(einval)?;
this.write_scalar(Scalar::from_i32(-1), dest)?;
}
// Incomplete shims that we "stub out" just to get pre-main initialization code to work.
// These shims are enabled only when the caller is in the standard library.

@@ -0,0 +1,17 @@
//@ignore-target-windows: only very limited libc on Windows
//@ignore-target-apple: `sched_setaffinity` is not supported on macOS
//@compile-flags: -Zmiri-disable-isolation -Zmiri-num-cpus=4
fn main() {
use libc::{cpu_set_t, sched_setaffinity};
use std::mem::size_of;
// If pid is zero, then the calling thread is used.
const PID: i32 = 0;
let cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>() + 1, &cpuset) }; //~ ERROR: memory access failed
assert_eq!(err, 0);
}

@@ -0,0 +1,20 @@
error: Undefined Behavior: memory access failed: ALLOC has size 128, so pointer to 129 bytes starting at offset 0 is out-of-bounds
--> $DIR/affinity.rs:LL:CC
|
LL | let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>() + 1, &cpuset) };
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ memory access failed: ALLOC has size 128, so pointer to 129 bytes starting at offset 0 is out-of-bounds
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
help: ALLOC was allocated here:
--> $DIR/affinity.rs:LL:CC
|
LL | let cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
| ^^^^^^
= note: BACKTRACE (of the first span):
= note: inside `main` at $DIR/affinity.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

@@ -0,0 +1,218 @@
//@ignore-target-windows: only very limited libc on Windows
//@ignore-target-apple: `sched_{g, s}etaffinity` are not supported on macOS
//@compile-flags: -Zmiri-disable-isolation -Zmiri-num-cpus=4
#![feature(io_error_more)]
#![feature(pointer_is_aligned_to)]
#![feature(strict_provenance)]
use libc::{cpu_set_t, sched_getaffinity, sched_setaffinity};
use std::mem::{size_of, size_of_val};
// If pid is zero, then the calling thread is used.
const PID: i32 = 0;
fn null_pointers() {
let err = unsafe { sched_getaffinity(PID, size_of::<cpu_set_t>(), std::ptr::null_mut()) };
assert_eq!(err, -1);
let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>(), std::ptr::null()) };
assert_eq!(err, -1);
}
fn configure_no_cpus() {
let cpu_count = std::thread::available_parallelism().unwrap().get();
let mut cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
// configuring no CPUs will fail
let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>(), &cpuset) };
assert_eq!(err, -1);
assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::InvalidInput);
// configuring no (physically available) CPUs will fail
unsafe { libc::CPU_SET(cpu_count, &mut cpuset) };
let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>(), &cpuset) };
assert_eq!(err, -1);
assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::InvalidInput);
}
fn configure_unavailable_cpu() {
let cpu_count = std::thread::available_parallelism().unwrap().get();
// Safety: valid value for this type
let mut cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
let err = unsafe { sched_getaffinity(PID, size_of::<cpu_set_t>(), &mut cpuset) };
assert_eq!(err, 0);
// by default, only available CPUs are configured
for i in 0..cpu_count {
assert!(unsafe { libc::CPU_ISSET(i, &cpuset) });
}
assert!(unsafe { !libc::CPU_ISSET(cpu_count, &cpuset) });
// configure CPU that we don't have
unsafe { libc::CPU_SET(cpu_count, &mut cpuset) };
let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>(), &cpuset) };
assert_eq!(err, 0);
let err = unsafe { sched_getaffinity(PID, size_of::<cpu_set_t>(), &mut cpuset) };
assert_eq!(err, 0);
// the CPU is not set because it is not available
assert!(!unsafe { libc::CPU_ISSET(cpu_count, &cpuset) });
}
fn large_set() {
// Rust's libc crate does not currently implement dynamic cpu set allocation
// or related functions like `CPU_ZERO_S`, so we have to be creative:
// this array has 2048 bits, twice the standard number.
let mut cpuset = [u64::MAX; 32];
let err = unsafe { sched_setaffinity(PID, size_of_val(&cpuset), cpuset.as_ptr().cast()) };
assert_eq!(err, 0);
let err = unsafe { sched_getaffinity(PID, size_of_val(&cpuset), cpuset.as_mut_ptr().cast()) };
assert_eq!(err, 0);
}
fn get_small_cpu_mask() {
let mut cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
// should be 4 on 32-bit systems and 8 otherwise for systems that implement sched_getaffinity
let step = size_of::<std::ffi::c_ulong>();
for i in (0..=2).map(|x| x * step) {
if i == 0 {
// 0 always fails
let err = unsafe { sched_getaffinity(PID, i, &mut cpuset) };
assert_eq!(err, -1, "fail for {}", i);
assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::InvalidInput);
} else {
// other whole multiples of the size of c_ulong work
let err = unsafe { sched_getaffinity(PID, i, &mut cpuset) };
assert_eq!(err, 0, "fail for {i}");
}
// anything else returns an error
for j in 1..step {
let err = unsafe { sched_getaffinity(PID, i + j, &mut cpuset) };
assert_eq!(err, -1, "success for {}", i + j);
assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::InvalidInput);
}
}
}
fn set_small_cpu_mask() {
let mut cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
let err = unsafe { sched_getaffinity(PID, size_of::<cpu_set_t>(), &mut cpuset) };
assert_eq!(err, 0);
// setting a mask of size 0 is invalid
let err = unsafe { sched_setaffinity(PID, 0, &cpuset) };
assert_eq!(err, -1);
assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::InvalidInput);
// On LE systems, any other number of bytes (at least up to `size_of::<cpu_set_t>()`) will work.
// On BE systems the CPUs 0..8 are stored in the right-most byte of the first chunk. If that
// byte is not included, no valid CPUs are configured. We skip those cases.
let cpu_zero_included_length =
if cfg!(target_endian = "little") { 1 } else { core::mem::size_of::<std::ffi::c_ulong>() };
for i in cpu_zero_included_length..24 {
let err = unsafe { sched_setaffinity(PID, i, &cpuset) };
assert_eq!(err, 0, "fail for {i}");
}
}
fn set_custom_cpu_mask() {
let cpu_count = std::thread::available_parallelism().unwrap().get();
assert!(cpu_count > 1, "this test cannot do anything interesting with just one CPU");
let mut cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
// at the start, CPU 1 should be set
let err = unsafe { sched_getaffinity(PID, size_of::<cpu_set_t>(), &mut cpuset) };
assert_eq!(err, 0);
assert!(unsafe { libc::CPU_ISSET(1, &cpuset) });
// make a valid mask
unsafe { libc::CPU_ZERO(&mut cpuset) };
unsafe { libc::CPU_SET(0, &mut cpuset) };
// giving a smaller mask is fine
let err = unsafe { sched_setaffinity(PID, 8, &cpuset) };
assert_eq!(err, 0);
// and actually clears the other CPUs from the mask
let err = unsafe { sched_getaffinity(PID, size_of::<cpu_set_t>(), &mut cpuset) };
assert_eq!(err, 0);
assert!(unsafe { !libc::CPU_ISSET(1, &cpuset) });
// it is important that we reset the cpu mask now for future tests
for i in 0..cpu_count {
unsafe { libc::CPU_SET(i, &mut cpuset) };
}
let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>(), &cpuset) };
assert_eq!(err, 0);
}
fn parent_child() {
let cpu_count = std::thread::available_parallelism().unwrap().get();
assert!(cpu_count > 1, "this test cannot do anything interesting with just one CPU");
// configure the parent thread to run only on CPU 0
let mut parent_cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
unsafe { libc::CPU_SET(0, &mut parent_cpuset) };
let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>(), &parent_cpuset) };
assert_eq!(err, 0);
std::thread::scope(|spawner| {
spawner.spawn(|| {
let mut cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
let err = unsafe { sched_getaffinity(PID, size_of::<cpu_set_t>(), &mut cpuset) };
assert_eq!(err, 0);
// the child inherits its parent's set
assert!(unsafe { libc::CPU_ISSET(0, &cpuset) });
assert!(unsafe { !libc::CPU_ISSET(1, &cpuset) });
// configure cpu 1 for the child
unsafe { libc::CPU_SET(1, &mut cpuset) };
});
});
let err = unsafe { sched_getaffinity(PID, size_of::<cpu_set_t>(), &mut parent_cpuset) };
assert_eq!(err, 0);
// the parent's set should be unaffected
assert!(unsafe { !libc::CPU_ISSET(1, &parent_cpuset) });
// it is important that we reset the cpu mask now for future tests
let mut cpuset = parent_cpuset;
for i in 0..cpu_count {
unsafe { libc::CPU_SET(i, &mut cpuset) };
}
let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>(), &cpuset) };
assert_eq!(err, 0);
}
fn main() {
null_pointers();
configure_no_cpus();
configure_unavailable_cpu();
large_set();
get_small_cpu_mask();
set_small_cpu_mask();
set_custom_cpu_mask();
parent_child();
}