From c77a2c6c0c463a1b53eafd1eb65d55600a0d5045 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Fri, 21 Jun 2024 20:44:25 +0200
Subject: [PATCH 1/4] implement `libc::sched_getaffinity` and
 `libc::sched_setaffinity`

---
 src/tools/miri/src/bin/miri.rs                |   3 +
 .../miri/src/concurrency/cpu_affinity.rs      |  95 +++++++++
 src/tools/miri/src/concurrency/mod.rs         |   1 +
 src/tools/miri/src/concurrency/thread.rs      |   5 +
 src/tools/miri/src/lib.rs                     |   1 +
 src/tools/miri/src/machine.rs                 |  25 ++-
 .../miri/src/shims/unix/foreign_items.rs      |  97 +++++++++
 .../src/shims/unix/linux/foreign_items.rs     |  13 --
 .../miri/tests/fail-dep/libc/affinity.rs      |  17 ++
 .../miri/tests/fail-dep/libc/affinity.stderr  |  20 ++
 .../miri/tests/pass-dep/libc/libc-affinity.rs | 194 ++++++++++++++++++
 11 files changed, 457 insertions(+), 14 deletions(-)
 create mode 100644 src/tools/miri/src/concurrency/cpu_affinity.rs
 create mode 100644 src/tools/miri/tests/fail-dep/libc/affinity.rs
 create mode 100644 src/tools/miri/tests/fail-dep/libc/affinity.stderr
 create mode 100644 src/tools/miri/tests/pass-dep/libc/libc-affinity.rs
diff --git a/src/tools/miri/src/bin/miri.rs b/src/tools/miri/src/bin/miri.rs
index 9d8e44ce409e..9f3fa075f38f 100644
--- a/src/tools/miri/src/bin/miri.rs
+++ b/src/tools/miri/src/bin/miri.rs
@@ -592,6 +592,9 @@ fn main() {
             let num_cpus = param
                 .parse::<u32>()
                 .unwrap_or_else(|err| show_error!("-Zmiri-num-cpus requires a `u32`: {}", err));
+            if !(1..=miri::MAX_CPUS).contains(&usize::try_from(num_cpus).unwrap()) {
+                show_error!("-Zmiri-num-cpus must be in the range 1..={}", miri::MAX_CPUS);
+            }
             miri_config.num_cpus = num_cpus;
         } else if let Some(param) = arg.strip_prefix("-Zmiri-force-page-size=") {
             let page_size = param.parse::<u64>().unwrap_or_else(|err| {
diff --git a/src/tools/miri/src/concurrency/cpu_affinity.rs b/src/tools/miri/src/concurrency/cpu_affinity.rs
new file mode 100644
index 000000000000..085900ac3aa6
--- /dev/null
+++ b/src/tools/miri/src/concurrency/cpu_affinity.rs
@@ -0,0 +1,95 @@
+use crate::bug;
+use rustc_target::abi::Endian;
+
+/// The maximum number of CPUs supported by miri.
+///
+/// This value is compatible with the libc `CPU_SETSIZE` constant and corresponds to the number
+/// of CPUs that a `cpu_set_t` can contain.
+///
+/// Real machines can have more CPUs than this number, and there exist APIs to set their affinity,
+/// but this is not currently supported by miri.
+pub const MAX_CPUS: usize = 1024;
+
+/// A thread's CPU affinity mask determines the set of CPUs on which it is eligible to run.
+// the actual representation depends on the target's endianness and pointer width.
+// See CpuAffinityMask::set for details
+#[derive(Clone)]
+pub(crate) struct CpuAffinityMask([u8; Self::CPU_MASK_BYTES]);
+
+impl CpuAffinityMask {
+    pub(crate) const CPU_MASK_BYTES: usize = MAX_CPUS / 8;
+
+    pub fn new(target: &rustc_target::spec::Target, cpu_count: u32) -> Self {
+        let mut this = Self([0; Self::CPU_MASK_BYTES]);
+
+        // the default affinity mask includes only the available CPUs
+        for i in 0..cpu_count as usize {
+            this.set(target, i);
+        }
+
+        this
+    }
+
+    pub fn chunk_size(target: &rustc_target::spec::Target) -> u64 {
+        // The actual representation of the CpuAffinityMask is [c_ulong; _], in practice either
+        //
+        // - [u32; 32] on 32-bit platforms
+        // - [u64; 16] everywhere else
+
+        // FIXME: this should be `size_of::<core::ffi::c_ulong>()`
+        u64::from(target.pointer_width / 8)
+    }
+
+    fn set(&mut self, target: &rustc_target::spec::Target, cpu: usize) {
+        // we silently ignore CPUs that are out of bounds. This matches the behavior of
+        // `sched_setaffinity` with a mask that specifies more than `CPU_SETSIZE` CPUs.
+        if cpu >= MAX_CPUS {
+            return;
+        }
+
+        // The actual representation of the CpuAffinityMask is [c_ulong; _], in practice either
+        //
+        // - [u32; 32] on 32-bit platforms
+        // - [u64; 16] everywhere else
+        //
+        // Within the array elements, we need to use the endianness of the target.
+        match Self::chunk_size(target) {
+            4 => {
+                let start = cpu / 32 * 4; // first byte of the correct u32
+                let chunk = self.0[start..].first_chunk_mut::<4>().unwrap();
+                let offset = cpu % 32;
+                *chunk = match target.options.endian {
+                    Endian::Little => (u32::from_le_bytes(*chunk) | 1 << offset).to_le_bytes(),
+                    Endian::Big => (u32::from_be_bytes(*chunk) | 1 << offset).to_be_bytes(),
+                };
+            }
+            8 => {
+                let start = cpu / 64 * 8; // first byte of the correct u64
+                let chunk = self.0[start..].first_chunk_mut::<8>().unwrap();
+                let offset = cpu % 64;
+                *chunk = match target.options.endian {
+                    Endian::Little => (u64::from_le_bytes(*chunk) | 1 << offset).to_le_bytes(),
+                    Endian::Big => (u64::from_be_bytes(*chunk) | 1 << offset).to_be_bytes(),
+                };
+            }
+            other => bug!("other chunk sizes are not supported: {other}"),
+        };
+    }
+
+    pub fn as_slice(&self) -> &[u8] {
+        self.0.as_slice()
+    }
+
+    pub fn from_array(
+        target: &rustc_target::spec::Target,
+        cpu_count: u32,
+        bytes: [u8; Self::CPU_MASK_BYTES],
+    ) -> Option<Self> {
+        // mask by what CPUs are actually available
+        let default = Self::new(target, cpu_count);
+        let masked = std::array::from_fn(|i| bytes[i] & default.0[i]);
+
+        // at least one thread must be set for the input to be valid
+        masked.iter().any(|b| *b != 0).then_some(Self(masked))
+    }
+}
diff --git a/src/tools/miri/src/concurrency/mod.rs b/src/tools/miri/src/concurrency/mod.rs
index 822d173ac06a..17789fe9f87f 100644
--- a/src/tools/miri/src/concurrency/mod.rs
+++ b/src/tools/miri/src/concurrency/mod.rs
@@ -1,3 +1,4 @@
+pub mod cpu_affinity;
 pub mod data_race;
 pub mod init_once;
 mod range_object_map;
diff --git a/src/tools/miri/src/concurrency/thread.rs b/src/tools/miri/src/concurrency/thread.rs
index 718daf93ea00..a53dd7eac1e9 100644
--- a/src/tools/miri/src/concurrency/thread.rs
+++ b/src/tools/miri/src/concurrency/thread.rs
@@ -936,6 +936,11 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         // After this all accesses will be treated as occurring in the new thread.
         let old_thread_id = this.machine.threads.set_active_thread_id(new_thread_id);
 
+        // The child inherits its parent's cpu affinity.
+        if let Some(cpuset) = this.machine.thread_cpu_affinity.get(&old_thread_id).cloned() {
+            this.machine.thread_cpu_affinity.insert(new_thread_id, cpuset);
+        }
+
         // Perform the function pointer load in the new thread frame.
         let instance = this.get_ptr_fn(start_routine)?.as_instance()?;
 
diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs
index 8da00861f905..7fb68d782f11 100644
--- a/src/tools/miri/src/lib.rs
+++ b/src/tools/miri/src/lib.rs
@@ -129,6 +129,7 @@ pub use crate::borrow_tracker::{
 };
 pub use crate::clock::{Clock, Instant};
 pub use crate::concurrency::{
+    cpu_affinity::MAX_CPUS,
     data_race::{AtomicFenceOrd, AtomicReadOrd, AtomicRwOrd, AtomicWriteOrd, EvalContextExt as _},
     init_once::{EvalContextExt as _, InitOnceId},
     sync::{CondvarId, EvalContextExt as _, MutexId, RwLockId, SynchronizationObjects},
diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs
index e321237bb4a2..fee6ab068175 100644
--- a/src/tools/miri/src/machine.rs
+++ b/src/tools/miri/src/machine.rs
@@ -30,6 +30,7 @@ use rustc_target::spec::abi::Abi;
 
 use crate::{
     concurrency::{
+        cpu_affinity::{self, CpuAffinityMask},
         data_race::{self, NaReadType, NaWriteType},
         weak_memory,
     },
@@ -471,6 +472,12 @@ pub struct MiriMachine<'tcx> {
 
     /// The set of threads.
     pub(crate) threads: ThreadManager<'tcx>,
+
+    /// Stores which thread is eligible to run on which CPUs.
+    /// This has no effect at all, it is just tracked to produce the correct result
+    /// in `sched_getaffinity`
+    pub(crate) thread_cpu_affinity: FxHashMap<ThreadId, CpuAffinityMask>,
+
     /// The state of the primitive synchronization objects.
     pub(crate) sync: SynchronizationObjects,
 
@@ -627,6 +634,20 @@ impl<'tcx> MiriMachine<'tcx> {
         let stack_addr = if tcx.pointer_size().bits() < 32 { page_size } else { page_size * 32 };
         let stack_size =
             if tcx.pointer_size().bits() < 32 { page_size * 4 } else { page_size * 16 };
+        assert!(
+            usize::try_from(config.num_cpus).unwrap() <= cpu_affinity::MAX_CPUS,
+            "miri only supports up to {} CPUs, but {} were configured",
+            cpu_affinity::MAX_CPUS,
+            config.num_cpus
+        );
+        let threads = ThreadManager::default();
+        let mut thread_cpu_affinity = FxHashMap::default();
+        if matches!(&*tcx.sess.target.os, "linux" | "freebsd" | "android") {
+            thread_cpu_affinity.insert(
+                threads.active_thread(),
+                CpuAffinityMask::new(&tcx.sess.target, config.num_cpus),
+            );
+        }
         MiriMachine {
             tcx,
             borrow_tracker,
@@ -644,7 +665,8 @@ impl<'tcx> MiriMachine<'tcx> {
             fds: shims::FdTable::new(config.mute_stdout_stderr),
             dirs: Default::default(),
             layouts,
-            threads: ThreadManager::default(),
+            threads,
+            thread_cpu_affinity,
             sync: SynchronizationObjects::default(),
             static_roots: Vec::new(),
             profiler,
@@ -765,6 +787,7 @@ impl VisitProvenance for MiriMachine<'_> {
         #[rustfmt::skip]
         let MiriMachine {
             threads,
+            thread_cpu_affinity: _,
             sync: _,
             tls,
             env_vars,
diff --git a/src/tools/miri/src/shims/unix/foreign_items.rs b/src/tools/miri/src/shims/unix/foreign_items.rs
index 2421f9244f36..f5d3e0b536be 100644
--- a/src/tools/miri/src/shims/unix/foreign_items.rs
+++ b/src/tools/miri/src/shims/unix/foreign_items.rs
@@ -3,8 +3,10 @@ use std::str;
 
 use rustc_middle::ty::layout::LayoutOf;
 use rustc_span::Symbol;
+use rustc_target::abi::Size;
 use rustc_target::spec::abi::Abi;
 
+use crate::concurrency::cpu_affinity::CpuAffinityMask;
 use crate::shims::alloc::EvalContextExt as _;
 use crate::shims::unix::*;
 use crate::*;
@@ -571,6 +573,101 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 let result = this.nanosleep(req, rem)?;
                 this.write_scalar(Scalar::from_i32(result), dest)?;
             }
+            "sched_getaffinity" => {
+                // Currently this function does not exist on all Unixes, e.g. on macOS.
+                if !matches!(&*this.tcx.sess.target.os, "linux" | "freebsd" | "android") {
+                    throw_unsup_format!(
+                        "`sched_getaffinity` is not supported on {}",
+                        this.tcx.sess.target.os
+                    );
+                }
+
+                let [pid, cpusetsize, mask] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+                let pid = this.read_scalar(pid)?.to_u32()?;
+                let cpusetsize = this.read_target_usize(cpusetsize)?;
+                let mask = this.read_pointer(mask)?;
+
+                // TODO: when https://github.com/rust-lang/miri/issues/3730 is fixed this should use its notion of tid/pid
+                let thread_id = match pid {
+                    0 => this.active_thread(),
+                    _ => throw_unsup_format!("`sched_getaffinity` is only supported with a pid of 0 (indicating the current thread)"),
+                };
+
+                // The actual representation of the CpuAffinityMask is [c_ulong; _], in practice either
+                //
+                // - [u32; 32] on 32-bit platforms
+                // - [u64; 16] everywhere else
+                let chunk_size = CpuAffinityMask::chunk_size(&this.tcx.sess.target);
+
+                if this.ptr_is_null(mask)? {
+                    let einval = this.eval_libc("EFAULT");
+                    this.set_last_error(einval)?;
+                    this.write_scalar(Scalar::from_i32(-1), dest)?;
+                } else if cpusetsize == 0 || cpusetsize.checked_rem(chunk_size).unwrap() != 0 {
+                    // we only copy whole chunks of size_of::<c_ulong>()
+                    let einval = this.eval_libc("EINVAL");
+                    this.set_last_error(einval)?;
+                    this.write_scalar(Scalar::from_i32(-1), dest)?;
+                } else if let Some(cpuset) = this.machine.thread_cpu_affinity.get(&thread_id) {
+                    let cpuset = cpuset.clone();
+                    // we only copy whole chunks of size_of::<c_ulong>()
+                    let byte_count = Ord::min(cpuset.as_slice().len(), cpusetsize.try_into().unwrap());
+                    this.write_bytes_ptr(mask, cpuset.as_slice()[..byte_count].iter().copied())?;
+                    this.write_scalar(Scalar::from_i32(0), dest)?;
+                } else {
+                    // The thread whose ID is pid could not be found
+                    let einval = this.eval_libc("ESRCH");
+                    this.set_last_error(einval)?;
+                    this.write_scalar(Scalar::from_i32(-1), dest)?;
+                }
+            }
+            "sched_setaffinity" => {
+                // Currently this function does not exist on all Unixes, e.g. on macOS.
+                if !matches!(&*this.tcx.sess.target.os, "linux" | "freebsd" | "android") {
+                    throw_unsup_format!(
+                        "`sched_setaffinity` is not supported on {}",
+                        this.tcx.sess.target.os
+                    );
+                }
+
+                let [pid, cpusetsize, mask] =
+                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+                let pid = this.read_scalar(pid)?.to_u32()?;
+                let cpusetsize = this.read_target_usize(cpusetsize)?;
+                let mask = this.read_pointer(mask)?;
+
+                // TODO: when https://github.com/rust-lang/miri/issues/3730 is fixed this should use its notion of tid/pid
+                let thread_id = match pid {
+                    0 => this.active_thread(),
+                    _ => throw_unsup_format!("`sched_setaffinity` is only supported with a pid of 0 (indicating the current thread)"),
+                };
+
+                #[allow(clippy::map_entry)]
+                if this.ptr_is_null(mask)? {
+                    let einval = this.eval_libc("EFAULT");
+                    this.set_last_error(einval)?;
+                    this.write_scalar(Scalar::from_i32(-1), dest)?;
+                } else {
+                    // NOTE: cpusetsize might be smaller than `CpuAffinityMask::CPU_MASK_BYTES`
+                    let bits_slice = this.read_bytes_ptr_strip_provenance(mask, Size::from_bytes(cpusetsize))?;
+                    // This ignores the bytes beyond `CpuAffinityMask::CPU_MASK_BYTES`
+                    let bits_array: [u8;CpuAffinityMask::CPU_MASK_BYTES] =
+                        std::array::from_fn(|i| bits_slice.get(i).copied().unwrap_or(0));
+                    match CpuAffinityMask::from_array(&this.tcx.sess.target, this.machine.num_cpus, bits_array) {
+                        Some(cpuset) => {
+                            this.machine.thread_cpu_affinity.insert(thread_id, cpuset);
+                            this.write_scalar(Scalar::from_i32(0), dest)?;
+                        }
+                        None => {
+                            // The intersection between the mask and the available CPUs was empty.
+                            let einval = this.eval_libc("EINVAL");
+                            this.set_last_error(einval)?;
+                            this.write_scalar(Scalar::from_i32(-1), dest)?;
+                        }
+                    }
+                }
+            }
 
             // Miscellaneous
             "isatty" => {
diff --git a/src/tools/miri/src/shims/unix/linux/foreign_items.rs b/src/tools/miri/src/shims/unix/linux/foreign_items.rs
index e31d43d9190a..95bee38cd783 100644
--- a/src/tools/miri/src/shims/unix/linux/foreign_items.rs
+++ b/src/tools/miri/src/shims/unix/linux/foreign_items.rs
@@ -178,19 +178,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
 
                 this.write_scalar(Scalar::from_i32(SIGRTMAX), dest)?;
             }
-            "sched_getaffinity" => {
-                // This shim isn't useful, aside from the fact that it makes `num_cpus`
-                // fall back to `sysconf` where it will successfully determine the number of CPUs.
-                let [pid, cpusetsize, mask] =
-                    this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
-                this.read_scalar(pid)?.to_i32()?;
-                this.read_target_usize(cpusetsize)?;
-                this.deref_pointer_as(mask, this.libc_ty_layout("cpu_set_t"))?;
-                // FIXME: we just return an error.
-                let einval = this.eval_libc("EINVAL");
-                this.set_last_error(einval)?;
-                this.write_scalar(Scalar::from_i32(-1), dest)?;
-            }
 
             // Incomplete shims that we "stub out" just to get pre-main initialization code to work.
             // These shims are enabled only when the caller is in the standard library.
diff --git a/src/tools/miri/tests/fail-dep/libc/affinity.rs b/src/tools/miri/tests/fail-dep/libc/affinity.rs
new file mode 100644
index 000000000000..c41d1d18018c
--- /dev/null
+++ b/src/tools/miri/tests/fail-dep/libc/affinity.rs
@@ -0,0 +1,17 @@
+//@ignore-target-windows: only very limited libc on Windows
+//@ignore-target-apple: `sched_setaffinity` is not supported on macOS
+//@compile-flags: -Zmiri-disable-isolation -Zmiri-num-cpus=4
+
+fn main() {
+    use libc::{cpu_set_t, sched_setaffinity};
+
+    use std::mem::size_of;
+
+    // If pid is zero, then the calling thread is used.
+    const PID: i32 = 0;
+
+    let cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
+
+    let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>() + 1, &cpuset) }; //~ ERROR: memory access failed
+    assert_eq!(err, 0);
+}
diff --git a/src/tools/miri/tests/fail-dep/libc/affinity.stderr b/src/tools/miri/tests/fail-dep/libc/affinity.stderr
new file mode 100644
index 000000000000..c01f15800fac
--- /dev/null
+++ b/src/tools/miri/tests/fail-dep/libc/affinity.stderr
@@ -0,0 +1,20 @@
+error: Undefined Behavior: memory access failed: ALLOC has size 128, so pointer to 129 bytes starting at offset 0 is out-of-bounds
+  --> $DIR/affinity.rs:LL:CC
+   |
+LL |     let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>() + 1, &cpuset) };
+   |                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ memory access failed: ALLOC has size 128, so pointer to 129 bytes starting at offset 0 is out-of-bounds
+   |
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+help: ALLOC was allocated here:
+  --> $DIR/affinity.rs:LL:CC
+   |
+LL |     let cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
+   |         ^^^^^^
+   = note: BACKTRACE (of the first span):
+   = note: inside `main` at $DIR/affinity.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+error: aborting due to 1 previous error
+
diff --git a/src/tools/miri/tests/pass-dep/libc/libc-affinity.rs b/src/tools/miri/tests/pass-dep/libc/libc-affinity.rs
new file mode 100644
index 000000000000..d360864b97c3
--- /dev/null
+++ b/src/tools/miri/tests/pass-dep/libc/libc-affinity.rs
@@ -0,0 +1,194 @@
+//@ignore-target-windows: only very limited libc on Windows
+//@ignore-target-apple: `sched_{g, s}etaffinity` are not supported on macOS
+//@compile-flags: -Zmiri-disable-isolation -Zmiri-num-cpus=4
+#![feature(io_error_more)]
+#![feature(pointer_is_aligned_to)]
+#![feature(strict_provenance)]
+
+use libc::{cpu_set_t, sched_getaffinity, sched_setaffinity};
+use std::mem::{size_of, size_of_val};
+
+// If pid is zero, then the calling thread is used.
+const PID: i32 = 0;
+
+fn null_pointers() {
+    let err = unsafe { sched_getaffinity(PID, size_of::<cpu_set_t>(), std::ptr::null_mut()) };
+    assert_eq!(err, -1);
+
+    let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>(), std::ptr::null()) };
+    assert_eq!(err, -1);
+}
+
+fn configure_no_cpus() {
+    let cpu_count = std::thread::available_parallelism().unwrap().get();
+
+    let mut cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
+
+    // configuring no CPUs will fail
+    let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>(), &cpuset) };
+    assert_eq!(err, -1);
+    assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::InvalidInput);
+
+    // configuring no (physically available) CPUs will fail
+    unsafe { libc::CPU_SET(cpu_count, &mut cpuset) };
+    let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>(), &cpuset) };
+    assert_eq!(err, -1);
+    assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::InvalidInput);
+}
+
+fn configure_unavailable_cpu() {
+    let cpu_count = std::thread::available_parallelism().unwrap().get();
+
+    // Safety: valid value for this type
+    let mut cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
+
+    let err = unsafe { sched_getaffinity(PID, size_of::<cpu_set_t>(), &mut cpuset) };
+    assert_eq!(err, 0);
+
+    // by default, only available CPUs are configured
+    for i in 0..cpu_count {
+        assert!(unsafe { libc::CPU_ISSET(i, &cpuset) });
+    }
+    assert!(unsafe { !libc::CPU_ISSET(cpu_count, &cpuset) });
+
+    // configure CPU that we don't have
+    unsafe { libc::CPU_SET(cpu_count, &mut cpuset) };
+
+    let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>(), &cpuset) };
+    assert_eq!(err, 0);
+
+    let err = unsafe { sched_getaffinity(PID, size_of::<cpu_set_t>(), &mut cpuset) };
+    assert_eq!(err, 0);
+
+    // the CPU is not set because it is not available
+    assert!(!unsafe { libc::CPU_ISSET(cpu_count, &cpuset) });
+}
+
+fn large_set() {
+    // rust's libc does not currently implement dynamic cpu set allocation
+    // and related functions like `CPU_ZERO_S`. So we have to be creative
+
+    // i.e. this has 2048 bits, twice the standard number
+    let mut cpuset = [u64::MAX; 32];
+
+    let err = unsafe { sched_setaffinity(PID, size_of_val(&cpuset), cpuset.as_ptr().cast()) };
+    assert_eq!(err, 0);
+
+    let err = unsafe { sched_getaffinity(PID, size_of_val(&cpuset), cpuset.as_mut_ptr().cast()) };
+    assert_eq!(err, 0);
+}
+
+fn get_small_cpu_mask() {
+    let mut cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
+
+    // should be 4 on 32-bit systems and 8 otherwise for systems that implement sched_getaffinity
+    let step = size_of::<std::ffi::c_ulong>();
+
+    for i in (0..=2).map(|x| x * step) {
+        if i == 0 {
+            // 0 always fails
+            let err = unsafe { sched_getaffinity(PID, i, &mut cpuset) };
+            assert_eq!(err, -1, "fail for {}", i);
+            assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::InvalidInput);
+        } else {
+            // other whole multiples of the size of c_ulong work
+            let err = unsafe { sched_getaffinity(PID, i, &mut cpuset) };
+            assert_eq!(err, 0, "fail for {i}");
+        }
+
+        // anything else returns an error
+        for j in 1..step {
+            let err = unsafe { sched_getaffinity(PID, i + j, &mut cpuset) };
+            assert_eq!(err, -1, "success for {}", i + j);
+            assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::InvalidInput);
+        }
+    }
+}
+
+fn set_custom_cpu_mask() {
+    let cpu_count = std::thread::available_parallelism().unwrap().get();
+
+    assert!(cpu_count > 1, "this test cannot do anything interesting with just one thread");
+
+    let mut cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
+
+    // at the start, CPU 1 should be set
+    let err = unsafe { sched_getaffinity(PID, size_of::<cpu_set_t>(), &mut cpuset) };
+    assert_eq!(err, 0);
+    assert!(unsafe { libc::CPU_ISSET(1, &cpuset) });
+
+    // make a valid mask
+    unsafe { libc::CPU_ZERO(&mut cpuset) };
+    unsafe { libc::CPU_SET(0, &mut cpuset) };
+
+    // giving a smaller mask is fine
+    let err = unsafe { sched_setaffinity(PID, 8, &cpuset) };
+    assert_eq!(err, 0);
+
+    // and actually disables the other CPUs
+    let err = unsafe { sched_getaffinity(PID, size_of::<cpu_set_t>(), &mut cpuset) };
+    assert_eq!(err, 0);
+    assert!(unsafe { !libc::CPU_ISSET(1, &cpuset) });
+
+    // it is important that we reset the cpu mask now for future tests
+    for i in 0..cpu_count {
+        unsafe { libc::CPU_SET(i, &mut cpuset) };
+    }
+
+    let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>(), &cpuset) };
+    assert_eq!(err, 0);
+}
+
+fn parent_child() {
+    let cpu_count = std::thread::available_parallelism().unwrap().get();
+
+    assert!(cpu_count > 1, "this test cannot do anything interesting with just one thread");
+
+    // configure the parent thread to run only on CPU 0
+    let mut parent_cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
+    unsafe { libc::CPU_SET(0, &mut parent_cpuset) };
+
+    let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>(), &parent_cpuset) };
+    assert_eq!(err, 0);
+
+    std::thread::scope(|spawner| {
+        spawner.spawn(|| {
+            let mut cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
+
+            let err = unsafe { sched_getaffinity(PID, size_of::<cpu_set_t>(), &mut cpuset) };
+            assert_eq!(err, 0);
+
+            // the child inherits its parent's set
+            assert!(unsafe { libc::CPU_ISSET(0, &cpuset) });
+            assert!(unsafe { !libc::CPU_ISSET(1, &cpuset) });
+
+            // configure cpu 1 for the child
+            unsafe { libc::CPU_SET(1, &mut cpuset) };
+        });
+    });
+
+    let err = unsafe { sched_getaffinity(PID, size_of::<cpu_set_t>(), &mut parent_cpuset) };
+    assert_eq!(err, 0);
+
+    // the parent's set should be unaffected
+    assert!(unsafe { !libc::CPU_ISSET(1, &parent_cpuset) });
+
+    // it is important that we reset the cpu mask now for future tests
+    let mut cpuset = parent_cpuset;
+    for i in 0..cpu_count {
+        unsafe { libc::CPU_SET(i, &mut cpuset) };
+    }
+
+    let err = unsafe { sched_setaffinity(PID, size_of::<cpu_set_t>(), &cpuset) };
+    assert_eq!(err, 0);
+}
+
+fn main() {
+    null_pointers();
+    configure_no_cpus();
+    configure_unavailable_cpu();
+    large_set();
+    get_small_cpu_mask();
+    set_custom_cpu_mask();
+    parent_child();
+}

From 9a0e671cc28d0b95ec238f858cecbf765fdf7f7a Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Sat, 6 Jul 2024 11:24:11 +0200
Subject: [PATCH 2/4] lookup c_ulong instead of hard-coding the chunk size

---
 .../miri/src/concurrency/cpu_affinity.rs      | 39 ++++++-------
 src/tools/miri/src/eval.rs                    |  3 +-
 src/tools/miri/src/helpers.rs                 | 55 +++++++++++--------
 src/tools/miri/src/machine.rs                 |  6 +-
 .../miri/src/shims/unix/foreign_items.rs      | 12 ++--
 5 files changed, 58 insertions(+), 57 deletions(-)

diff --git a/src/tools/miri/src/concurrency/cpu_affinity.rs b/src/tools/miri/src/concurrency/cpu_affinity.rs
index 085900ac3aa6..8df26d718bf6 100644
--- a/src/tools/miri/src/concurrency/cpu_affinity.rs
+++ b/src/tools/miri/src/concurrency/cpu_affinity.rs
@@ -1,6 +1,8 @@
-use crate::bug;
+use rustc_middle::ty::layout::LayoutOf;
 use rustc_target::abi::Endian;
 
+use crate::*;
+
 /// The maximum number of CPUs supported by miri.
 ///
 /// This value is compatible with the libc `CPU_SETSIZE` constant and corresponds to the number
@@ -19,41 +21,34 @@ pub(crate) struct CpuAffinityMask([u8; Self::CPU_MASK_BYTES]);
 impl CpuAffinityMask {
     pub(crate) const CPU_MASK_BYTES: usize = MAX_CPUS / 8;
 
-    pub fn new(target: &rustc_target::spec::Target, cpu_count: u32) -> Self {
+    pub fn new<'tcx>(cx: &impl LayoutOf<'tcx>, cpu_count: u32) -> Self {
         let mut this = Self([0; Self::CPU_MASK_BYTES]);
 
         // the default affinity mask includes only the available CPUs
         for i in 0..cpu_count as usize {
-            this.set(target, i);
+            this.set(cx, i);
         }
 
         this
     }
 
-    pub fn chunk_size(target: &rustc_target::spec::Target) -> u64 {
-        // The actual representation of the CpuAffinityMask is [c_ulong; _], in practice either
-        //
-        // - [u32; 32] on 32-bit platforms
-        // - [u64; 16] everywhere else
-
-        // FIXME: this should be `size_of::<core::ffi::c_ulong>()`
-        u64::from(target.pointer_width / 8)
+    pub fn chunk_size<'tcx>(cx: &impl LayoutOf<'tcx>) -> u64 {
+        // The actual representation of the CpuAffinityMask is [c_ulong; _].
+        let ulong = helpers::path_ty_layout(cx, &["core", "ffi", "c_ulong"]);
+        ulong.size.bytes()
     }
 
-    fn set(&mut self, target: &rustc_target::spec::Target, cpu: usize) {
+    fn set<'tcx>(&mut self, cx: &impl LayoutOf<'tcx>, cpu: usize) {
         // we silently ignore CPUs that are out of bounds. This matches the behavior of
         // `sched_setaffinity` with a mask that specifies more than `CPU_SETSIZE` CPUs.
         if cpu >= MAX_CPUS {
             return;
         }
 
-        // The actual representation of the CpuAffinityMask is [c_ulong; _], in practice either
-        //
-        // - [u32; 32] on 32-bit platforms
-        // - [u64; 16] everywhere else
-        //
+        // The actual representation of the CpuAffinityMask is [c_ulong; _].
         // Within the array elements, we need to use the endianness of the target.
-        match Self::chunk_size(target) {
+        let target = &cx.tcx().sess.target;
+        match Self::chunk_size(cx) {
             4 => {
                 let start = cpu / 32 * 4; // first byte of the correct u32
                 let chunk = self.0[start..].first_chunk_mut::<4>().unwrap();
@@ -72,7 +67,7 @@ impl CpuAffinityMask {
                     Endian::Big => (u64::from_be_bytes(*chunk) | 1 << offset).to_be_bytes(),
                 };
             }
-            other => bug!("other chunk sizes are not supported: {other}"),
+            other => bug!("chunk size not supported: {other}"),
         };
     }
 
@@ -80,13 +75,13 @@ impl CpuAffinityMask {
         self.0.as_slice()
     }
 
-    pub fn from_array(
-        target: &rustc_target::spec::Target,
+    pub fn from_array<'tcx>(
+        cx: &impl LayoutOf<'tcx>,
         cpu_count: u32,
         bytes: [u8; Self::CPU_MASK_BYTES],
     ) -> Option<Self> {
         // mask by what CPUs are actually available
-        let default = Self::new(target, cpu_count);
+        let default = Self::new(cx, cpu_count);
         let masked = std::array::from_fn(|i| bytes[i] & default.0[i]);
 
         // at least one thread must be set for the input to be valid
diff --git a/src/tools/miri/src/eval.rs b/src/tools/miri/src/eval.rs
index 9142b8b5fdbc..2184a4426c8d 100644
--- a/src/tools/miri/src/eval.rs
+++ b/src/tools/miri/src/eval.rs
@@ -282,7 +282,8 @@ pub fn create_ecx<'tcx>(
     })?;
 
     // Make sure we have MIR. We check MIR for some stable monomorphic function in libcore.
-    let sentinel = ecx.try_resolve_path(&["core", "ascii", "escape_default"], Namespace::ValueNS);
+    let sentinel =
+        helpers::try_resolve_path(tcx, &["core", "ascii", "escape_default"], Namespace::ValueNS);
     if !matches!(sentinel, Some(s) if tcx.is_mir_available(s.def.def_id())) {
         tcx.dcx().fatal(
             "the current sysroot was built without `-Zalways-encode-mir`, or libcore seems missing. \
diff --git a/src/tools/miri/src/helpers.rs b/src/tools/miri/src/helpers.rs
index 590e8984e990..ba094c988e5a 100644
--- a/src/tools/miri/src/helpers.rs
+++ b/src/tools/miri/src/helpers.rs
@@ -18,6 +18,7 @@ use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
 use rustc_middle::middle::dependency_format::Linkage;
 use rustc_middle::middle::exported_symbols::ExportedSymbol;
 use rustc_middle::mir;
+use rustc_middle::ty::layout::MaybeResult;
 use rustc_middle::ty::{
     self,
     layout::{LayoutOf, TyAndLayout},
@@ -159,6 +160,35 @@ fn try_resolve_did(tcx: TyCtxt<'_>, path: &[&str], namespace: Option<Namespace>)
     None
 }
 
+/// Gets an instance for a path; fails gracefully if the path does not exist.
+pub fn try_resolve_path<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    path: &[&str],
+    namespace: Namespace,
+) -> Option<ty::Instance<'tcx>> {
+    let did = try_resolve_did(tcx, path, Some(namespace))?;
+    Some(ty::Instance::mono(tcx, did))
+}
+
+/// Gets an instance for a path.
+#[track_caller]
+pub fn resolve_path<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    path: &[&str],
+    namespace: Namespace,
+) -> ty::Instance<'tcx> {
+    try_resolve_path(tcx, path, namespace)
+        .unwrap_or_else(|| panic!("failed to find required Rust item: {path:?}"))
+}
+
+/// Gets the layout of a type at a path.
+#[track_caller]
+pub fn path_ty_layout<'tcx>(cx: &impl LayoutOf<'tcx>, path: &[&str]) -> TyAndLayout<'tcx> {
+    let ty =
+        resolve_path(cx.tcx(), path, Namespace::TypeNS).ty(cx.tcx(), ty::ParamEnv::reveal_all());
+    cx.layout_of(ty).to_result().ok().unwrap()
+}
+
 /// Call `f` for each exported symbol.
 pub fn iter_exported_symbols<'tcx>(
     tcx: TyCtxt<'tcx>,
@@ -259,23 +289,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         try_resolve_did(*self.eval_context_ref().tcx, path, None).is_some()
     }
 
-    /// Gets an instance for a path; fails gracefully if the path does not exist.
-    fn try_resolve_path(&self, path: &[&str], namespace: Namespace) -> Option<ty::Instance<'tcx>> {
-        let tcx = self.eval_context_ref().tcx.tcx;
-        let did = try_resolve_did(tcx, path, Some(namespace))?;
-        Some(ty::Instance::mono(tcx, did))
-    }
-
-    /// Gets an instance for a path.
-    fn resolve_path(&self, path: &[&str], namespace: Namespace) -> ty::Instance<'tcx> {
-        self.try_resolve_path(path, namespace)
-            .unwrap_or_else(|| panic!("failed to find required Rust item: {path:?}"))
-    }
-
     /// Evaluates the scalar at the specified path.
     fn eval_path(&self, path: &[&str]) -> OpTy<'tcx> {
         let this = self.eval_context_ref();
-        let instance = this.resolve_path(path, Namespace::ValueNS);
+        let instance = resolve_path(*this.tcx, path, Namespace::ValueNS);
         // We don't give a span -- this isn't actually used directly by the program anyway.
         let const_val = this.eval_global(instance).unwrap_or_else(|err| {
             panic!("failed to evaluate required Rust item: {path:?}\n{err:?}")
@@ -344,19 +361,13 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 "`libc` crate is not reliably available on Windows targets; Miri should not use it there"
             );
         }
-        let ty = this
-            .resolve_path(&["libc", name], Namespace::TypeNS)
-            .ty(*this.tcx, ty::ParamEnv::reveal_all());
-        this.layout_of(ty).unwrap()
+        path_ty_layout(this, &["libc", name])
     }
 
     /// Helper function to get the `TyAndLayout` of a `windows` type
     fn windows_ty_layout(&self, name: &str) -> TyAndLayout<'tcx> {
         let this = self.eval_context_ref();
-        let ty = this
-            .resolve_path(&["std", "sys", "pal", "windows", "c", name], Namespace::TypeNS)
-            .ty(*this.tcx, ty::ParamEnv::reveal_all());
-        this.layout_of(ty).unwrap()
+        path_ty_layout(this, &["std", "sys", "pal", "windows", "c", name])
     }
 
     /// Project to the given *named* field (which must be a struct or union type).
diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs
index fee6ab068175..02bfd6ec8150 100644
--- a/src/tools/miri/src/machine.rs
+++ b/src/tools/miri/src/machine.rs
@@ -643,10 +643,8 @@ impl<'tcx> MiriMachine<'tcx> {
         let threads = ThreadManager::default();
         let mut thread_cpu_affinity = FxHashMap::default();
         if matches!(&*tcx.sess.target.os, "linux" | "freebsd" | "android") {
-            thread_cpu_affinity.insert(
-                threads.active_thread(),
-                CpuAffinityMask::new(&tcx.sess.target, config.num_cpus),
-            );
+            thread_cpu_affinity
+                .insert(threads.active_thread(), CpuAffinityMask::new(&layout_cx, config.num_cpus));
         }
         MiriMachine {
             tcx,
diff --git a/src/tools/miri/src/shims/unix/foreign_items.rs b/src/tools/miri/src/shims/unix/foreign_items.rs
index f5d3e0b536be..f1bae8646d31 100644
--- a/src/tools/miri/src/shims/unix/foreign_items.rs
+++ b/src/tools/miri/src/shims/unix/foreign_items.rs
@@ -594,11 +594,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                     _ => throw_unsup_format!("`sched_getaffinity` is only supported with a pid of 0 (indicating the current thread)"),
                 };
 
-                // The actual representation of the CpuAffinityMask is [c_ulong; _], in practice either
-                //
-                // - [u32; 32] on 32-bit platforms
-                // - [u64; 16] everywhere else
-                let chunk_size = CpuAffinityMask::chunk_size(&this.tcx.sess.target);
+                // The mask is stored in chunks, and the size must be a whole number of chunks.
+                let chunk_size = CpuAffinityMask::chunk_size(this);
 
                 if this.ptr_is_null(mask)? {
                     let einval = this.eval_libc("EFAULT");
@@ -643,7 +640,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                     _ => throw_unsup_format!("`sched_setaffinity` is only supported with a pid of 0 (indicating the current thread)"),
                 };
 
-                #[allow(clippy::map_entry)]
                 if this.ptr_is_null(mask)? {
                     let einval = this.eval_libc("EFAULT");
                     this.set_last_error(einval)?;
@@ -652,9 +648,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                     // NOTE: cpusetsize might be smaller than `CpuAffinityMask::CPU_MASK_BYTES`
                     let bits_slice = this.read_bytes_ptr_strip_provenance(mask, Size::from_bytes(cpusetsize))?;
                     // This ignores the bytes beyond `CpuAffinityMask::CPU_MASK_BYTES`
-                    let bits_array: [u8;CpuAffinityMask::CPU_MASK_BYTES] =
+                    let bits_array: [u8; CpuAffinityMask::CPU_MASK_BYTES] =
                         std::array::from_fn(|i| bits_slice.get(i).copied().unwrap_or(0));
-                    match CpuAffinityMask::from_array(&this.tcx.sess.target, this.machine.num_cpus, bits_array) {
+                    match CpuAffinityMask::from_array(this, this.machine.num_cpus, bits_array) {
                         Some(cpuset) => {
                             this.machine.thread_cpu_affinity.insert(thread_id, cpuset);
                             this.write_scalar(Scalar::from_i32(0), dest)?;

From 46019523e8bc5096ae452a39073aed06a6c45e1f Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sat, 6 Jul 2024 12:02:17 +0200
Subject: [PATCH 3/4] `sched_setaffinity`: test `cpusetsize == 0`

---
 .../miri/src/shims/unix/foreign_items.rs      |  4 +++-
 .../miri/tests/pass-dep/libc/libc-affinity.rs | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/tools/miri/src/shims/unix/foreign_items.rs b/src/tools/miri/src/shims/unix/foreign_items.rs
index f1bae8646d31..3a18d6220333 100644
--- a/src/tools/miri/src/shims/unix/foreign_items.rs
+++ b/src/tools/miri/src/shims/unix/foreign_items.rs
@@ -645,7 +645,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                     this.set_last_error(einval)?;
                     this.write_scalar(Scalar::from_i32(-1), dest)?;
                 } else {
-                    // NOTE: cpusetsize might be smaller than `CpuAffinityMask::CPU_MASK_BYTES`
+                    // NOTE: cpusetsize might be smaller than `CpuAffinityMask::CPU_MASK_BYTES`.
+                    // Any unspecified bytes are treated as zero here (none of the CPUs are configured).
+                    // This is not exactly documented, so we assume that this is the behavior in practice.
                     let bits_slice = this.read_bytes_ptr_strip_provenance(mask, Size::from_bytes(cpusetsize))?;
                     // This ignores the bytes beyond `CpuAffinityMask::CPU_MASK_BYTES`
                     let bits_array: [u8; CpuAffinityMask::CPU_MASK_BYTES] =
diff --git a/src/tools/miri/tests/pass-dep/libc/libc-affinity.rs b/src/tools/miri/tests/pass-dep/libc/libc-affinity.rs
index d360864b97c3..ac3001745db8 100644
--- a/src/tools/miri/tests/pass-dep/libc/libc-affinity.rs
+++ b/src/tools/miri/tests/pass-dep/libc/libc-affinity.rs
@@ -105,6 +105,24 @@ fn get_small_cpu_mask() {
     }
 }
 
+fn set_small_cpu_mask() {
+    let mut cpuset: cpu_set_t = unsafe { core::mem::MaybeUninit::zeroed().assume_init() };
+
+    let err = unsafe { sched_getaffinity(PID, size_of::<cpu_set_t>(), &mut cpuset) };
+    assert_eq!(err, 0);
+
+    // setting a mask of size 0 is invalid
+    let err = unsafe { sched_setaffinity(PID, 0, &cpuset) };
+    assert_eq!(err, -1);
+    assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::InvalidInput);
+
+    // any other number of bytes (at least up to `size_of<cpu_set_t>()` will work
+    for i in 1..24 {
+        let err = unsafe { sched_setaffinity(PID, i, &cpuset) };
+        assert_eq!(err, 0, "fail for {i}");
+    }
+}
+
 fn set_custom_cpu_mask() {
     let cpu_count = std::thread::available_parallelism().unwrap().get();
 
@@ -189,6 +207,7 @@ fn main() {
     configure_unavailable_cpu();
     large_set();
     get_small_cpu_mask();
+    set_small_cpu_mask();
     set_custom_cpu_mask();
     parent_child();
 }

From d65e3688df6a282667aed91cea2c7ecfeb636313 Mon Sep 17 00:00:00 2001
From: Folkert <folkert@folkertdev.nl>
Date: Sat, 6 Jul 2024 14:06:07 +0200
Subject: [PATCH 4/4] `sched_setaffinity`: adjust test on BE systems

---
 src/tools/miri/tests/pass-dep/libc/libc-affinity.rs | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/tools/miri/tests/pass-dep/libc/libc-affinity.rs b/src/tools/miri/tests/pass-dep/libc/libc-affinity.rs
index ac3001745db8..0e482ab26010 100644
--- a/src/tools/miri/tests/pass-dep/libc/libc-affinity.rs
+++ b/src/tools/miri/tests/pass-dep/libc/libc-affinity.rs
@@ -116,8 +116,13 @@ fn set_small_cpu_mask() {
     assert_eq!(err, -1);
     assert_eq!(std::io::Error::last_os_error().kind(), std::io::ErrorKind::InvalidInput);
 
-    // any other number of bytes (at least up to `size_of<cpu_set_t>()` will work
-    for i in 1..24 {
+    // On LE systems, any other number of bytes (at least up to `size_of::<cpu_set_t>()`) works.
+    // On BE systems, CPUs 0..8 are stored in the last byte of the first `c_ulong` chunk. If that
+    // byte is not included, no valid CPUs are configured, so we skip those lengths.
+    let cpu_zero_included_length =
+        if cfg!(target_endian = "little") { 1 } else { core::mem::size_of::<std::ffi::c_ulong>() };
+
+    for i in cpu_zero_included_length..24 {
         let err = unsafe { sched_setaffinity(PID, i, &cpuset) };
         assert_eq!(err, 0, "fail for {i}");
     }