Auto merge of #144465 - orlp:system-alloc-tls, r=Mark-Simulacrum

Allow the global allocator to use thread-local storage and std::thread::current()

Fixes https://github.com/rust-lang/rust/issues/115209.

Currently, the thread-local storage implementation uses the `Global` allocator in some places when it needs to allocate memory. This effectively means the global allocator cannot use thread-local variables, which is a shame, as an allocator is precisely one of the places where you'd *really* want to use thread-locals. We also see that this has led to hacks such as https://github.com/rust-lang/rust/pull/116402, where we detect re-entrance and abort.

So I've made the places where I could find allocation happening in the TLS implementation use the `System` allocator instead. I also applied this change to the storage allocated for a `Thread` handle, so that it too may be used freely in the global allocator, e.g. for registering it in a central place or for parking primitives.
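For example, after this change an allocator along these lines works without recursing into itself (a minimal sketch, not part of this PR; `CountingAlloc` and `ALLOC_COUNT` are illustrative names):

```rust
use std::alloc::{GlobalAlloc, Layout, System};
use std::cell::Cell;

// Per-thread count of live allocations made through the global allocator.
// `Cell<usize>` has no destructor, so this thread-local never needs a dtor
// registration and is cheap to touch from inside the allocator.
thread_local! {
    static ALLOC_COUNT: Cell<usize> = Cell::new(0);
}

struct CountingAlloc;

unsafe impl GlobalAlloc for CountingAlloc {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        // With this PR, the TLS machinery allocates through `System`, so this
        // access cannot re-enter `CountingAlloc::alloc`.
        ALLOC_COUNT.with(|c| c.set(c.get() + 1));
        unsafe { System.alloc(layout) }
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        ALLOC_COUNT.with(|c| c.set(c.get().saturating_sub(1)));
        unsafe { System.dealloc(ptr, layout) }
    }
}

#[global_allocator]
static GLOBAL: CountingAlloc = CountingAlloc;

fn main() {
    let v = vec![1, 2, 3]; // allocated via CountingAlloc
    drop(v);
    // Stdout's buffer also goes through the global allocator; that is fine
    // because the TLS access above never recurses.
    ALLOC_COUNT.with(|c| println!("live allocations on this thread: {}", c.get()));
}
```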

r? `@joboet`
bors 2025-11-29 02:08:53 +00:00
commit 467250ddb2
21 changed files with 390 additions and 210 deletions

View file

@ -115,6 +115,31 @@ use crate::{cmp, ptr};
/// Whether allocations happen or not is not part of the program behavior, even if it
/// could be detected via an allocator that tracks allocations by printing or otherwise
/// having side effects.
///
/// # Re-entrance
///
/// When implementing a global allocator, one has to be careful not to accidentally create an
/// infinitely recursive implementation, as many constructs in the Rust standard library may
/// allocate internally. For example, on some platforms [`std::sync::Mutex`] may allocate, so
/// using it in a global allocator is highly problematic.
///
/// For this reason, one should generally stick to library features available through [`core`]
/// and avoid using [`std`] in a global allocator. A few features from [`std`] are guaranteed
/// not to use `#[global_allocator]` to allocate:
///
/// - [`std::thread_local`],
/// - [`std::thread::current`],
/// - [`std::thread::park`] and [`std::thread::Thread`]'s [`unpark`] method and
/// [`Clone`] implementation.
///
/// [`std`]: ../../std/index.html
/// [`std::sync::Mutex`]: ../../std/sync/struct.Mutex.html
/// [`std::thread_local`]: ../../std/macro.thread_local.html
/// [`std::thread::current`]: ../../std/thread/fn.current.html
/// [`std::thread::park`]: ../../std/thread/fn.park.html
/// [`std::thread::Thread`]: ../../std/thread/struct.Thread.html
/// [`unpark`]: ../../std/thread/struct.Thread.html#method.unpark
#[stable(feature = "global_alloc", since = "1.28.0")]
pub unsafe trait GlobalAlloc {
/// Allocates memory as described by the given `layout`.
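A minimal sketch of what the re-entrance guidance above permits (not part of this diff; `TaggingAlloc` is a made-up name): the allocator may freely call [`std::thread::current`], while e.g. a [`std::sync::Mutex`] would risk recursing back into it.

```rust
use std::alloc::{GlobalAlloc, Layout, System};

struct TaggingAlloc;

unsafe impl GlobalAlloc for TaggingAlloc {
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        // `std::thread::current()` is guaranteed not to allocate through
        // `#[global_allocator]`, so this call cannot recurse into `alloc`.
        let _id = std::thread::current().id();
        // ... tag or record the allocation for `_id` here ...
        unsafe { System.alloc(layout) }
    }

    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        unsafe { System.dealloc(ptr, layout) }
    }
}
```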

View file

@ -11,7 +11,7 @@
//!
//! This attribute allows configuring the choice of global allocator.
//! You can use this to implement a completely custom global allocator
//! to route all default allocation requests to a custom object.
//! to route all[^system-alloc] default allocation requests to a custom object.
//!
//! ```rust
//! use std::alloc::{GlobalAlloc, System, Layout};
@ -52,6 +52,13 @@
//!
//! The `#[global_allocator]` can only be used once in a crate
//! or its recursive dependencies.
//!
//! [^system-alloc]: Note that the Rust standard library internals may still
//! directly call [`System`] when necessary (for example, for the runtime
//! support typically required to implement a global allocator; see the [re-entrance]
//! section on [`GlobalAlloc`] for more details).
//!
//! [re-entrance]: trait.GlobalAlloc.html#re-entrance
#![deny(unsafe_op_in_unsafe_fn)]
#![stable(feature = "alloc_module", since = "1.28.0")]

View file

@ -3,6 +3,7 @@
use core::arch::global_asm;
use core::sync::atomic::{Atomic, AtomicUsize, Ordering};
use crate::alloc::System;
use crate::io::Write;
// runtime features
@ -63,7 +64,9 @@ unsafe extern "C" fn tcs_init(secondary: bool) {
#[unsafe(no_mangle)]
extern "C" fn entry(p1: u64, p2: u64, p3: u64, secondary: bool, p4: u64, p5: u64) -> EntryReturn {
// FIXME: how to support TLS in library mode?
let tls = Box::new(tls::Tls::new());
// We use the System allocator here such that the global allocator may use
// thread-locals.
let tls = Box::new_in(tls::Tls::new(), System);
let tls_guard = unsafe { tls.activate() };
if secondary {

View file

@ -89,13 +89,6 @@ impl Tls {
ActiveTls { tls: self }
}
#[allow(unused)]
pub unsafe fn activate_persistent(self: Box<Self>) {
// FIXME: Needs safety information. See entry.S for `set_tls_ptr` definition.
let ptr = Box::into_raw(self).cast_const().cast::<u8>();
unsafe { set_tls_ptr(ptr) };
}
unsafe fn current<'a>() -> &'a Tls {
// FIXME: Needs safety information. See entry.S for `set_tls_ptr` definition.
unsafe { &*(get_tls_ptr() as *const Tls) }

View file

@ -1,4 +1,5 @@
use crate::num::NonZero;
use crate::thread::ThreadInit;
use crate::time::Duration;
use crate::{io, ptr};
@ -16,14 +17,14 @@ pub const DEFAULT_MIN_STACK_SIZE: usize = 1 << 20;
impl Thread {
pub unsafe fn new_with_coreid(
stack: usize,
p: Box<dyn FnOnce()>,
init: Box<ThreadInit>,
core_id: isize,
) -> io::Result<Thread> {
let p = Box::into_raw(Box::new(p));
let data = Box::into_raw(init);
let tid = unsafe {
hermit_abi::spawn2(
thread_start,
p.expose_provenance(),
data.expose_provenance(),
hermit_abi::Priority::into(hermit_abi::NORMAL_PRIO),
stack,
core_id,
@ -31,35 +32,34 @@ impl Thread {
};
return if tid == 0 {
// The thread failed to start and as a result p was not consumed. Therefore, it is
// The thread failed to start and as a result data was not consumed. Therefore, it is
// safe to reconstruct the box so that it gets deallocated.
unsafe {
drop(Box::from_raw(p));
drop(Box::from_raw(data));
}
Err(io::const_error!(io::ErrorKind::Uncategorized, "unable to create thread!"))
} else {
Ok(Thread { tid })
};
extern "C" fn thread_start(main: usize) {
unsafe {
// Finally, let's run some code.
Box::from_raw(ptr::with_exposed_provenance::<Box<dyn FnOnce()>>(main).cast_mut())();
extern "C" fn thread_start(data: usize) {
// SAFETY: we are simply recreating the box that was leaked earlier.
let init =
unsafe { Box::from_raw(ptr::with_exposed_provenance_mut::<ThreadInit>(data)) };
let rust_start = init.init();
rust_start();
// run all destructors
// Run all destructors.
unsafe {
crate::sys::thread_local::destructors::run();
crate::rt::thread_cleanup();
}
crate::rt::thread_cleanup();
}
}
pub unsafe fn new(
stack: usize,
_name: Option<&str>,
p: Box<dyn FnOnce()>,
) -> io::Result<Thread> {
pub unsafe fn new(stack: usize, init: Box<ThreadInit>) -> io::Result<Thread> {
unsafe {
Thread::new_with_coreid(stack, p, -1 /* = no specific core */)
Thread::new_with_coreid(stack, init, -1 /* = no specific core */)
}
}

View file

@ -2,6 +2,7 @@
use crate::io;
use crate::sys::pal::abi::{thread, usercalls};
use crate::thread::ThreadInit;
use crate::time::Duration;
pub struct Thread(task_queue::JoinHandle);
@ -13,6 +14,7 @@ pub use self::task_queue::JoinNotifier;
mod task_queue {
use super::wait_notify;
use crate::sync::{Mutex, MutexGuard};
use crate::thread::ThreadInit;
pub type JoinHandle = wait_notify::Waiter;
@ -25,19 +27,20 @@ mod task_queue {
}
pub(super) struct Task {
p: Box<dyn FnOnce() + Send>,
init: Box<ThreadInit>,
done: JoinNotifier,
}
impl Task {
pub(super) fn new(p: Box<dyn FnOnce() + Send>) -> (Task, JoinHandle) {
pub(super) fn new(init: Box<ThreadInit>) -> (Task, JoinHandle) {
let (done, recv) = wait_notify::new();
let done = JoinNotifier(Some(done));
(Task { p, done }, recv)
(Task { init, done }, recv)
}
pub(super) fn run(self) -> JoinNotifier {
(self.p)();
let rust_start = self.init.init();
rust_start();
self.done
}
}
@ -93,14 +96,10 @@ pub mod wait_notify {
impl Thread {
// unsafe: see thread::Builder::spawn_unchecked for safety requirements
pub unsafe fn new(
_stack: usize,
_name: Option<&str>,
p: Box<dyn FnOnce() + Send>,
) -> io::Result<Thread> {
pub unsafe fn new(_stack: usize, init: Box<ThreadInit>) -> io::Result<Thread> {
let mut queue_lock = task_queue::lock();
unsafe { usercalls::launch_thread()? };
let (task, handle) = task_queue::Task::new(p);
let (task, handle) = task_queue::Task::new(init);
queue_lock.push(task);
Ok(Thread(handle))
}

View file

@ -8,6 +8,7 @@ use crate::sync::atomic::{Atomic, AtomicUsize, Ordering};
use crate::sys::pal::itron::error::{ItronError, expect_success, expect_success_aborting};
use crate::sys::pal::itron::time::dur2reltims;
use crate::sys::pal::itron::{abi, task};
use crate::thread::ThreadInit;
use crate::time::Duration;
use crate::{hint, io};
@ -27,9 +28,9 @@ unsafe impl Sync for Thread {}
/// State data shared between a parent thread and child thread. It's dropped on
/// a transition to one of the final states.
struct ThreadInner {
/// This field is used on thread creation to pass a closure from
/// This field is used on thread creation to pass initialization data from
/// `Thread::new` to the created task.
start: UnsafeCell<ManuallyDrop<Box<dyn FnOnce()>>>,
init: UnsafeCell<ManuallyDrop<Box<ThreadInit>>>,
/// A state machine. Each transition is annotated with `[...]` in the
/// source code.
@ -65,7 +66,7 @@ struct ThreadInner {
lifecycle: Atomic<usize>,
}
// Safety: The only `!Sync` field, `ThreadInner::start`, is only touched by
// Safety: The only `!Sync` field, `ThreadInner::init`, is only touched by
// the task represented by `ThreadInner`.
unsafe impl Sync for ThreadInner {}
@ -84,13 +85,9 @@ impl Thread {
/// # Safety
///
/// See `thread::Builder::spawn_unchecked` for safety requirements.
pub unsafe fn new(
stack: usize,
_name: Option<&str>,
p: Box<dyn FnOnce()>,
) -> io::Result<Thread> {
pub unsafe fn new(stack: usize, init: Box<ThreadInit>) -> io::Result<Thread> {
let inner = Box::new(ThreadInner {
start: UnsafeCell::new(ManuallyDrop::new(p)),
init: UnsafeCell::new(ManuallyDrop::new(init)),
lifecycle: AtomicUsize::new(LIFECYCLE_INIT),
});
@ -100,10 +97,11 @@ impl Thread {
let inner = unsafe { &*p_inner };
// Safety: Since `trampoline` is called only once for each
// `ThreadInner` and only `trampoline` touches `start`,
// `start` contains contents and is safe to mutably borrow.
let p = unsafe { ManuallyDrop::take(&mut *inner.start.get()) };
p();
// `ThreadInner` and only `trampoline` touches `init`,
// `init` contains contents and is safe to mutably borrow.
let init = unsafe { ManuallyDrop::take(&mut *inner.init.get()) };
let rust_start = init.init();
rust_start();
// Fix the current thread's state just in case, so that the
// destructors won't abort

View file

@ -1,5 +1,6 @@
use crate::mem::{self, ManuallyDrop};
use crate::sys::os;
use crate::thread::ThreadInit;
use crate::time::Duration;
use crate::{cmp, io, ptr};
@ -24,12 +25,8 @@ unsafe impl Sync for Thread {}
impl Thread {
// unsafe: see thread::Builder::spawn_unchecked for safety requirements
pub unsafe fn new(
stack: usize,
_name: Option<&str>,
p: Box<dyn FnOnce()>,
) -> io::Result<Thread> {
let p = Box::into_raw(Box::new(p));
pub unsafe fn new(stack: usize, init: Box<ThreadInit>) -> io::Result<Thread> {
let data = Box::into_raw(init);
let mut native: libc::pthread_t = unsafe { mem::zeroed() };
let mut attr: libc::pthread_attr_t = unsafe { mem::zeroed() };
assert_eq!(unsafe { libc::pthread_attr_init(&mut attr) }, 0);
@ -62,16 +59,16 @@ impl Thread {
}
};
let ret = unsafe { libc::pthread_create(&mut native, &attr, thread_start, p as *mut _) };
// Note: if the thread creation fails and this assert fails, then p will
let ret = unsafe { libc::pthread_create(&mut native, &attr, thread_start, data as *mut _) };
// Note: if the thread creation fails and this assert fails, then data will
// be leaked. However, an alternative design could cause double-free
// which is clearly worse.
assert_eq!(unsafe { libc::pthread_attr_destroy(&mut attr) }, 0);
return if ret != 0 {
// The thread failed to start and as a result p was not consumed. Therefore, it is
// The thread failed to start and as a result data was not consumed. Therefore, it is
// safe to reconstruct the box so that it gets deallocated.
drop(unsafe { Box::from_raw(p) });
drop(unsafe { Box::from_raw(data) });
Err(io::Error::from_raw_os_error(ret))
} else {
// The new thread will start running earliest after the next yield.
@ -80,15 +77,11 @@ impl Thread {
Ok(Thread { id: native })
};
extern "C" fn thread_start(main: *mut libc::c_void) -> *mut libc::c_void {
unsafe {
// Next, set up our stack overflow handler which may get triggered if we run
// out of stack.
// this is not necessary in TEE.
//let _handler = stack_overflow::Handler::new();
// Finally, let's run some code.
Box::from_raw(main as *mut Box<dyn FnOnce()>)();
}
extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void {
// SAFETY: we are simply recreating the box that was leaked earlier.
let init = unsafe { Box::from_raw(data as *mut ThreadInit) };
let rust_start = init.init();
rust_start();
ptr::null_mut()
}
}

View file

@ -14,6 +14,7 @@ use crate::sys::weak::dlsym;
#[cfg(any(target_os = "solaris", target_os = "illumos", target_os = "nto",))]
use crate::sys::weak::weak;
use crate::sys::{os, stack_overflow};
use crate::thread::{ThreadInit, current};
use crate::time::Duration;
use crate::{cmp, io, ptr};
#[cfg(not(any(
@ -30,11 +31,6 @@ pub const DEFAULT_MIN_STACK_SIZE: usize = 256 * 1024;
#[cfg(any(target_os = "espidf", target_os = "nuttx"))]
pub const DEFAULT_MIN_STACK_SIZE: usize = 0; // 0 indicates that the stack size configured in the ESP-IDF/NuttX menuconfig system should be used
struct ThreadData {
name: Option<Box<str>>,
f: Box<dyn FnOnce()>,
}
pub struct Thread {
id: libc::pthread_t,
}
@ -47,13 +43,8 @@ unsafe impl Sync for Thread {}
impl Thread {
// unsafe: see thread::Builder::spawn_unchecked for safety requirements
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub unsafe fn new(
stack: usize,
name: Option<&str>,
f: Box<dyn FnOnce()>,
) -> io::Result<Thread> {
let data = Box::new(ThreadData { name: name.map(Box::from), f });
pub unsafe fn new(stack: usize, init: Box<ThreadInit>) -> io::Result<Thread> {
let data = init;
let mut attr: mem::MaybeUninit<libc::pthread_attr_t> = mem::MaybeUninit::uninit();
assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);
let mut attr = DropGuard::new(&mut attr, |attr| {
@ -116,12 +107,16 @@ impl Thread {
extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void {
unsafe {
let data = Box::from_raw(data as *mut ThreadData);
// Next, set up our stack overflow handler which may get triggered if we run
// out of stack.
let _handler = stack_overflow::Handler::new(data.name);
// Finally, let's run some code.
(data.f)();
// SAFETY: we are simply recreating the box that was leaked earlier.
let init = Box::from_raw(data as *mut ThreadInit);
let rust_start = init.init();
// Set up our thread name and stack overflow handler which may get triggered
// if we run out of stack.
let thread = current();
let _handler = stack_overflow::Handler::new(thread.name().map(Box::from));
rust_start();
}
ptr::null_mut()
}

View file

@ -1,19 +1,22 @@
use crate::ffi::CStr;
use crate::io;
use crate::num::NonZero;
use crate::thread::ThreadInit;
use crate::time::Duration;
// Silence dead code warnings for the otherwise unused ThreadInit::init() call.
#[expect(dead_code)]
fn dummy_init_call(init: Box<ThreadInit>) {
drop(init.init());
}
pub struct Thread(!);
pub const DEFAULT_MIN_STACK_SIZE: usize = 64 * 1024;
impl Thread {
// unsafe: see thread::Builder::spawn_unchecked for safety requirements
pub unsafe fn new(
_stack: usize,
_name: Option<&str>,
_p: Box<dyn FnOnce()>,
) -> io::Result<Thread> {
pub unsafe fn new(_stack: usize, _init: Box<ThreadInit>) -> io::Result<Thread> {
Err(io::Error::UNSUPPORTED_PLATFORM)
}

View file

@ -7,6 +7,8 @@ use crate::mem;
use crate::num::NonZero;
#[cfg(target_feature = "atomics")]
use crate::sys::os;
#[cfg(target_feature = "atomics")]
use crate::thread::ThreadInit;
use crate::time::Duration;
#[cfg(target_feature = "atomics")]
use crate::{cmp, ptr};
@ -73,12 +75,8 @@ pub const DEFAULT_MIN_STACK_SIZE: usize = 1024 * 1024;
#[cfg(target_feature = "atomics")]
impl Thread {
// unsafe: see thread::Builder::spawn_unchecked for safety requirements
pub unsafe fn new(
stack: usize,
_name: Option<&str>,
p: Box<dyn FnOnce()>,
) -> io::Result<Thread> {
let p = Box::into_raw(Box::new(p));
pub unsafe fn new(stack: usize, init: Box<ThreadInit>) -> io::Result<Thread> {
let data = Box::into_raw(init);
let mut native: libc::pthread_t = unsafe { mem::zeroed() };
let mut attr: libc::pthread_attr_t = unsafe { mem::zeroed() };
assert_eq!(unsafe { libc::pthread_attr_init(&mut attr) }, 0);
@ -100,28 +98,28 @@ impl Thread {
}
};
let ret = unsafe { libc::pthread_create(&mut native, &attr, thread_start, p as *mut _) };
// Note: if the thread creation fails and this assert fails, then p will
let ret = unsafe { libc::pthread_create(&mut native, &attr, thread_start, data as *mut _) };
// Note: if the thread creation fails and this assert fails, then data will
// be leaked. However, an alternative design could cause double-free
// which is clearly worse.
assert_eq!(unsafe { libc::pthread_attr_destroy(&mut attr) }, 0);
return if ret != 0 {
// The thread failed to start and as a result p was not consumed. Therefore, it is
// The thread failed to start and as a result data was not consumed. Therefore, it is
// safe to reconstruct the box so that it gets deallocated.
unsafe {
drop(Box::from_raw(p));
drop(Box::from_raw(data));
}
Err(io::Error::from_raw_os_error(ret))
} else {
Ok(Thread { id: native })
};
extern "C" fn thread_start(main: *mut libc::c_void) -> *mut libc::c_void {
unsafe {
// Finally, let's run some code.
Box::from_raw(main as *mut Box<dyn FnOnce()>)();
}
extern "C" fn thread_start(data: *mut libc::c_void) -> *mut libc::c_void {
// SAFETY: we are simply recreating the box that was leaked earlier.
let init = unsafe { Box::from_raw(data as *mut ThreadInit) };
let rust_start = init.init();
rust_start();
ptr::null_mut()
}
}

View file

@ -8,6 +8,7 @@ use crate::sys::pal::time::WaitableTimer;
use crate::sys::pal::{dur2timeout, to_u16s};
use crate::sys::{c, stack_overflow};
use crate::sys_common::FromInner;
use crate::thread::ThreadInit;
use crate::time::Duration;
use crate::{io, ptr};
@ -20,23 +21,19 @@ pub struct Thread {
impl Thread {
// unsafe: see thread::Builder::spawn_unchecked for safety requirements
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
pub unsafe fn new(
stack: usize,
_name: Option<&str>,
p: Box<dyn FnOnce()>,
) -> io::Result<Thread> {
let p = Box::into_raw(Box::new(p));
pub unsafe fn new(stack: usize, init: Box<ThreadInit>) -> io::Result<Thread> {
let data = Box::into_raw(init);
// CreateThread rounds up values for the stack size to the nearest page size (at least 4kb).
// If a value of zero is given then the default stack size is used instead.
// SAFETY: `thread_start` has the right ABI for a thread's entry point.
// `p` is simply passed through to the new thread without being touched.
// `data` is simply passed through to the new thread without being touched.
let ret = unsafe {
let ret = c::CreateThread(
ptr::null_mut(),
stack,
Some(thread_start),
p as *mut _,
data as *mut _,
c::STACK_SIZE_PARAM_IS_A_RESERVATION,
ptr::null_mut(),
);
@ -45,19 +42,21 @@ impl Thread {
return if let Ok(handle) = ret.try_into() {
Ok(Thread { handle: Handle::from_inner(handle) })
} else {
// The thread failed to start and as a result p was not consumed. Therefore, it is
// The thread failed to start and as a result data was not consumed. Therefore, it is
// safe to reconstruct the box so that it gets deallocated.
unsafe { drop(Box::from_raw(p)) };
unsafe { drop(Box::from_raw(data)) };
Err(io::Error::last_os_error())
};
unsafe extern "system" fn thread_start(main: *mut c_void) -> u32 {
// Next, reserve some stack space for if we otherwise run out of stack.
unsafe extern "system" fn thread_start(data: *mut c_void) -> u32 {
// SAFETY: we are simply recreating the box that was leaked earlier.
let init = unsafe { Box::from_raw(data as *mut ThreadInit) };
let rust_start = init.init();
// Reserve some stack space for if we otherwise run out of stack.
stack_overflow::reserve_stack();
// Finally, let's run some code.
// SAFETY: We are simply recreating the box that was leaked earlier.
// It's the responsibility of the one who call `Thread::new` to ensure this is safe to call here.
unsafe { Box::from_raw(main as *mut Box<dyn FnOnce()>)() };
rust_start();
0
}
}

View file

@ -7,6 +7,7 @@ use crate::os::xous::ffi::{
map_memory, update_memory_flags,
};
use crate::os::xous::services::{TicktimerScalar, ticktimer_server};
use crate::thread::ThreadInit;
use crate::time::Duration;
pub struct Thread {
@ -19,12 +20,8 @@ pub const GUARD_PAGE_SIZE: usize = 4096;
impl Thread {
// unsafe: see thread::Builder::spawn_unchecked for safety requirements
pub unsafe fn new(
stack: usize,
_name: Option<&str>,
p: Box<dyn FnOnce()>,
) -> io::Result<Thread> {
let p = Box::into_raw(Box::new(p));
pub unsafe fn new(stack: usize, init: Box<ThreadInit>) -> io::Result<Thread> {
let data = Box::into_raw(init);
let mut stack_size = crate::cmp::max(stack, MIN_STACK_SIZE);
if (stack_size & 4095) != 0 {
@ -65,22 +62,32 @@ impl Thread {
let tid = create_thread(
thread_start as *mut usize,
&mut stack_plus_guard_pages[GUARD_PAGE_SIZE..(stack_size + GUARD_PAGE_SIZE)],
p as usize,
data as usize,
guard_page_pre,
stack_size,
0,
)
.map_err(|code| io::Error::from_raw_os_error(code as i32))?;
#[inline(never)]
fn rust_main_thread_not_inlined(init: Box<ThreadInit>) {
let rust_start = init.init();
rust_start();
}
extern "C" fn thread_start(
main: *mut usize,
data: *mut usize,
guard_page_pre: usize,
stack_size: usize,
) -> ! {
unsafe {
// Run the contents of the new thread.
Box::from_raw(main as *mut Box<dyn FnOnce()>)();
}
// SAFETY: we are simply recreating the box that was leaked earlier.
let init = unsafe { Box::from_raw(data as *mut ThreadInit) };
// Run the main thread with an inline(never) barrier to prevent
// dealloc calls from being reordered to after the TLS has been destroyed.
// See https://github.com/rust-lang/rust/pull/144465#pullrequestreview-3289729950
// for more context.
rust_main_thread_not_inlined(init);
// Destroy TLS, which will free the TLS page and call the destructor for
// any thread local storage (if any).

View file

@ -1,19 +1,16 @@
use crate::alloc::System;
use crate::cell::RefCell;
use crate::sys::thread_local::guard;
#[thread_local]
static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> = RefCell::new(Vec::new());
static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8)), System>> =
RefCell::new(Vec::new_in(System));
pub unsafe fn register(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
let Ok(mut dtors) = DTORS.try_borrow_mut() else {
// This point can only be reached if the global allocator calls this
// function again.
// FIXME: maybe use the system allocator instead?
rtabort!("the global allocator may not use TLS with destructors");
rtabort!("the System allocator may not use TLS with destructors")
};
guard::enable();
dtors.push((t, dtor));
}
@ -36,7 +33,7 @@ pub unsafe fn run() {
}
None => {
// Free the list memory.
*dtors = Vec::new();
*dtors = Vec::new_in(System);
break;
}
}

View file

@ -38,6 +38,7 @@
use core::arch::asm;
use crate::alloc::System;
use crate::mem::ManuallyDrop;
use crate::os::xous::ffi::{MemoryFlags, map_memory, unmap_memory};
use crate::ptr;
@ -151,7 +152,10 @@ struct Node {
}
unsafe fn register_dtor(key: Key, dtor: Dtor) {
let mut node = ManuallyDrop::new(Box::new(Node { key, dtor, next: ptr::null_mut() }));
// We use the System allocator here to avoid interfering with a potential
// Global allocator using thread-local storage.
let mut node =
ManuallyDrop::new(Box::new_in(Node { key, dtor, next: ptr::null_mut() }, System));
#[allow(unused_unsafe)]
let mut head = unsafe { DTORS.load(Acquire) };
@ -182,6 +186,11 @@ pub unsafe fn destroy_tls() {
};
}
// This is marked inline(never) to prevent dealloc calls from being reordered
// to after the TLS has been destroyed.
// See https://github.com/rust-lang/rust/pull/144465#pullrequestreview-3289729950
// for more context.
#[inline(never)]
unsafe fn run_dtors() {
let mut any_run = true;

View file

@ -1,6 +1,6 @@
use super::key::{Key, LazyKey, get, set};
use super::{abort_on_dtor_unwind, guard};
use crate::alloc::{self, Layout};
use crate::alloc::{self, GlobalAlloc, Layout, System};
use crate::cell::Cell;
use crate::marker::PhantomData;
use crate::mem::ManuallyDrop;
@ -113,17 +113,19 @@ pub const fn value_align<T: 'static>() -> usize {
crate::mem::align_of::<Value<T>>()
}
/// Equivalent to `Box<Value<T>>`, but potentially over-aligned.
struct AlignedBox<T: 'static, const ALIGN: usize> {
/// Equivalent to `Box<Value<T>, System>`, but potentially over-aligned.
struct AlignedSystemBox<T: 'static, const ALIGN: usize> {
ptr: NonNull<Value<T>>,
}
impl<T: 'static, const ALIGN: usize> AlignedBox<T, ALIGN> {
impl<T: 'static, const ALIGN: usize> AlignedSystemBox<T, ALIGN> {
#[inline]
fn new(v: Value<T>) -> Self {
let layout = Layout::new::<Value<T>>().align_to(ALIGN).unwrap();
let ptr: *mut Value<T> = (unsafe { alloc::alloc(layout) }).cast();
// We use the System allocator here to avoid interfering with a potential
// Global allocator using thread-local storage.
let ptr: *mut Value<T> = (unsafe { System.alloc(layout) }).cast();
let Some(ptr) = NonNull::new(ptr) else {
alloc::handle_alloc_error(layout);
};
@ -143,7 +145,7 @@ impl<T: 'static, const ALIGN: usize> AlignedBox<T, ALIGN> {
}
}
impl<T: 'static, const ALIGN: usize> Deref for AlignedBox<T, ALIGN> {
impl<T: 'static, const ALIGN: usize> Deref for AlignedSystemBox<T, ALIGN> {
type Target = Value<T>;
#[inline]
@ -152,14 +154,14 @@ impl<T: 'static, const ALIGN: usize> Deref for AlignedBox<T, ALIGN> {
}
}
impl<T: 'static, const ALIGN: usize> Drop for AlignedBox<T, ALIGN> {
impl<T: 'static, const ALIGN: usize> Drop for AlignedSystemBox<T, ALIGN> {
#[inline]
fn drop(&mut self) {
let layout = Layout::new::<Value<T>>().align_to(ALIGN).unwrap();
unsafe {
let unwind_result = catch_unwind(AssertUnwindSafe(|| self.ptr.drop_in_place()));
alloc::dealloc(self.ptr.as_ptr().cast(), layout);
System.dealloc(self.ptr.as_ptr().cast(), layout);
if let Err(payload) = unwind_result {
resume_unwind(payload);
}
@ -205,11 +207,11 @@ impl<T: 'static, const ALIGN: usize> Storage<T, ALIGN> {
return ptr::null();
}
let value = AlignedBox::<T, ALIGN>::new(Value {
let value = AlignedSystemBox::<T, ALIGN>::new(Value {
value: i.and_then(Option::take).unwrap_or_else(f),
key,
});
let ptr = AlignedBox::into_raw(value);
let ptr = AlignedSystemBox::into_raw(value);
// SAFETY:
// * key came from a `LazyKey` and is thus correct.
@ -227,7 +229,7 @@ impl<T: 'static, const ALIGN: usize> Storage<T, ALIGN> {
// initializer has already returned and the next scope only starts
// after we return the pointer. Therefore, there can be no references
// to the old value.
drop(unsafe { AlignedBox::<T, ALIGN>::from_raw(old) });
drop(unsafe { AlignedSystemBox::<T, ALIGN>::from_raw(old) });
}
// SAFETY: We just created this value above.
@ -246,7 +248,7 @@ unsafe extern "C" fn destroy_value<T: 'static, const ALIGN: usize>(ptr: *mut u8)
// Note that to prevent an infinite loop we reset it back to null right
// before we return from the destructor ourselves.
abort_on_dtor_unwind(|| {
let ptr = unsafe { AlignedBox::<T, ALIGN>::from_raw(ptr as *mut Value<T>) };
let ptr = unsafe { AlignedSystemBox::<T, ALIGN>::from_raw(ptr as *mut Value<T>) };
let key = ptr.key;
// SAFETY: `key` is the TLS key `ptr` was stored under.
unsafe { set(key, ptr::without_provenance_mut(1)) };

View file

@ -269,22 +269,13 @@ fn init_current(current: *mut ()) -> Thread {
// BUSY exists solely for this check, but as it is in the slow path, the
// extra TLS write above shouldn't matter. The alternative is nearly always
// a stack overflow.
// If you came across this message, contact the author of your allocator.
// If you are said author: A surprising amount of functions inside the
// standard library (e.g. `Mutex`, `thread_local!`, `File` when using long
// paths, even `panic!` when using unwinding), need memory allocation, so
// you'll get circular dependencies all over the place when using them.
// I (joboet) highly recommend using only APIs from core in your allocator
// and implementing your own system abstractions. Still, if you feel that
// a particular API should be entirely allocation-free, feel free to open
// an issue on the Rust repository, we'll see what we can do.
//
// If we reach this point it means our initialization routine ended up
// calling current() either directly, or indirectly through the global
// allocator, which is a bug either way as we may not call the global
// allocator in current().
rtabort!(
"\n\
Attempted to access thread-local data while allocating said data.\n\
Do not access functions that allocate in the global allocator!\n\
This is a bug in the global allocator.\n\
"
"init_current() was re-entrant, which indicates a bug in the Rust threading implementation"
)
} else {
debug_assert_eq!(current, DESTROYED);

View file

@ -24,12 +24,17 @@ use crate::fmt;
/// [`with`]) within a thread, and values that implement [`Drop`] get
/// destructed when a thread exits. Some platform-specific caveats apply, which
/// are explained below.
/// Note that if the destructor panics, the whole process will be [aborted].
/// Note that, should the destructor panic, the whole process will be [aborted].
/// On platforms where initialization requires memory allocation, this is
/// performed directly through [`System`], allowing the [global allocator]
/// to make use of thread-local storage.
///
/// A `LocalKey`'s initializer cannot recursively depend on itself. Using a
/// `LocalKey` in this way may cause panics, aborts, or infinite recursion on
/// the first call to `with`.
///
/// [`System`]: crate::alloc::System
/// [global allocator]: crate::alloc
/// [aborted]: crate::process::abort
///
/// # Single-thread Synchronization

View file

@ -158,6 +158,7 @@
#[cfg(all(test, not(any(target_os = "emscripten", target_os = "wasi"))))]
mod tests;
use crate::alloc::System;
use crate::any::Any;
use crate::cell::UnsafeCell;
use crate::ffi::CStr;
@ -211,6 +212,36 @@ pub mod local_impl {
pub use crate::sys::thread_local::*;
}
/// The data passed to the spawned thread for thread initialization. Any thread
/// implementation should call `.init()` on this at the start of the new thread,
/// before doing anything else, to ensure the current thread is properly
/// initialized and the global allocator works.
pub(crate) struct ThreadInit {
pub handle: Thread,
pub rust_start: Box<dyn FnOnce() + Send>,
}
impl ThreadInit {
/// Initialize the 'current thread' mechanism on this thread, returning the
/// Rust entry point.
pub fn init(self: Box<Self>) -> Box<dyn FnOnce() + Send> {
// Set the current thread before any (de)allocations on the global allocator occur,
// so that it may call std::thread::current() in its implementation. This is also
// why we take Box<Self>, to ensure the Box is not destroyed until after this point.
// Cloning the handle does not invoke the global allocator, as it is an Arc.
if let Err(_thread) = set_current(self.handle.clone()) {
// The current thread handle should not have been set yet. Use an abort to save binary size (see #123356).
rtabort!("current thread handle already set during thread spawn");
}
if let Some(name) = self.handle.cname() {
imp::set_name(name);
}
self.rust_start
}
}
////////////////////////////////////////////////////////////////////////////////
// Builder
////////////////////////////////////////////////////////////////////////////////
@ -502,16 +533,14 @@ impl Builder {
});
let id = ThreadId::new();
let my_thread = Thread::new(id, name);
let thread = Thread::new(id, name);
let hooks = if no_hooks {
spawnhook::ChildSpawnHooks::default()
} else {
spawnhook::run_spawn_hooks(&my_thread)
spawnhook::run_spawn_hooks(&thread)
};
let their_thread = my_thread.clone();
let my_packet: Arc<Packet<'scope, T>> = Arc::new(Packet {
scope: scope_data,
result: UnsafeCell::new(None),
@ -543,19 +572,10 @@ impl Builder {
}
let f = MaybeDangling::new(f);
let main = move || {
if let Err(_thread) = set_current(their_thread.clone()) {
// Both the current thread handle and the ID should not be
// initialized yet. Since only the C runtime and some of our
// platform code run before this, this point shouldn't be
// reachable. Use an abort to save binary size (see #123356).
rtabort!("something here is badly broken!");
}
if let Some(name) = their_thread.cname() {
imp::set_name(name);
}
// The entrypoint of the Rust thread, after platform-specific thread
// initialization is done.
let rust_start = move || {
let f = f.into_inner();
let try_result = panic::catch_unwind(panic::AssertUnwindSafe(|| {
crate::sys::backtrace::__rust_begin_short_backtrace(|| hooks.run());
@ -578,11 +598,15 @@ impl Builder {
scope_data.increment_num_running_threads();
}
let main = Box::new(main);
// SAFETY: dynamic size and alignment of the Box remain the same. See below for why the
// lifetime change is justified.
let main =
unsafe { Box::from_raw(Box::into_raw(main) as *mut (dyn FnOnce() + Send + 'static)) };
let rust_start = unsafe {
Box::from_raw(
Box::into_raw(Box::new(rust_start)) as *mut (dyn FnOnce() + Send + 'static)
)
};
let init = Box::new(ThreadInit { handle: thread.clone(), rust_start });
Ok(JoinInner {
// SAFETY:
@ -598,8 +622,8 @@ impl Builder {
// Similarly, the `sys` implementation must guarantee that no references to the closure
// exist after the thread has terminated, which is signaled by `Thread::join`
// returning.
native: unsafe { imp::Thread::new(stack_size, my_thread.name(), main)? },
thread: my_thread,
native: unsafe { imp::Thread::new(stack_size, init)? },
thread,
packet: my_packet,
})
}
@ -1257,21 +1281,41 @@ impl ThreadId {
}
}
_ => {
use crate::sync::{Mutex, PoisonError};
use crate::cell::SyncUnsafeCell;
use crate::hint::spin_loop;
use crate::sync::atomic::{Atomic, AtomicBool};
use crate::thread::yield_now;
static COUNTER: Mutex<u64> = Mutex::new(0);
// If we don't have a 64-bit atomic we use a small spinlock. We don't use Mutex
// here as we might be trying to get the current thread id in the global allocator,
// and on some platforms Mutex requires allocation.
static COUNTER_LOCKED: Atomic<bool> = AtomicBool::new(false);
static COUNTER: SyncUnsafeCell<u64> = SyncUnsafeCell::new(0);
let mut counter = COUNTER.lock().unwrap_or_else(PoisonError::into_inner);
let Some(id) = counter.checked_add(1) else {
// in case the panic handler ends up calling `ThreadId::new()`,
// avoid reentrant lock acquire.
drop(counter);
exhausted();
};
// Acquire lock.
let mut spin = 0;
while COUNTER_LOCKED.compare_exchange_weak(false, true, Ordering::Acquire, Ordering::Relaxed).is_err() {
if spin <= 3 {
for _ in 0..(1 << spin) {
spin_loop();
}
} else {
yield_now();
}
spin += 1;
}
*counter = id;
drop(counter);
ThreadId(NonZero::new(id).unwrap())
// SAFETY: we have an exclusive lock on the counter.
unsafe {
if let Some(id) = (*COUNTER.get()).checked_add(1) {
*COUNTER.get() = id;
COUNTER_LOCKED.store(false, Ordering::Release);
ThreadId(NonZero::new(id).unwrap())
} else {
COUNTER_LOCKED.store(false, Ordering::Release);
exhausted()
}
}
}
}
}
@ -1465,7 +1509,10 @@ impl Inner {
///
/// [`thread::current`]: current::current
pub struct Thread {
inner: Pin<Arc<Inner>>,
// We use the System allocator such that creating or dropping this handle
// does not interfere with a potential Global allocator using thread-local
// storage.
inner: Pin<Arc<Inner, System>>,
}
impl Thread {
@ -1478,7 +1525,7 @@ impl Thread {
// SAFETY: We pin the Arc immediately after creation, so its address never
// changes.
let inner = unsafe {
let mut arc = Arc::<Inner>::new_uninit();
let mut arc = Arc::<Inner, _>::new_uninit_in(System);
let ptr = Arc::get_mut_unchecked(&mut arc).as_mut_ptr();
(&raw mut (*ptr).name).write(name);
(&raw mut (*ptr).id).write(id);
@ -1649,7 +1696,7 @@ impl Thread {
pub fn into_raw(self) -> *const () {
// Safety: We only expose an opaque pointer, which maintains the `Pin` invariant.
let inner = unsafe { Pin::into_inner_unchecked(self.inner) };
Arc::into_raw(inner) as *const ()
Arc::into_raw_with_allocator(inner).0 as *const ()
}
/// Constructs a `Thread` from a raw pointer.
@ -1671,10 +1718,12 @@ impl Thread {
#[unstable(feature = "thread_raw", issue = "97523")]
pub unsafe fn from_raw(ptr: *const ()) -> Thread {
// Safety: Upheld by caller.
unsafe { Thread { inner: Pin::new_unchecked(Arc::from_raw(ptr as *const Inner)) } }
unsafe {
Thread { inner: Pin::new_unchecked(Arc::from_raw_in(ptr as *const Inner, System)) }
}
}
fn cname(&self) -> Option<&CStr> {
pub(crate) fn cname(&self) -> Option<&CStr> {
if let Some(name) = &self.inner.name {
Some(name.as_cstr())
} else if main_thread::get() == Some(self.inner.id) {

View file

@ -14,7 +14,7 @@
//
//@ cdb-command:dx t,d
//@ cdb-check:t,d : [...] [Type: std::thread::Thread *]
//@ cdb-check:[...] inner [...][Type: core::pin::Pin<alloc::sync::Arc<std::thread::Inner,alloc::alloc::Global> >]
//@ cdb-check:[...] inner [...][Type: core::pin::Pin<alloc::sync::Arc<std::thread::Inner,std::alloc::System> >]
use std::thread;

View file

@ -0,0 +1,107 @@
//@ run-pass
//@ needs-threads
use std::alloc::{GlobalAlloc, Layout, System};
use std::hint::black_box;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::thread::{Thread, ThreadId};
static GLOBAL: AtomicUsize = AtomicUsize::new(0);
static SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS: AtomicBool = AtomicBool::new(false);
static LOCAL_TRY_WITH_SUCCEEDED_ALLOC: AtomicBool = AtomicBool::new(false);
static LOCAL_TRY_WITH_SUCCEEDED_DEALLOC: AtomicBool = AtomicBool::new(false);
struct LocalForAllocatorWithoutDrop(ThreadId);
struct LocalForAllocatorWithDrop(Thread);
impl Drop for LocalForAllocatorWithDrop {
fn drop(&mut self) {
GLOBAL.fetch_or(2, Ordering::Relaxed);
}
}
struct LocalForUser(u32);
impl Drop for LocalForUser {
// A user might call the global allocator in a thread-local drop.
fn drop(&mut self) {
self.0 += 1;
drop(black_box(Box::new(self.0)))
}
}
thread_local! {
static LOCAL_FOR_USER0: LocalForUser = LocalForUser(0);
static LOCAL_FOR_ALLOCATOR_WITHOUT_DROP: LocalForAllocatorWithoutDrop = {
LocalForAllocatorWithoutDrop(std::thread::current().id())
};
static LOCAL_FOR_ALLOCATOR_WITH_DROP: LocalForAllocatorWithDrop = {
GLOBAL.fetch_or(1, Ordering::Relaxed);
LocalForAllocatorWithDrop(std::thread::current())
};
static LOCAL_FOR_USER1: LocalForUser = LocalForUser(1);
}
#[global_allocator]
static ALLOC: Alloc = Alloc;
struct Alloc;
unsafe impl GlobalAlloc for Alloc {
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
// Make sure we aren't re-entrant.
assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed));
SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed);
// Should be infallible.
LOCAL_FOR_ALLOCATOR_WITHOUT_DROP.with(|local| {
assert!(local.0 == std::thread::current().id());
});
// May fail once thread-local destructors start running, and ours has
// been run.
let try_with_ret = LOCAL_FOR_ALLOCATOR_WITH_DROP.try_with(|local| {
assert!(local.0.id() == std::thread::current().id());
});
LOCAL_TRY_WITH_SUCCEEDED_ALLOC.fetch_or(try_with_ret.is_ok(), Ordering::Relaxed);
let ret = unsafe { System.alloc(layout) };
SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed);
ret
}
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
// Make sure we aren't re-entrant.
assert!(!SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.load(Ordering::Relaxed));
SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(true, Ordering::Relaxed);
// Should be infallible.
LOCAL_FOR_ALLOCATOR_WITHOUT_DROP.with(|local| {
assert!(local.0 == std::thread::current().id());
});
// May fail once thread-local destructors start running, and ours has
// been run.
let try_with_ret = LOCAL_FOR_ALLOCATOR_WITH_DROP.try_with(|local| {
assert!(local.0.id() == std::thread::current().id());
});
LOCAL_TRY_WITH_SUCCEEDED_DEALLOC.fetch_or(try_with_ret.is_ok(), Ordering::Relaxed);
unsafe { System.dealloc(ptr, layout) }
SHOULD_PANIC_ON_GLOBAL_ALLOC_ACCESS.store(false, Ordering::Relaxed);
}
}
fn main() {
std::thread::spawn(|| {
LOCAL_FOR_USER0.with(|l| assert!(l.0 == 0));
std::hint::black_box(vec![1, 2]);
assert!(GLOBAL.load(Ordering::Relaxed) == 1);
LOCAL_FOR_USER1.with(|l| assert!(l.0 == 1));
})
.join()
.unwrap();
assert!(GLOBAL.load(Ordering::Relaxed) == 3);
assert!(LOCAL_TRY_WITH_SUCCEEDED_ALLOC.load(Ordering::Relaxed));
assert!(LOCAL_TRY_WITH_SUCCEEDED_DEALLOC.load(Ordering::Relaxed));
}