241 lines
9.2 KiB
Rust
241 lines
9.2 KiB
Rust
use crate::mem;
|
|
use crate::ptr;
|
|
use crate::sync::atomic::AtomicPtr;
|
|
use crate::sync::atomic::Ordering::SeqCst;
|
|
use crate::sys::c;
|
|
|
|
pub type Key = c::DWORD;
|
|
pub type Dtor = unsafe extern fn(*mut u8);
|
|
|
|
// Turns out, like pretty much everything, Windows is pretty close the
|
|
// functionality that Unix provides, but slightly different! In the case of
|
|
// TLS, Windows does not provide an API to provide a destructor for a TLS
|
|
// variable. This ends up being pretty crucial to this implementation, so we
|
|
// need a way around this.
|
|
//
|
|
// The solution here ended up being a little obscure, but fear not, the
|
|
// internet has informed me [1][2] that this solution is not unique (no way
|
|
// I could have thought of it as well!). The key idea is to insert some hook
|
|
// somewhere to run arbitrary code on thread termination. With this in place
|
|
// we'll be able to run anything we like, including all TLS destructors!
|
|
//
|
|
// To accomplish this feat, we perform a number of threads, all contained
|
|
// within this module:
|
|
//
|
|
// * All TLS destructors are tracked by *us*, not the windows runtime. This
|
|
// means that we have a global list of destructors for each TLS key that
|
|
// we know about.
|
|
// * When a thread exits, we run over the entire list and run dtors for all
|
|
// non-null keys. This attempts to match Unix semantics in this regard.
|
|
//
|
|
// This ends up having the overhead of using a global list, having some
|
|
// locks here and there, and in general just adding some more code bloat. We
|
|
// attempt to optimize runtime by forgetting keys that don't have
|
|
// destructors, but this only gets us so far.
|
|
//
|
|
// For more details and nitty-gritty, see the code sections below!
|
|
//
|
|
// [1]: http://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
|
|
// [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base
|
|
// /threading/thread_local_storage_win.cc#L42
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Native bindings
|
|
//
|
|
// This section is just raw bindings to the native functions that Windows
|
|
// provides, There's a few extra calls to deal with destructors.
|
|
|
|
#[inline]
|
|
pub unsafe fn create(dtor: Option<Dtor>) -> Key {
|
|
let key = c::TlsAlloc();
|
|
assert!(key != c::TLS_OUT_OF_INDEXES);
|
|
if let Some(f) = dtor {
|
|
register_dtor(key, f);
|
|
}
|
|
return key;
|
|
}
|
|
|
|
#[inline]
|
|
pub unsafe fn set(key: Key, value: *mut u8) {
|
|
let r = c::TlsSetValue(key, value as c::LPVOID);
|
|
debug_assert!(r != 0);
|
|
}
|
|
|
|
#[inline]
|
|
pub unsafe fn get(key: Key) -> *mut u8 {
|
|
c::TlsGetValue(key) as *mut u8
|
|
}
|
|
|
|
#[inline]
|
|
pub unsafe fn destroy(_key: Key) {
|
|
rtabort!("can't destroy tls keys on windows")
|
|
}
|
|
|
|
#[inline]
|
|
pub fn requires_synchronized_create() -> bool {
|
|
true
|
|
}
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Dtor registration
|
|
//
|
|
// Windows has no native support for running destructors so we manage our own
|
|
// list of destructors to keep track of how to destroy keys. We then install a
|
|
// callback later to get invoked whenever a thread exits, running all
|
|
// appropriate destructors.
|
|
//
|
|
// Currently unregistration from this list is not supported. A destructor can be
|
|
// registered but cannot be unregistered. There's various simplifying reasons
|
|
// for doing this, the big ones being:
|
|
//
|
|
// 1. Currently we don't even support deallocating TLS keys, so normal operation
|
|
// doesn't need to deallocate a destructor.
|
|
// 2. There is no point in time where we know we can unregister a destructor
|
|
// because it could always be getting run by some remote thread.
|
|
//
|
|
// Typically processes have a statically known set of TLS keys which is pretty
|
|
// small, and we'd want to keep this memory alive for the whole process anyway
|
|
// really.
|
|
//
|
|
// Perhaps one day we can fold the `Box` here into a static allocation,
|
|
// expanding the `StaticKey` structure to contain not only a slot for the TLS
|
|
// key but also a slot for the destructor queue on windows. An optimization for
|
|
// another day!
|
|
|
|
static DTORS: AtomicPtr<Node> = AtomicPtr::new(ptr::null_mut());
|
|
|
|
struct Node {
|
|
dtor: Dtor,
|
|
key: Key,
|
|
next: *mut Node,
|
|
}
|
|
|
|
unsafe fn register_dtor(key: Key, dtor: Dtor) {
|
|
let mut node = Box::new(Node {
|
|
key,
|
|
dtor,
|
|
next: ptr::null_mut(),
|
|
});
|
|
|
|
let mut head = DTORS.load(SeqCst);
|
|
loop {
|
|
node.next = head;
|
|
match DTORS.compare_exchange(head, &mut *node, SeqCst, SeqCst) {
|
|
Ok(_) => return mem::forget(node),
|
|
Err(cur) => head = cur,
|
|
}
|
|
}
|
|
}
|
|
|
|
// -------------------------------------------------------------------------
|
|
// Where the Magic (TM) Happens
|
|
//
|
|
// If you're looking at this code, and wondering "what is this doing?",
|
|
// you're not alone! I'll try to break this down step by step:
|
|
//
|
|
// # What's up with CRT$XLB?
|
|
//
|
|
// For anything about TLS destructors to work on Windows, we have to be able
|
|
// to run *something* when a thread exits. To do so, we place a very special
|
|
// static in a very special location. If this is encoded in just the right
|
|
// way, the kernel's loader is apparently nice enough to run some function
|
|
// of ours whenever a thread exits! How nice of the kernel!
|
|
//
|
|
// Lots of detailed information can be found in source [1] above, but the
|
|
// gist of it is that this is leveraging a feature of Microsoft's PE format
|
|
// (executable format) which is not actually used by any compilers today.
|
|
// This apparently translates to any callbacks in the ".CRT$XLB" section
|
|
// being run on certain events.
|
|
//
|
|
// So after all that, we use the compiler's #[link_section] feature to place
|
|
// a callback pointer into the magic section so it ends up being called.
|
|
//
|
|
// # What's up with this callback?
|
|
//
|
|
// The callback specified receives a number of parameters from... someone!
|
|
// (the kernel? the runtime? I'm not quite sure!) There are a few events that
|
|
// this gets invoked for, but we're currently only interested on when a
|
|
// thread or a process "detaches" (exits). The process part happens for the
|
|
// last thread and the thread part happens for any normal thread.
|
|
//
|
|
// # Ok, what's up with running all these destructors?
|
|
//
|
|
// This will likely need to be improved over time, but this function
|
|
// attempts a "poor man's" destructor callback system. Once we've got a list
|
|
// of what to run, we iterate over all keys, check their values, and then run
|
|
// destructors if the values turn out to be non null (setting them to null just
|
|
// beforehand). We do this a few times in a loop to basically match Unix
|
|
// semantics. If we don't reach a fixed point after a short while then we just
|
|
// inevitably leak something most likely.
|
|
//
|
|
// # The article mentions weird stuff about "/INCLUDE"?
|
|
//
|
|
// It sure does! Specifically we're talking about this quote:
|
|
//
|
|
// The Microsoft run-time library facilitates this process by defining a
|
|
// memory image of the TLS Directory and giving it the special name
|
|
// “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The
|
|
// linker looks for this memory image and uses the data there to create the
|
|
// TLS Directory. Other compilers that support TLS and work with the
|
|
// Microsoft linker must use this same technique.
|
|
//
|
|
// Basically what this means is that if we want support for our TLS
|
|
// destructors/our hook being called then we need to make sure the linker does
|
|
// not omit this symbol. Otherwise it will omit it and our callback won't be
|
|
// wired up.
|
|
//
|
|
// We don't actually use the `/INCLUDE` linker flag here like the article
|
|
// mentions because the Rust compiler doesn't propagate linker flags, but
|
|
// instead we use a shim function which performs a volatile 1-byte load from
|
|
// the address of the symbol to ensure it sticks around.
|
|
|
|
#[link_section = ".CRT$XLB"]
|
|
#[allow(dead_code, unused_variables)]
|
|
#[used] // we don't want LLVM eliminating this symbol for any reason, and
|
|
// when the symbol makes it to the linker the linker will take over
|
|
pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD,
|
|
c::LPVOID) =
|
|
on_tls_callback;
|
|
|
|
#[allow(dead_code, unused_variables)]
|
|
unsafe extern "system" fn on_tls_callback(h: c::LPVOID,
|
|
dwReason: c::DWORD,
|
|
pv: c::LPVOID) {
|
|
if dwReason == c::DLL_THREAD_DETACH || dwReason == c::DLL_PROCESS_DETACH {
|
|
run_dtors();
|
|
}
|
|
|
|
// See comments above for what this is doing. Note that we don't need this
|
|
// trickery on GNU windows, just on MSVC.
|
|
reference_tls_used();
|
|
#[cfg(target_env = "msvc")]
|
|
unsafe fn reference_tls_used() {
|
|
extern { static _tls_used: u8; }
|
|
crate::intrinsics::volatile_load(&_tls_used);
|
|
}
|
|
#[cfg(not(target_env = "msvc"))]
|
|
unsafe fn reference_tls_used() {}
|
|
}
|
|
|
|
#[allow(dead_code)] // actually called above
|
|
unsafe fn run_dtors() {
|
|
let mut any_run = true;
|
|
for _ in 0..5 {
|
|
if !any_run {
|
|
break
|
|
}
|
|
any_run = false;
|
|
let mut cur = DTORS.load(SeqCst);
|
|
while !cur.is_null() {
|
|
let ptr = c::TlsGetValue((*cur).key);
|
|
|
|
if !ptr.is_null() {
|
|
c::TlsSetValue((*cur).key, ptr::null_mut());
|
|
((*cur).dtor)(ptr as *mut _);
|
|
any_run = true;
|
|
}
|
|
|
|
cur = (*cur).next;
|
|
}
|
|
}
|
|
}
|