Auto merge of #148925 - madsmtm:jemalloc-perf, r=Kobzol

Simplify `jemalloc` setup (without perf regression)

Reland https://github.com/rust-lang/rust/pull/146627 after fixing [the performance regression](https://github.com/rust-lang/rust/pull/148851#issuecomment-3525797560) that caused it to be reverted in https://github.com/rust-lang/rust/pull/148896.

This avoids 65f0b7a (second commit in the initial PR), and adds a comment explaining why `extern crate` is needed here instead of `use` (we need to load `tikv_jemalloc_sys` from the sysroot because of https://github.com/rust-lang/cc-rs/issues/1613).

r? Kobzol
This commit is contained in:
bors 2025-11-23 20:34:07 +00:00
commit c23ed3ef28
6 changed files with 37 additions and 161 deletions

View file

@ -28,9 +28,9 @@ wasi = "=0.14.2"
[dependencies.tikv-jemalloc-sys]
version = "0.6.0"
version = "0.6.1"
optional = true
features = ['unprefixed_malloc_on_supported_platforms']
features = ['override_allocator_on_supported_platforms']
[features]
# tidy-alphabetical-start

View file

@ -7,26 +7,25 @@
// distribution. The obvious way to do this is with the `#[global_allocator]`
// mechanism. However, for complicated reasons (see
// https://github.com/rust-lang/rust/pull/81782#issuecomment-784438001 for some
// details) that mechanism doesn't work here. Also, we must use a consistent
// allocator across the rustc <-> llvm boundary, and `#[global_allocator]`
// wouldn't provide that.
// details) that mechanism doesn't work here. Also, we'd like to use a
// consistent allocator across the rustc <-> llvm boundary, and
// `#[global_allocator]` wouldn't provide that.
//
// Instead, we use a lower-level mechanism. rustc is linked with jemalloc in a
// way such that jemalloc's implementation of `malloc`, `free`, etc., override
// the libc allocator's implementation. This means that Rust's `System`
// allocator, which calls `libc::malloc()` et al., is actually calling into
// jemalloc.
// Instead, we use a lower-level mechanism, namely the
// `"override_allocator_on_supported_platforms"` Cargo feature of jemalloc-sys.
//
// This makes jemalloc-sys override the libc/system allocator's implementation
// of `malloc`, `free`, etc. This means that Rust's `System` allocator, which
// calls `libc::malloc()` et al., is actually calling into jemalloc.
//
// A consequence of not using `GlobalAlloc` (and the `tikv-jemallocator` crate
// provides an impl of that trait, which is called `Jemalloc`) is that we
// cannot use the sized deallocation APIs (`sdallocx`) that jemalloc provides.
// It's unclear how much performance is lost because of this.
//
// As for the symbol overrides in `main` below: we're pulling in a static copy
// of jemalloc. We need to actually reference its symbols for it to get linked.
// The two crates we link to here, `std` and `rustc_driver`, are both dynamic
// libraries. So we must reference jemalloc symbols one way or another, because
// this file is the only object code in the rustc executable.
// NOTE: Even though Cargo passes `--extern` with `tikv_jemalloc_sys`, we still need to `use` the
// crate for the compiler to see the `#[used]`, see https://github.com/rust-lang/rust/issues/64402.
// This is similarly required if we used a crate with `#[global_allocator]`.
//
// NOTE: if you are reading this comment because you want to set a custom `global_allocator` for
// benchmarking, consider using the benchmarks in the `rustc-perf` collector suite instead:
@ -36,43 +35,9 @@
// to compare their performance, see
// https://github.com/rust-lang/rust/commit/b90cfc887c31c3e7a9e6d462e2464db1fe506175#diff-43914724af6e464c1da2171e4a9b6c7e607d5bc1203fa95c0ab85be4122605ef
// for an example of how to do so.
#[cfg(feature = "jemalloc")]
use tikv_jemalloc_sys as _;
fn main() {
// See the comment at the top of this file for an explanation of this.
#[cfg(feature = "jemalloc")]
{
use std::os::raw::{c_int, c_void};
use tikv_jemalloc_sys as jemalloc_sys;
#[used]
static _F1: unsafe extern "C" fn(usize, usize) -> *mut c_void = jemalloc_sys::calloc;
#[used]
static _F2: unsafe extern "C" fn(*mut *mut c_void, usize, usize) -> c_int =
jemalloc_sys::posix_memalign;
#[used]
static _F3: unsafe extern "C" fn(usize, usize) -> *mut c_void = jemalloc_sys::aligned_alloc;
#[used]
static _F4: unsafe extern "C" fn(usize) -> *mut c_void = jemalloc_sys::malloc;
#[used]
static _F5: unsafe extern "C" fn(*mut c_void, usize) -> *mut c_void = jemalloc_sys::realloc;
#[used]
static _F6: unsafe extern "C" fn(*mut c_void) = jemalloc_sys::free;
// On OSX, jemalloc doesn't directly override malloc/free, but instead
// registers itself with the allocator's zone APIs in a ctor. However,
// the linker doesn't seem to consider ctors as "used" when statically
// linking, so we need to explicitly depend on the function.
#[cfg(target_os = "macos")]
{
unsafe extern "C" {
fn _rjem_je_zone_register();
}
#[used]
static _F7: unsafe extern "C" fn() = _rjem_je_zone_register;
}
}
rustc_driver::main()
}

View file

@ -61,10 +61,14 @@ extern crate rustc_target;
extern crate rustc_trait_selection;
extern crate test;
// See docs in https://github.com/rust-lang/rust/blob/HEAD/compiler/rustc/src/main.rs
// about jemalloc.
/// See docs in https://github.com/rust-lang/rust/blob/HEAD/compiler/rustc/src/main.rs
/// and https://github.com/rust-lang/rust/pull/146627 for why we need this.
///
/// FIXME(madsmtm): This is loaded from the sysroot that was built with the other `rustc` crates
/// above, instead of via Cargo as you'd normally do. This is currently needed for LTO due to
/// https://github.com/rust-lang/cc-rs/issues/1613.
#[cfg(feature = "jemalloc")]
extern crate tikv_jemalloc_sys as jemalloc_sys;
extern crate tikv_jemalloc_sys as _;
use std::env::{self, VarError};
use std::io::{self, IsTerminal};
@ -124,37 +128,6 @@ mod visit_ast;
mod visit_lib;
pub fn main() {
// See docs in https://github.com/rust-lang/rust/blob/HEAD/compiler/rustc/src/main.rs
// about jemalloc.
#[cfg(feature = "jemalloc")]
{
use std::os::raw::{c_int, c_void};
#[used]
static _F1: unsafe extern "C" fn(usize, usize) -> *mut c_void = jemalloc_sys::calloc;
#[used]
static _F2: unsafe extern "C" fn(*mut *mut c_void, usize, usize) -> c_int =
jemalloc_sys::posix_memalign;
#[used]
static _F3: unsafe extern "C" fn(usize, usize) -> *mut c_void = jemalloc_sys::aligned_alloc;
#[used]
static _F4: unsafe extern "C" fn(usize) -> *mut c_void = jemalloc_sys::malloc;
#[used]
static _F5: unsafe extern "C" fn(*mut c_void, usize) -> *mut c_void = jemalloc_sys::realloc;
#[used]
static _F6: unsafe extern "C" fn(*mut c_void) = jemalloc_sys::free;
#[cfg(target_os = "macos")]
{
unsafe extern "C" {
fn _rjem_je_zone_register();
}
#[used]
static _F7: unsafe extern "C" fn() = _rjem_je_zone_register;
}
}
let mut early_dcx = EarlyDiagCtxt::new(ErrorOutputType::default());
rustc_driver::install_ice_hook(

View file

@ -13,10 +13,14 @@ extern crate rustc_interface;
extern crate rustc_session;
extern crate rustc_span;
// See docs in https://github.com/rust-lang/rust/blob/HEAD/compiler/rustc/src/main.rs
// about jemalloc.
/// See docs in https://github.com/rust-lang/rust/blob/HEAD/compiler/rustc/src/main.rs
/// and https://github.com/rust-lang/rust/pull/146627 for why we need this.
///
/// FIXME(madsmtm): This is loaded from the sysroot that was built with the other `rustc` crates
/// above, instead of via Cargo as you'd normally do. This is currently needed for LTO due to
/// https://github.com/rust-lang/cc-rs/issues/1613.
#[cfg(feature = "jemalloc")]
extern crate tikv_jemalloc_sys as jemalloc_sys;
extern crate tikv_jemalloc_sys as _;
use clippy_utils::sym;
use declare_clippy_lint::LintListBuilder;
@ -189,36 +193,6 @@ const BUG_REPORT_URL: &str = "https://github.com/rust-lang/rust-clippy/issues/ne
#[expect(clippy::too_many_lines)]
pub fn main() {
// See docs in https://github.com/rust-lang/rust/blob/HEAD/compiler/rustc/src/main.rs
// about jemalloc.
#[cfg(feature = "jemalloc")]
{
use std::os::raw::{c_int, c_void};
#[used]
static _F1: unsafe extern "C" fn(usize, usize) -> *mut c_void = jemalloc_sys::calloc;
#[used]
static _F2: unsafe extern "C" fn(*mut *mut c_void, usize, usize) -> c_int = jemalloc_sys::posix_memalign;
#[used]
static _F3: unsafe extern "C" fn(usize, usize) -> *mut c_void = jemalloc_sys::aligned_alloc;
#[used]
static _F4: unsafe extern "C" fn(usize) -> *mut c_void = jemalloc_sys::malloc;
#[used]
static _F5: unsafe extern "C" fn(*mut c_void, usize) -> *mut c_void = jemalloc_sys::realloc;
#[used]
static _F6: unsafe extern "C" fn(*mut c_void) = jemalloc_sys::free;
#[cfg(target_os = "macos")]
{
unsafe extern "C" {
fn _rjem_je_zone_register();
}
#[used]
static _F7: unsafe extern "C" fn() = _rjem_je_zone_register;
}
}
let early_dcx = EarlyDiagCtxt::new(ErrorOutputType::default());
rustc_driver::init_rustc_env_logger(&early_dcx);

View file

@ -33,8 +33,8 @@ serde_json = { version = "1.0", optional = true }
# But only for some targets, it fails for others. Rustc configures this in its CI, but we can't
# easily use that since we support out-of-tree builds.
[target.'cfg(any(target_os = "linux", target_os = "macos"))'.dependencies.tikv-jemalloc-sys]
version = "0.6.0"
features = ['unprefixed_malloc_on_supported_platforms']
version = "0.6.1"
features = ['override_allocator_on_supported_platforms']
[target.'cfg(unix)'.dependencies]
libc = "0.2"

View file

@ -20,6 +20,11 @@ extern crate rustc_middle;
extern crate rustc_session;
extern crate rustc_span;
/// See docs in https://github.com/rust-lang/rust/blob/HEAD/compiler/rustc/src/main.rs
/// and https://github.com/rust-lang/rust/pull/146627 for why we need this `use` statement.
#[cfg(any(target_os = "linux", target_os = "macos"))]
use tikv_jemalloc_sys as _;
mod log;
use std::env;
@ -395,48 +400,7 @@ fn parse_range(val: &str) -> Result<Range<u32>, &'static str> {
Ok(from..to)
}
#[cfg(any(target_os = "linux", target_os = "macos"))]
fn jemalloc_magic() {
// These magic runes are copied from
// <https://github.com/rust-lang/rust/blob/e89bd9428f621545c979c0ec686addc6563a394e/compiler/rustc/src/main.rs#L39>.
// See there for further comments.
use std::os::raw::{c_int, c_void};
use tikv_jemalloc_sys as jemalloc_sys;
#[used]
static _F1: unsafe extern "C" fn(usize, usize) -> *mut c_void = jemalloc_sys::calloc;
#[used]
static _F2: unsafe extern "C" fn(*mut *mut c_void, usize, usize) -> c_int =
jemalloc_sys::posix_memalign;
#[used]
static _F3: unsafe extern "C" fn(usize, usize) -> *mut c_void = jemalloc_sys::aligned_alloc;
#[used]
static _F4: unsafe extern "C" fn(usize) -> *mut c_void = jemalloc_sys::malloc;
#[used]
static _F5: unsafe extern "C" fn(*mut c_void, usize) -> *mut c_void = jemalloc_sys::realloc;
#[used]
static _F6: unsafe extern "C" fn(*mut c_void) = jemalloc_sys::free;
// On OSX, jemalloc doesn't directly override malloc/free, but instead
// registers itself with the allocator's zone APIs in a ctor. However,
// the linker doesn't seem to consider ctors as "used" when statically
// linking, so we need to explicitly depend on the function.
#[cfg(target_os = "macos")]
{
unsafe extern "C" {
fn _rjem_je_zone_register();
}
#[used]
static _F7: unsafe extern "C" fn() = _rjem_je_zone_register;
}
}
fn main() {
#[cfg(any(target_os = "linux", target_os = "macos"))]
jemalloc_magic();
let early_dcx = EarlyDiagCtxt::new(ErrorOutputType::default());
// Snapshot a copy of the environment before `rustc` starts messing with it.