Add -Z large-data-threshold

This flag allows specifying the threshold size for placing static data
in large data sections when using the medium code model on x86-64.

When using -Ccode-model=medium, data no larger than this threshold uses
RIP-relative addressing (32-bit offsets), while data above the threshold
is placed in large data sections and uses absolute 64-bit addressing.
This allows the compiler to generate more efficient code for smaller data
while still supporting data larger than 2GB.

This mirrors the -mlarge-data-threshold flag available in GCC and Clang.
The default threshold is 65536 bytes (64KB) if not specified, matching
LLVM's default behavior.
This commit is contained in:
Farid Zakaria 2026-01-07 11:25:17 -08:00
parent 84c84421cc
commit 93f2e80f4a
7 changed files with 115 additions and 1 deletions

View file

@ -39,6 +39,7 @@ impl OwnedTargetMachine {
debug_info_compression: llvm::CompressionKind,
use_emulated_tls: bool,
use_wasm_eh: bool,
large_data_threshold: u64,
) -> Result<Self, LlvmError<'static>> {
// SAFETY: llvm::LLVMRustCreateTargetMachine copies pointed to data
let tm_ptr = unsafe {
@ -65,6 +66,7 @@ impl OwnedTargetMachine {
debug_info_compression,
use_emulated_tls,
use_wasm_eh,
large_data_threshold,
)
};

View file

@ -272,6 +272,8 @@ pub(crate) fn target_machine_factory(
let use_wasm_eh = wants_wasm_eh(sess);
let large_data_threshold = sess.opts.unstable_opts.large_data_threshold.unwrap_or(0);
let prof = SelfProfilerRef::clone(&sess.prof);
Arc::new(move |config: TargetMachineFactoryConfig| {
// Self-profile timer for invoking a factory to create a target machine.
@ -313,6 +315,7 @@ pub(crate) fn target_machine_factory(
debuginfo_compression,
use_emulated_tls,
use_wasm_eh,
large_data_threshold,
)
})
}

View file

@ -2338,6 +2338,7 @@ unsafe extern "C" {
DebugInfoCompression: CompressionKind,
UseEmulatedTls: bool,
UseWasmEH: bool,
LargeDataThreshold: u64,
) -> *mut TargetMachine;
pub(crate) fn LLVMRustAddLibraryInfo<'a>(

View file

@ -305,7 +305,7 @@ extern "C" LLVMTargetMachineRef LLVMRustCreateTargetMachine(
bool EmitStackSizeSection, bool RelaxELFRelocations, bool UseInitArray,
const char *SplitDwarfFile, const char *OutputObjFile,
LLVMRustCompressionKind DebugInfoCompression, bool UseEmulatedTls,
bool UseWasmEH) {
bool UseWasmEH, uint64_t LargeDataThreshold) {
auto OptLevel = fromRust(RustOptLevel);
auto RM = fromRust(RustReloc);
@ -381,6 +381,11 @@ extern "C" LLVMTargetMachineRef LLVMRustCreateTargetMachine(
TargetMachine *TM = TheTarget->createTargetMachine(
Trip.getTriple(), CPU, Feature, Options, RM, CM, OptLevel);
#endif
if (LargeDataThreshold != 0) {
TM->setLargeDataThreshold(LargeDataThreshold);
}
return wrap(TM);
}

View file

@ -2444,6 +2444,9 @@ options! {
`=skip-entry`
`=skip-exit`
Multiple options can be combined with commas."),
large_data_threshold: Option<u64> = (None, parse_opt_number, [TRACKED],
"set the threshold for objects to be stored in a \"large data\" section \
(only effective with -Ccode-model=medium, default: 65536)"),
layout_seed: Option<u64> = (None, parse_opt_number, [TRACKED],
"seed layout randomization"),
link_directives: bool = (true, parse_bool, [TRACKED],

View file

@ -0,0 +1,27 @@
# `large-data-threshold`
-----------------------
This flag controls the threshold for static data to be placed in large data
sections when using the `medium` code model on x86-64.
When using `-Ccode-model=medium`, static data whose size is at or below this
threshold will use RIP-relative addressing (32-bit offsets), while data larger
than the threshold will be placed in large data sections and use absolute
64-bit addressing. This allows the compiler to generate more efficient
code for smaller data while still supporting data larger than 2GB.
The default threshold is 65536 bytes (64KB) if not specified. Passing `0` is
treated the same as not specifying the flag.
## Example
```sh
rustc -Ccode-model=medium -Zlarge-data-threshold=1024 main.rs
```
This sets the threshold to 1KB, meaning data of 1024 bytes or less will use
RIP-relative addressing, while anything larger is placed in a large data section.
## Platform Support
This flag is only effective on x86-64 targets when using `-Ccode-model=medium`.
On other architectures or with other code models, this flag has no effect.

View file

@ -0,0 +1,73 @@
// Test for -Zlarge-data-threshold=...
// With the medium code model, statics larger than the threshold must be
// emitted in large data sections, while statics at or below the threshold stay
// in the regular sections. Only the .data/.bss and .ldata/.lbss pairs are
// exercised here: every static below is mutable, so .lrodata is not covered.
//@ assembly-output: emit-asm
//@ compile-flags: -Ccode-model=medium -Zlarge-data-threshold=4
//@ compile-flags: --target=x86_64-unknown-linux-gnu
//@ needs-llvm-components: x86
#![feature(no_core, lang_items)]
#![no_std]
#![no_core]
#![crate_type = "lib"]
// Minimal lang items declared locally, since this crate is built with `no_core`.
#[lang = "pointee_sized"]
pub trait PointeeSized {}
#[lang = "meta_sized"]
pub trait MetaSized: PointeeSized {}
#[lang = "sized"]
pub trait Sized: MetaSized {}
#[lang = "drop_in_place"]
fn drop_in_place<T>(_: *mut T) {}
// The statics below come in pairs: one with a non-zero initializer (expected
// in a data section) and one zero-initialized (expected in a bss section).
// The threshold passed in the compile flags above is 4 bytes.
#[used]
#[no_mangle]
// U (2 bytes) is below the threshold, should be in .data
static mut U: u16 = 123;
#[used]
#[no_mangle]
// V (2 bytes) is below the threshold, should be in .bss
static mut V: u16 = 0;
#[used]
#[no_mangle]
// W (4 bytes) is exactly at the threshold, should still be in .data
static mut W: u32 = 123;
#[used]
#[no_mangle]
// X (4 bytes) is exactly at the threshold, should still be in .bss
static mut X: u32 = 0;
#[used]
#[no_mangle]
// Y (8 bytes) is over the threshold, should be in .ldata
static mut Y: u64 = 123;
#[used]
#[no_mangle]
// Z (8 bytes) is over the threshold, should be in .lbss
static mut Z: u64 = 0;
// Expected assembly: each symbol's label must appear after its expected
// section directive, with no other section directive in between.
// CHECK: .section .data.U,
// CHECK-NOT: .section
// CHECK: U:
// CHECK: .section .bss.V,
// CHECK-NOT: .section
// CHECK: V:
// CHECK: .section .data.W,
// CHECK-NOT: .section
// CHECK: W:
// CHECK: .section .bss.X,
// CHECK-NOT: .section
// CHECK: X:
// CHECK: .section .ldata.Y,
// CHECK-NOT: .section
// CHECK: Y:
// CHECK: .section .lbss.Z,
// CHECK-NOT: .section
// CHECK: Z: