Add -Z large-data-threshold

This flag allows specifying the threshold size for placing static data in large data sections when using the medium code model on x86-64. When using -Ccode-model=medium, data no larger than this threshold uses RIP-relative addressing (32-bit offsets), while data larger than the threshold uses absolute 64-bit addressing. This allows the compiler to generate more efficient code for smaller data while still supporting data larger than 2GB. This mirrors the -mlarge-data-threshold flag available in GCC and Clang. The default threshold is 65536 bytes (64KB) if not specified, matching LLVM's default behavior.
2026-01-07 11:25:17 -08:00 · 2026-01-07 11:25:17 -08:00 · 93f2e80f4a
commit 93f2e80f4a
parent 84c84421cc
7 changed files with 115 additions and 1 deletion
--- a/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
+++ b/compiler/rustc_codegen_llvm/src/back/owned_target_machine.rs
@ -39,6 +39,7 @@ impl OwnedTargetMachine {
        debug_info_compression: llvm::CompressionKind,
        use_emulated_tls: bool,
        use_wasm_eh: bool,
+        large_data_threshold: u64,
    ) -> Result<Self, LlvmError<'static>> {
        // SAFETY: llvm::LLVMRustCreateTargetMachine copies pointed to data
        let tm_ptr = unsafe {
@ -65,6 +66,7 @@ impl OwnedTargetMachine {
                debug_info_compression,
                use_emulated_tls,
                use_wasm_eh,
+                large_data_threshold,
            )
        };

--- a/compiler/rustc_codegen_llvm/src/back/write.rs
+++ b/compiler/rustc_codegen_llvm/src/back/write.rs
@ -272,6 +272,8 @@ pub(crate) fn target_machine_factory(

    let use_wasm_eh = wants_wasm_eh(sess);

+    let large_data_threshold = sess.opts.unstable_opts.large_data_threshold.unwrap_or(0);
+
    let prof = SelfProfilerRef::clone(&sess.prof);
    Arc::new(move |config: TargetMachineFactoryConfig| {
        // Self-profile timer for invoking a factory to create a target machine.
@ -313,6 +315,7 @@ pub(crate) fn target_machine_factory(
            debuginfo_compression,
            use_emulated_tls,
            use_wasm_eh,
+            large_data_threshold,
        )
    })
 }
--- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@ -2338,6 +2338,7 @@ unsafe extern "C" {
        DebugInfoCompression: CompressionKind,
        UseEmulatedTls: bool,
        UseWasmEH: bool,
+        LargeDataThreshold: u64,
    ) -> *mut TargetMachine;

    pub(crate) fn LLVMRustAddLibraryInfo<'a>(
--- a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
+++ b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp
@ -305,7 +305,7 @@ extern "C" LLVMTargetMachineRef LLVMRustCreateTargetMachine(
    bool EmitStackSizeSection, bool RelaxELFRelocations, bool UseInitArray,
    const char *SplitDwarfFile, const char *OutputObjFile,
    LLVMRustCompressionKind DebugInfoCompression, bool UseEmulatedTls,
-    bool UseWasmEH) {
+    bool UseWasmEH, uint64_t LargeDataThreshold) {

  auto OptLevel = fromRust(RustOptLevel);
  auto RM = fromRust(RustReloc);
@ -381,6 +381,11 @@ extern "C" LLVMTargetMachineRef LLVMRustCreateTargetMachine(
  TargetMachine *TM = TheTarget->createTargetMachine(
      Trip.getTriple(), CPU, Feature, Options, RM, CM, OptLevel);
 #endif
+
+  if (LargeDataThreshold != 0) {
+    TM->setLargeDataThreshold(LargeDataThreshold);
+  }
+
  return wrap(TM);
 }

--- a/compiler/rustc_session/src/options.rs
+++ b/compiler/rustc_session/src/options.rs
@ -2444,6 +2444,9 @@ options! {
         `=skip-entry`
         `=skip-exit`
         Multiple options can be combined with commas."),
+    large_data_threshold: Option<u64> = (None, parse_opt_number, [TRACKED],
+        "set the threshold for objects to be stored in a \"large data\" section \
+         (only effective with -Ccode-model=medium, default: 65536)"),
    layout_seed: Option<u64> = (None, parse_opt_number, [TRACKED],
        "seed layout randomization"),
    link_directives: bool = (true, parse_bool, [TRACKED],
--- a/src/doc/unstable-book/src/compiler-flags/large-data-threshold.md
+++ b/src/doc/unstable-book/src/compiler-flags/large-data-threshold.md
@ -0,0 +1,27 @@
+# `large-data-threshold`
+
+-----------------------
+
+This flag controls the threshold for static data to be placed in large data
+sections when using the `medium` code model on x86-64.
+
+When using `-Ccode-model=medium`, static data no larger than this threshold
+will use RIP-relative addressing (32-bit offsets), while larger data will use
+absolute 64-bit addressing. This allows the compiler to generate more efficient
+code for smaller data while still supporting data larger than 2GB.
+
+The default threshold is 65536 bytes (64KB) if not specified.
+
+## Example
+
+```sh
+rustc -Ccode-model=medium -Zlarge-data-threshold=1024 main.rs
+```
+
+This sets the threshold to 1KB, meaning only data of 1024 bytes or less will
+use RIP-relative addressing.
+
+## Platform Support
+
+This flag is only effective on x86-64 targets when using `-Ccode-model=medium`.
+On other architectures or with other code models, this flag has no effect.
--- a/tests/assembly-llvm/large_data_threshold.rs
+++ b/tests/assembly-llvm/large_data_threshold.rs
@ -0,0 +1,73 @@
+// Test for -Zlarge-data-threshold=...
+// This test verifies that with the medium code model, data above the threshold
+// is placed in large data sections (.ldata, .lbss, .lrodata).
+//@ assembly-output: emit-asm
+//@ compile-flags: -Ccode-model=medium -Zlarge-data-threshold=4
+//@ compile-flags: --target=x86_64-unknown-linux-gnu
+//@ needs-llvm-components: x86
+
+#![feature(no_core, lang_items)]
+#![no_std]
+#![no_core]
+#![crate_type = "lib"]
+
+#[lang = "pointee_sized"]
+pub trait PointeeSized {}
+
+#[lang = "meta_sized"]
+pub trait MetaSized: PointeeSized {}
+
+#[lang = "sized"]
+pub trait Sized: MetaSized {}
+
+#[lang = "drop_in_place"]
+fn drop_in_place<T>(_: *mut T) {}
+
+#[used]
+#[no_mangle]
+// U is below the threshold, should be in .data
+static mut U: u16 = 123;
+
+#[used]
+#[no_mangle]
+// V is below the threshold, should be in .bss
+static mut V: u16 = 0;
+
+#[used]
+#[no_mangle]
+// W is at the threshold, should be in .data
+static mut W: u32 = 123;
+
+#[used]
+#[no_mangle]
+// X is at the threshold, should be in .bss
+static mut X: u32 = 0;
+
+#[used]
+#[no_mangle]
+// Y is over the threshold, should be in .ldata
+static mut Y: u64 = 123;
+
+#[used]
+#[no_mangle]
+// Z is over the threshold, should be in .lbss
+static mut Z: u64 = 0;
+
+// CHECK: .section .data.U,
+// CHECK-NOT: .section
+// CHECK: U:
+// CHECK: .section .bss.V,
+// CHECK-NOT: .section
+// CHECK: V:
+// CHECK: .section .data.W,
+// CHECK-NOT: .section
+// CHECK: W:
+// CHECK: .section .bss.X,
+// CHECK-NOT: .section
+// CHECK: X:
+// CHECK: .section .ldata.Y,
+// CHECK-NOT: .section
+// CHECK: Y:
+// CHECK: .section .lbss.Z,
+// CHECK-NOT: .section
+// CHECK: Z: