Auto merge of #147022 - Zalathar:no-args, r=wesleywiser

Remove current code for embedding command-line args in PDB

The compiler currently has code that will obtain a list of quoted command-line arguments, and pass it through to TargetMachine creation, so that the command-line args can be embedded in PDB output.

This PR removes that code, due to subtle concerns that might not have been apparent when it was originally added.

---

Those concerns include:
- The entire command-line quoting process is repeated every time a target-machine-factory is created. In incremental builds this typically occurs 500+ times, instead of happening only once. The repeated quoting constitutes a large chunk of instructions executed in the `large-workspace` benchmark.
  - See https://github.com/rust-lang/rust/pull/146804#issuecomment-3317322958 for an example of the perf consequences of skipping all that work.
  - This overhead occurs even when building for targets or configurations that don't emit PDB output.
- Command-line arguments are obtained in a way that completely bypasses the query system, which is a problem for the integrity of incremental compilation.
  - Fixing this alone is likely to inhibit incremental rebuilds for most or all CGUs, even in builds that don't emit PDB output.
- Command-line arguments and the executable path are obtained in a way that completely bypasses the compiler's path-remapping system, which is a reproducibility hazard.
  - https://github.com/rust-lang/rust/issues/128842

---

Relevant PRs:
- https://github.com/rust-lang/rust/pull/113492
- https://github.com/rust-lang/rust/pull/130446
- https://github.com/rust-lang/rust/pull/131805
- https://github.com/rust-lang/rust/pull/146700
- https://github.com/rust-lang/rust/pull/146973

Zulip thread:
- https://rust-lang.zulipchat.com/#narrow/channel/131828-t-compiler/topic/Some.20PDB.20info.20bypasses.20the.20query.20system.20and.20path.20remapping/with/541432211

---

According to rust-lang/rust#96475, one of the big motivations for embedding the command-line arguments was to enable tools like Live++. [It appears that Live++ doesn't actually support Rust yet](https://rust-lang.zulipchat.com/#narrow/channel/131828-t-compiler/topic/embeded.20compiler.20args.20and.20--remap-path-prefix/near/523800010), so it's possible that there aren't any existing workflows for this removal to break.

In the future, there could be a case for reintroducing some or all of this functionality, guarded behind an opt-in flag so that it doesn't cause problems for other users. But as it stands, the current implementation puts a disproportionate burden on other users and on compiler maintainers.
This commit is contained in:
bors 2025-10-22 00:21:08 +00:00
commit 96fe3c31c2
19 changed files with 1 additions and 162 deletions

View file

@ -1,37 +0,0 @@
#[cfg(test)]
mod tests;
/// Builds one string out of `args`: every argument is quoted/escaped on its
/// own, and consecutive arguments are separated by a single space.
///
/// The output is meant purely as information to embed in debug metadata; it
/// is not guaranteed to be quoted/escaped well enough to be replayed as a
/// real command line.
pub(crate) fn quote_command_line_args(args: &[String]) -> String {
    // rustc command lines are usually long, so begin with some headroom.
    let mut joined = String::with_capacity(128);
    for (position, arg) in args.iter().enumerate() {
        // Only arguments after the first need a separator in front of them.
        if position > 0 {
            joined.push(' ');
        }
        print_arg_quoted(&mut joined, arg);
    }
    joined
}
/// Appends `arg` to `buf` surrounded by double quotes, putting a backslash in
/// front of every `"`, `\` and `$` character inside it.
///
/// Mirrors LLVM's `sys::printArg` with quoting always enabled
/// (see llvm/lib/Support/Program.cpp).
fn print_arg_quoted(buf: &mut String, arg: &str) {
    // Space for the argument and its two surrounding quotes; any escape
    // characters are extra and simply let the string grow as needed.
    buf.reserve(arg.len() + 2);

    buf.push('"');
    for c in arg.chars() {
        match c {
            '"' | '\\' | '$' => {
                buf.push('\\');
                buf.push(c);
            }
            _ => buf.push(c),
        }
    }
    buf.push('"');
}

View file

@ -1,25 +0,0 @@
/// Table-driven check that `quote_command_line_args` produces the exact
/// quoted string expected for each list of raw arguments.
#[test]
fn quote_command_line_args() {
    use super::quote_command_line_args;

    // One row of the table: raw arguments and the quoted string they must yield.
    struct Scenario<'s> {
        input: &'s [&'s str],
        want: &'s str,
    }

    let scenarios = &[
        Scenario { input: &[], want: "" },
        Scenario { input: &["--hello", "world"], want: r#""--hello" "world""# },
        Scenario { input: &["--hello world"], want: r#""--hello world""# },
        Scenario {
            input: &["plain", "$dollar", "spa ce", r"back\slash", r#""quote""#, "plain"],
            want: r#""plain" "\$dollar" "spa ce" "back\\slash" "\"quote\"" "plain""#,
        },
    ];

    for scenario in scenarios.iter() {
        // The function under test takes owned strings, so convert each row first.
        let args: Vec<String> = scenario.input.iter().map(|piece| piece.to_string()).collect();
        let actual = quote_command_line_args(&args);
        assert_eq!(actual, scenario.want, "args {args:?}");
    }
}

View file

@ -1,5 +1,4 @@
pub(crate) mod archive;
mod command_line_args;
pub(crate) mod lto;
pub(crate) mod owned_target_machine;
mod profiling;

View file

@ -38,8 +38,6 @@ impl OwnedTargetMachine {
output_obj_file: &CStr,
debug_info_compression: &CStr,
use_emulated_tls: bool,
argv0: &str,
command_line_args: &str,
use_wasm_eh: bool,
) -> Result<Self, LlvmError<'static>> {
// SAFETY: llvm::LLVMRustCreateTargetMachine copies pointed to data
@ -66,10 +64,6 @@ impl OwnedTargetMachine {
output_obj_file.as_ptr(),
debug_info_compression.as_ptr(),
use_emulated_tls,
argv0.as_ptr(),
argv0.len(),
command_line_args.as_ptr(),
command_line_args.len(),
use_wasm_eh,
)
};

View file

@ -31,7 +31,6 @@ use rustc_span::{BytePos, InnerSpan, Pos, SpanData, SyntaxContext, sym};
use rustc_target::spec::{CodeModel, FloatAbi, RelocModel, SanitizerSet, SplitDebuginfo, TlsModel};
use tracing::{debug, trace};
use crate::back::command_line_args::quote_command_line_args;
use crate::back::lto::ThinBuffer;
use crate::back::owned_target_machine::OwnedTargetMachine;
use crate::back::profiling::{
@ -253,19 +252,6 @@ pub(crate) fn target_machine_factory(
let use_emulated_tls = matches!(sess.tls_model(), TlsModel::Emulated);
// Command-line information to be included in the target machine.
// This seems to only be used for embedding in PDB debuginfo files.
// FIXME(Zalathar): Maybe skip this for non-PDB targets?
let argv0 = std::env::current_exe()
.unwrap_or_default()
.into_os_string()
.into_string()
.unwrap_or_default();
let command_line_args = quote_command_line_args(&sess.expanded_args);
// Self-profile counter for the number of bytes produced by command-line quoting.
// Values are summed, so the summary result is cumulative across all TM factories.
sess.prof.artifact_size("quoted_command_line_args", "-", command_line_args.len() as u64);
let debuginfo_compression = sess.opts.debuginfo_compression.to_string();
match sess.opts.debuginfo_compression {
rustc_session::config::DebugInfoCompression::Zlib => {
@ -326,8 +312,6 @@ pub(crate) fn target_machine_factory(
&output_obj_file,
&debuginfo_compression,
use_emulated_tls,
&argv0,
&command_line_args,
use_wasm_eh,
)
})

View file

@ -2330,10 +2330,6 @@ unsafe extern "C" {
OutputObjFile: *const c_char,
DebugInfoCompression: *const c_char,
UseEmulatedTls: bool,
Argv0: *const c_uchar, // See "PTR_LEN_STR".
Argv0Len: size_t,
CommandLineArgs: *const c_uchar, // See "PTR_LEN_STR".
CommandLineArgsLen: size_t,
UseWasmEH: bool,
) -> *mut TargetMachine;

View file

@ -346,12 +346,6 @@ pub struct CodegenContext<B: WriteBackendMethods> {
pub split_dwarf_kind: rustc_session::config::SplitDwarfKind,
pub pointer_size: Size,
/// All commandline args used to invoke the compiler, with @file args fully expanded.
/// This will only be used within debug info, e.g. in the PDB file on Windows.
/// This is mainly useful for other tools that read that debuginfo to figure out
/// how to call the compiler with the same arguments.
pub expanded_args: Vec<String>,
/// Emitter to use for diagnostics produced during codegen.
pub diag_emitter: SharedEmitter,
/// LLVM optimizations for which we want to print remarks.
@ -1153,7 +1147,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
remark: sess.opts.cg.remark.clone(),
remark_dir,
incr_comp_session_dir: sess.incr_comp_session_dir_opt().map(|r| r.clone()),
expanded_args: tcx.sess.expanded_args.clone(),
diag_emitter: shared_emitter.clone(),
output_filenames: Arc::clone(tcx.output_filenames(())),
module_config: regular_config,

View file

@ -269,7 +269,6 @@ pub fn run_compiler(at_args: &[String], callbacks: &mut (dyn Callbacks + Send))
make_codegen_backend: None,
registry: diagnostics_registry(),
using_internal_features: &USING_INTERNAL_FEATURES,
expanded_args: args,
};
callbacks.config(&mut config);

View file

@ -376,12 +376,6 @@ pub struct Config {
/// enabled. Makes it so that "please report a bug" is hidden, as ICEs with
/// internal features are wontfix, and they are usually the cause of the ICEs.
pub using_internal_features: &'static std::sync::atomic::AtomicBool,
/// All commandline args used to invoke the compiler, with @file args fully expanded.
/// This will only be used within debug info, e.g. in the PDB file on Windows.
/// This is mainly useful for other tools that read that debuginfo to figure out
/// how to call the compiler with the same arguments.
pub expanded_args: Vec<String>,
}
/// Initialize jobserver before getting `jobserver::client` and `build_session`.
@ -480,7 +474,6 @@ pub fn run_compiler<R: Send>(config: Config, f: impl FnOnce(&Compiler) -> R + Se
util::rustc_version_str().unwrap_or("unknown"),
config.ice_file,
config.using_internal_features,
config.expanded_args,
);
codegen_backend.init(&sess);

View file

@ -78,7 +78,6 @@ where
"",
None,
&USING_INTERNAL_FEATURES,
Default::default(),
);
let cfg = parse_cfg(sess.dcx(), matches.opt_strs("cfg"));
let cfg = build_configuration(&sess, cfg);

View file

@ -271,9 +271,7 @@ extern "C" LLVMTargetMachineRef LLVMRustCreateTargetMachine(
bool TrapUnreachable, bool Singlethread, bool VerboseAsm,
bool EmitStackSizeSection, bool RelaxELFRelocations, bool UseInitArray,
const char *SplitDwarfFile, const char *OutputObjFile,
const char *DebugInfoCompression, bool UseEmulatedTls, const char *Argv0,
size_t Argv0Len, const char *CommandLineArgs, size_t CommandLineArgsLen,
bool UseWasmEH) {
const char *DebugInfoCompression, bool UseEmulatedTls, bool UseWasmEH) {
auto OptLevel = fromRust(RustOptLevel);
auto RM = fromRust(RustReloc);
@ -348,11 +346,6 @@ extern "C" LLVMTargetMachineRef LLVMRustCreateTargetMachine(
Options.EmitStackSizeSection = EmitStackSizeSection;
if (Argv0 != nullptr)
Options.MCOptions.Argv0 = {Argv0, Argv0Len};
if (CommandLineArgs != nullptr)
Options.MCOptions.CommandlineArgs = {CommandLineArgs, CommandLineArgsLen};
#if LLVM_VERSION_GE(21, 0)
TargetMachine *TM = TheTarget->createTargetMachine(Trip, CPU, Feature,
Options, RM, CM, OptLevel);

View file

@ -148,12 +148,6 @@ pub struct Session {
/// None signifies that this is not tracked.
pub using_internal_features: &'static AtomicBool,
/// All commandline args used to invoke the compiler, with @file args fully expanded.
/// This will only be used within debug info, e.g. in the PDB file on Windows.
/// This is mainly useful for other tools that read that debuginfo to figure out
/// how to call the compiler with the same arguments.
pub expanded_args: Vec<String>,
target_filesearch: FileSearch,
host_filesearch: FileSearch,
@ -1017,7 +1011,6 @@ pub fn build_session(
cfg_version: &'static str,
ice_file: Option<PathBuf>,
using_internal_features: &'static AtomicBool,
expanded_args: Vec<String>,
) -> Session {
// FIXME: This is not general enough to make the warning lint completely override
// normal diagnostic warnings, since the warning lint can also be denied and changed
@ -1134,7 +1127,6 @@ pub fn build_session(
unstable_target_features: Default::default(),
cfg_version,
using_internal_features,
expanded_args,
target_filesearch,
host_filesearch,
invocation_temp,

View file

@ -165,12 +165,6 @@ pub(crate) struct Options {
/// to have it in both places.
pub(crate) unstable_features: rustc_feature::UnstableFeatures,
/// All commandline args used to invoke the compiler, with @file args fully expanded.
/// This will only be used within debug info, e.g. in the PDB file on Windows.
/// This is mainly useful for other tools that read that debuginfo to figure out
/// how to call the compiler with the same arguments.
pub(crate) expanded_args: Vec<String>,
/// Arguments to be used when compiling doctests.
pub(crate) doctest_build_args: Vec<String>,
@ -870,7 +864,6 @@ impl Options {
json_unused_externs,
scrape_examples_options,
unstable_features,
expanded_args: args,
doctest_build_args,
target_modifiers,
};

View file

@ -213,7 +213,6 @@ pub(crate) fn create_config(
describe_lints,
lint_cap,
scrape_examples_options,
expanded_args,
remap_path_prefix,
target_modifiers,
..
@ -326,7 +325,6 @@ pub(crate) fn create_config(
registry: rustc_driver::diagnostics_registry(),
ice_file: None,
using_internal_features: &USING_INTERNAL_FEATURES,
expanded_args,
}
}

View file

@ -191,7 +191,6 @@ pub(crate) fn run(dcx: DiagCtxtHandle<'_>, input: Input, options: RustdocOptions
registry: rustc_driver::diagnostics_registry(),
ice_file: None,
using_internal_features: &rustc_driver::USING_INTERNAL_FEATURES,
expanded_args: options.expanded_args.clone(),
};
let externs = options.externs.clone();

View file

@ -1,4 +0,0 @@
CHECK: LF_BUILDINFO
CHECK: rustc.exe
CHECK: main.rs
CHECK: "-g" "--crate-name" "my_crate_name" "--crate-type" "bin" "-Cmetadata=dc9ef878b0a48666"

View file

@ -1 +0,0 @@
// Deliberately empty program.
// NOTE(review): presumably the `main.rs` input compiled by the accompanying
// run-make test — confirm against the test's directory layout.
fn main() {}

View file

@ -1,25 +0,0 @@
// Check if the pdb file contains the following information in the LF_BUILDINFO:
// 1. full path to the compiler (cl)
// 2. the commandline args to compile it (cmd)
// This is because these used to be missing in #96475.
// See https://github.com/rust-lang/rust/pull/113492
//@ only-windows-msvc
// Reason: pdb files are unique to this architecture
use run_make_support::{llvm, rustc};
fn main() {
    // Step 1: compile the fixture with debuginfo and a fixed crate name/metadata,
    // so the command line recorded for it is predictable.
    rustc()
        .input("main.rs")
        .arg("-g")
        .crate_name("my_crate_name")
        .crate_type("bin")
        .metadata("dc9ef878b0a48666")
        .run();

    // Step 2: dump the ID stream of the PDB produced by the compile above.
    let pdb_dump = llvm::llvm_pdbutil().arg("dump").arg("-ids").input("my_crate_name.pdb").run();
    let dump_text = pdb_dump.stdout_utf8();

    // Step 3: match the dump against the expected patterns.
    llvm::llvm_filecheck().patterns("filecheck.txt").stdin_buf(dump_text).run();
}

View file

@ -74,7 +74,6 @@ fn compile(code: String, output: PathBuf, sysroot: Sysroot, linker: Option<&Path
make_codegen_backend: None,
registry: rustc_driver::diagnostics_registry(),
using_internal_features: &rustc_driver::USING_INTERNAL_FEATURES,
expanded_args: Default::default(),
};
interface::run_compiler(config, |compiler| {