Auto merge of #147810 - bjorn3:lto_refactors6, r=wesleywiser

Split LTO out of the main codegen coordinator event loop into a separate event loop on the same thread

This will make it easier to in the future move all this code to link_binary.

Follow up to https://github.com/rust-lang/rust/pull/146209
Part of https://github.com/rust-lang/compiler-team/issues/908
This commit is contained in:
bors 2025-10-22 22:50:15 +00:00
commit 6244effd03

View file

@ -15,8 +15,8 @@ use rustc_data_structures::profiling::{SelfProfilerRef, VerboseTimingGuard};
use rustc_errors::emitter::Emitter;
use rustc_errors::translation::Translator;
use rustc_errors::{
Diag, DiagArgMap, DiagCtxt, DiagMessage, ErrCode, FatalErrorMarker, Level, MultiSpan, Style,
Suggestions,
Diag, DiagArgMap, DiagCtxt, DiagMessage, ErrCode, FatalError, FatalErrorMarker, Level,
MultiSpan, Style, Suggestions,
};
use rustc_fs_util::link_or_copy;
use rustc_incremental::{
@ -374,7 +374,7 @@ fn generate_thin_lto_work<B: ExtraBackendMethods>(
each_linked_rlib_for_lto: &[PathBuf],
needs_thin_lto: Vec<(String, B::ThinBuffer)>,
import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
) -> Vec<(WorkItem<B>, u64)> {
) -> Vec<(ThinLtoWorkItem<B>, u64)> {
let _prof_timer = cgcx.prof.generic_activity("codegen_thin_generate_lto_work");
let (lto_modules, copy_jobs) = B::run_thin_lto(
@ -388,11 +388,11 @@ fn generate_thin_lto_work<B: ExtraBackendMethods>(
.into_iter()
.map(|module| {
let cost = module.cost();
(WorkItem::ThinLto(module), cost)
(ThinLtoWorkItem::ThinLto(module), cost)
})
.chain(copy_jobs.into_iter().map(|wp| {
(
WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
ThinLtoWorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
name: wp.cgu_name.clone(),
source: wp,
}),
@ -697,64 +697,73 @@ pub(crate) enum WorkItem<B: WriteBackendMethods> {
/// Copy the post-LTO artifacts from the incremental cache to the output
/// directory.
CopyPostLtoArtifacts(CachedModuleCodegen),
/// Performs fat LTO on the given module.
FatLto {
exported_symbols_for_lto: Arc<Vec<String>>,
each_linked_rlib_for_lto: Vec<PathBuf>,
needs_fat_lto: Vec<FatLtoInput<B>>,
import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
},
}
enum ThinLtoWorkItem<B: WriteBackendMethods> {
/// Copy the post-LTO artifacts from the incremental cache to the output
/// directory.
CopyPostLtoArtifacts(CachedModuleCodegen),
/// Performs thin-LTO on the given module.
ThinLto(lto::ThinModule<B>),
}
// `pthread_setname()` on *nix ignores anything beyond the first 15
// bytes. Use short descriptions to maximize the space available for
// the module name.
#[cfg(not(windows))]
fn desc(short: &str, _long: &str, name: &str) -> String {
// The short label is three bytes, and is followed by a space. That
// leaves 11 bytes for the CGU name. How we obtain those 11 bytes
// depends on the CGU name form.
//
// - Non-incremental, e.g. `regex.f10ba03eb5ec7975-cgu.0`: the part
// before the `-cgu.0` is the same for every CGU, so use the
// `cgu.0` part. The number suffix will be different for each
// CGU.
//
// - Incremental (normal), e.g. `2i52vvl2hco29us0`: use the whole
// name because each CGU will have a unique ASCII hash, and the
// first 11 bytes will be enough to identify it.
//
// - Incremental (with `-Zhuman-readable-cgu-names`), e.g.
// `regex.f10ba03eb5ec7975-re_builder.volatile`: use the whole
// name. The first 11 bytes won't be enough to uniquely identify
// it, but no obvious substring will, and this is a rarely used
// option so it doesn't matter much.
//
assert_eq!(short.len(), 3);
let name = if let Some(index) = name.find("-cgu.") {
&name[index + 1..] // +1 skips the leading '-'.
} else {
name
};
format!("{short} {name}")
}
// Windows has no thread name length limit, so use more descriptive names.
#[cfg(windows)]
fn desc(_short: &str, long: &str, name: &str) -> String {
format!("{long} {name}")
}
impl<B: WriteBackendMethods> WorkItem<B> {
/// Generate a short description of this work item suitable for use as a thread name.
fn short_description(&self) -> String {
// `pthread_setname()` on *nix ignores anything beyond the first 15
// bytes. Use short descriptions to maximize the space available for
// the module name.
#[cfg(not(windows))]
fn desc(short: &str, _long: &str, name: &str) -> String {
// The short label is three bytes, and is followed by a space. That
// leaves 11 bytes for the CGU name. How we obtain those 11 bytes
// depends on the CGU name form.
//
// - Non-incremental, e.g. `regex.f10ba03eb5ec7975-cgu.0`: the part
// before the `-cgu.0` is the same for every CGU, so use the
// `cgu.0` part. The number suffix will be different for each
// CGU.
//
// - Incremental (normal), e.g. `2i52vvl2hco29us0`: use the whole
// name because each CGU will have a unique ASCII hash, and the
// first 11 bytes will be enough to identify it.
//
// - Incremental (with `-Zhuman-readable-cgu-names`), e.g.
// `regex.f10ba03eb5ec7975-re_builder.volatile`: use the whole
// name. The first 11 bytes won't be enough to uniquely identify
// it, but no obvious substring will, and this is a rarely used
// option so it doesn't matter much.
//
assert_eq!(short.len(), 3);
let name = if let Some(index) = name.find("-cgu.") {
&name[index + 1..] // +1 skips the leading '-'.
} else {
name
};
format!("{short} {name}")
}
// Windows has no thread name length limit, so use more descriptive names.
#[cfg(windows)]
fn desc(_short: &str, long: &str, name: &str) -> String {
format!("{long} {name}")
}
match self {
WorkItem::Optimize(m) => desc("opt", "optimize module", &m.name),
WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for", &m.name),
WorkItem::FatLto { .. } => desc("lto", "fat LTO module", "everything"),
WorkItem::ThinLto(m) => desc("lto", "thin-LTO module", m.name()),
}
}
}
impl<B: WriteBackendMethods> ThinLtoWorkItem<B> {
/// Generate a short description of this work item suitable for use as a thread name.
fn short_description(&self) -> String {
match self {
ThinLtoWorkItem::CopyPostLtoArtifacts(m) => {
desc("cpy", "copy LTO artifacts for", &m.name)
}
ThinLtoWorkItem::ThinLto(m) => desc("lto", "thin-LTO module", m.name()),
}
}
}
@ -885,7 +894,7 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
cgcx: &CodegenContext<B>,
module: CachedModuleCodegen,
) -> WorkItemResult<B> {
) -> CompiledModule {
let _timer = cgcx
.prof
.generic_activity_with_arg("codegen_copy_artifacts_from_incr_cache", &*module.name);
@ -958,7 +967,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
cgcx.create_dcx().handle().emit_fatal(errors::NoSavedObjectFile { cgu_name: &module.name })
}
WorkItemResult::Finished(CompiledModule {
CompiledModule {
links_from_incr_cache,
kind: ModuleKind::Regular,
name: module.name,
@ -967,17 +976,19 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
bytecode,
assembly,
llvm_ir,
})
}
}
fn execute_fat_lto_work_item<B: ExtraBackendMethods>(
fn do_fat_lto<B: ExtraBackendMethods>(
cgcx: &CodegenContext<B>,
exported_symbols_for_lto: &[String],
each_linked_rlib_for_lto: &[PathBuf],
mut needs_fat_lto: Vec<FatLtoInput<B>>,
import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
) -> WorkItemResult<B> {
let _timer = cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", "everything");
) -> CompiledModule {
let _timer = cgcx.prof.verbose_generic_activity("LLVM_fatlto");
check_lto_allowed(&cgcx);
for (module, wp) in import_only_modules {
needs_fat_lto.push(FatLtoInput::Serialized { name: wp.cgu_name, buffer: module })
@ -989,19 +1000,155 @@ fn execute_fat_lto_work_item<B: ExtraBackendMethods>(
each_linked_rlib_for_lto,
needs_fat_lto,
);
let module = B::codegen(cgcx, module, &cgcx.module_config);
WorkItemResult::Finished(module)
B::codegen(cgcx, module, &cgcx.module_config)
}
fn do_thin_lto<'a, B: ExtraBackendMethods>(
cgcx: &'a CodegenContext<B>,
exported_symbols_for_lto: Arc<Vec<String>>,
each_linked_rlib_for_lto: Vec<PathBuf>,
needs_thin_lto: Vec<(String, <B as WriteBackendMethods>::ThinBuffer)>,
lto_import_only_modules: Vec<(
SerializedModule<<B as WriteBackendMethods>::ModuleBuffer>,
WorkProduct,
)>,
) -> Vec<CompiledModule> {
let _timer = cgcx.prof.verbose_generic_activity("LLVM_thinlto");
check_lto_allowed(&cgcx);
let (coordinator_send, coordinator_receive) = channel();
// First up, convert our jobserver into a helper thread so we can use normal
// mpsc channels to manage our messages and such.
// After we've requested tokens then we'll, when we can,
// get tokens on `coordinator_receive` which will
// get managed in the main loop below.
let coordinator_send2 = coordinator_send.clone();
let helper = jobserver::client()
.into_helper_thread(move |token| {
drop(coordinator_send2.send(ThinLtoMessage::Token(token)));
})
.expect("failed to spawn helper thread");
let mut work_items = vec![];
// We have LTO work to do. Perform the serial work here of
// figuring out what we're going to LTO and then push a
// bunch of work items onto our queue to do LTO. This all
// happens on the coordinator thread but it's very quick so
// we don't worry about tokens.
for (work, cost) in generate_thin_lto_work(
cgcx,
&exported_symbols_for_lto,
&each_linked_rlib_for_lto,
needs_thin_lto,
lto_import_only_modules,
) {
let insertion_index =
work_items.binary_search_by_key(&cost, |&(_, cost)| cost).unwrap_or_else(|e| e);
work_items.insert(insertion_index, (work, cost));
if cgcx.parallel {
helper.request_token();
}
}
let mut codegen_aborted = None;
// These are the Jobserver Tokens we currently hold. Does not include
// the implicit Token the compiler process owns no matter what.
let mut tokens = vec![];
// Amount of tokens that are used (including the implicit token).
let mut used_token_count = 0;
let mut compiled_modules = vec![];
// Run the message loop while there's still anything that needs message
// processing. Note that as soon as codegen is aborted we simply want to
// wait for all existing work to finish, so many of the conditions here
// only apply if codegen hasn't been aborted as they represent pending
// work to be done.
loop {
if codegen_aborted.is_none() {
if used_token_count == 0 && work_items.is_empty() {
// All codegen work is done.
break;
}
// Spin up what work we can, only doing this while we've got available
// parallelism slots and work left to spawn.
while used_token_count < tokens.len() + 1
&& let Some((item, _)) = work_items.pop()
{
spawn_thin_lto_work(&cgcx, coordinator_send.clone(), item);
used_token_count += 1;
}
} else {
// Don't queue up any more work if codegen was aborted, we're
// just waiting for our existing children to finish.
if used_token_count == 0 {
break;
}
}
// Relinquish accidentally acquired extra tokens. Subtract 1 for the implicit token.
tokens.truncate(used_token_count.saturating_sub(1));
match coordinator_receive.recv().unwrap() {
// Save the token locally and the next turn of the loop will use
// this to spawn a new unit of work, or it may get dropped
// immediately if we have no more work to spawn.
ThinLtoMessage::Token(token) => match token {
Ok(token) => {
tokens.push(token);
}
Err(e) => {
let msg = &format!("failed to acquire jobserver token: {e}");
cgcx.diag_emitter.fatal(msg);
codegen_aborted = Some(FatalError);
}
},
ThinLtoMessage::WorkItem { result } => {
// If a thread exits successfully then we drop a token associated
// with that worker and update our `used_token_count` count.
// We may later re-acquire a token to continue running more work.
// We may also not actually drop a token here if the worker was
// running with an "ephemeral token".
used_token_count -= 1;
match result {
Ok(compiled_module) => compiled_modules.push(compiled_module),
Err(Some(WorkerFatalError)) => {
// Like `CodegenAborted`, wait for remaining work to finish.
codegen_aborted = Some(FatalError);
}
Err(None) => {
// If the thread failed that means it panicked, so
// we abort immediately.
bug!("worker thread panicked");
}
}
}
}
}
if let Some(codegen_aborted) = codegen_aborted {
codegen_aborted.raise();
}
compiled_modules
}
fn execute_thin_lto_work_item<B: ExtraBackendMethods>(
cgcx: &CodegenContext<B>,
module: lto::ThinModule<B>,
) -> WorkItemResult<B> {
) -> CompiledModule {
let _timer = cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", module.name());
let module = B::optimize_thin(cgcx, module);
let module = B::codegen(cgcx, module, &cgcx.module_config);
WorkItemResult::Finished(module)
B::codegen(cgcx, module, &cgcx.module_config)
}
/// Messages sent to the coordinator.
@ -1035,6 +1182,17 @@ pub(crate) enum Message<B: WriteBackendMethods> {
CodegenAborted,
}
/// Messages sent to the coordinator.
pub(crate) enum ThinLtoMessage {
/// A jobserver token has become available. Sent from the jobserver helper
/// thread.
Token(io::Result<Acquired>),
/// The backend has finished processing a work item for a codegen unit.
/// Sent from a backend worker thread.
WorkItem { result: Result<CompiledModule, Option<WorkerFatalError>> },
}
/// A message sent from the coordinator thread to the main thread telling it to
/// process another codegen unit.
pub struct CguMessage;
@ -1086,9 +1244,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
regular_config: Arc<ModuleConfig>,
allocator_config: Arc<ModuleConfig>,
allocator_module: Option<ModuleCodegen<B::Module>>,
tx_to_llvm_workers: Sender<Message<B>>,
coordinator_send: Sender<Message<B>>,
) -> thread::JoinHandle<Result<CompiledModules, ()>> {
let coordinator_send = tx_to_llvm_workers;
let sess = tcx.sess;
let mut each_linked_rlib_for_lto = Vec::new();
@ -1307,7 +1464,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
let mut needs_fat_lto = Vec::new();
let mut needs_thin_lto = Vec::new();
let mut lto_import_only_modules = Vec::new();
let mut started_lto = false;
/// Possible state transitions:
/// - Ongoing -> Completed
@ -1397,63 +1553,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
if running_with_any_token(main_thread_state, running_with_own_token) == 0
&& work_items.is_empty()
{
// All codegen work is done. Do we have LTO work to do?
if needs_fat_lto.is_empty()
&& needs_thin_lto.is_empty()
&& lto_import_only_modules.is_empty()
{
// Nothing more to do!
break;
}
// We have LTO work to do. Perform the serial work here of
// figuring out what we're going to LTO and then push a
// bunch of work items onto our queue to do LTO. This all
// happens on the coordinator thread but it's very quick so
// we don't worry about tokens.
assert!(!started_lto);
started_lto = true;
let needs_fat_lto = mem::take(&mut needs_fat_lto);
let needs_thin_lto = mem::take(&mut needs_thin_lto);
let import_only_modules = mem::take(&mut lto_import_only_modules);
let each_linked_rlib_file_for_lto =
mem::take(&mut each_linked_rlib_file_for_lto);
check_lto_allowed(&cgcx);
if !needs_fat_lto.is_empty() {
assert!(needs_thin_lto.is_empty());
work_items.push((
WorkItem::FatLto {
exported_symbols_for_lto: Arc::clone(&exported_symbols_for_lto),
each_linked_rlib_for_lto: each_linked_rlib_file_for_lto,
needs_fat_lto,
import_only_modules,
},
0,
));
if cgcx.parallel {
helper.request_token();
}
} else {
for (work, cost) in generate_thin_lto_work(
&cgcx,
&exported_symbols_for_lto,
&each_linked_rlib_file_for_lto,
needs_thin_lto,
import_only_modules,
) {
let insertion_index = work_items
.binary_search_by_key(&cost, |&(_, cost)| cost)
.unwrap_or_else(|e| e);
work_items.insert(insertion_index, (work, cost));
if cgcx.parallel {
helper.request_token();
}
}
}
// All codegen work is done.
break;
}
// In this branch, we know that everything has been codegened,
@ -1591,12 +1692,10 @@ fn start_executing_work<B: ExtraBackendMethods>(
compiled_modules.push(compiled_module);
}
Ok(WorkItemResult::NeedsFatLto(fat_lto_input)) => {
assert!(!started_lto);
assert!(needs_thin_lto.is_empty());
needs_fat_lto.push(fat_lto_input);
}
Ok(WorkItemResult::NeedsThinLto(name, thin_buffer)) => {
assert!(!started_lto);
assert!(needs_fat_lto.is_empty());
needs_thin_lto.push((name, thin_buffer));
}
@ -1613,7 +1712,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
}
Message::AddImportOnlyModule { module_data, work_product } => {
assert!(!started_lto);
assert_eq!(codegen_state, Ongoing);
assert_eq!(main_thread_state, MainThreadState::Codegenning);
lto_import_only_modules.push((module_data, work_product));
@ -1622,12 +1720,43 @@ fn start_executing_work<B: ExtraBackendMethods>(
}
}
// Drop to print timings
drop(llvm_start_time);
if codegen_state == Aborted {
return Err(());
}
// Drop to print timings
drop(llvm_start_time);
drop(codegen_state);
drop(tokens);
drop(helper);
assert!(work_items.is_empty());
if !needs_fat_lto.is_empty() {
assert!(compiled_modules.is_empty());
assert!(needs_thin_lto.is_empty());
// This uses the implicit token
let module = do_fat_lto(
&cgcx,
&exported_symbols_for_lto,
&each_linked_rlib_file_for_lto,
needs_fat_lto,
lto_import_only_modules,
);
compiled_modules.push(module);
} else if !needs_thin_lto.is_empty() || !lto_import_only_modules.is_empty() {
assert!(compiled_modules.is_empty());
assert!(needs_fat_lto.is_empty());
compiled_modules.extend(do_thin_lto(
&cgcx,
exported_symbols_for_lto,
each_linked_rlib_file_for_lto,
needs_thin_lto,
lto_import_only_modules,
));
}
// Regardless of what order these modules completed in, report them to
// the backend in the same order every time to ensure that we're handing
@ -1718,20 +1847,9 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || {
let result = std::panic::catch_unwind(AssertUnwindSafe(|| match work {
WorkItem::Optimize(m) => execute_optimize_work_item(&cgcx, m),
WorkItem::CopyPostLtoArtifacts(m) => execute_copy_from_cache_work_item(&cgcx, m),
WorkItem::FatLto {
exported_symbols_for_lto,
each_linked_rlib_for_lto,
needs_fat_lto,
import_only_modules,
} => execute_fat_lto_work_item(
&cgcx,
&exported_symbols_for_lto,
&each_linked_rlib_for_lto,
needs_fat_lto,
import_only_modules,
),
WorkItem::ThinLto(m) => execute_thin_lto_work_item(&cgcx, m),
WorkItem::CopyPostLtoArtifacts(m) => {
WorkItemResult::Finished(execute_copy_from_cache_work_item(&cgcx, m))
}
}));
let msg = match result {
@ -1751,6 +1869,36 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
.expect("failed to spawn work thread");
}
fn spawn_thin_lto_work<'a, B: ExtraBackendMethods>(
cgcx: &'a CodegenContext<B>,
coordinator_send: Sender<ThinLtoMessage>,
work: ThinLtoWorkItem<B>,
) {
let cgcx = cgcx.clone();
B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || {
let result = std::panic::catch_unwind(AssertUnwindSafe(|| match work {
ThinLtoWorkItem::CopyPostLtoArtifacts(m) => execute_copy_from_cache_work_item(&cgcx, m),
ThinLtoWorkItem::ThinLto(m) => execute_thin_lto_work_item(&cgcx, m),
}));
let msg = match result {
Ok(result) => ThinLtoMessage::WorkItem { result: Ok(result) },
// We ignore any `FatalError` coming out of `execute_work_item`, as a
// diagnostic was already sent off to the main thread - just surface
// that there was an error in this worker.
Err(err) if err.is::<FatalErrorMarker>() => {
ThinLtoMessage::WorkItem { result: Err(Some(WorkerFatalError)) }
}
Err(_) => ThinLtoMessage::WorkItem { result: Err(None) },
};
drop(coordinator_send.send(msg));
})
.expect("failed to spawn work thread");
}
enum SharedEmitterMessage {
Diagnostic(Diagnostic),
InlineAsmError(SpanData, String, Level, Option<(String, Vec<InnerSpan>)>),