Rollup merge of #143388 - bjorn3:lto_refactors, r=compiler-errors

Various refactors to the LTO handling code

In particular, this reduces the sharing of code paths between fat and thin LTO and makes the fat LTO implementation more self-contained. It also moves some autodiff handling out of cg_ssa into cg_llvm, given that Enzyme only works with LLVM anyway and an implementation for another backend may do things entirely differently. This will also make it a bit easier to split LTO handling out of the coordinator thread's main loop into a separate loop, which should reduce the complexity of the coordinator thread.
This commit is contained in:
León Orell Valerian Liehr 2025-07-17 03:58:28 +02:00 committed by GitHub
commit be5f8f299d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 154 additions and 240 deletions

View file

@ -24,7 +24,7 @@ use std::sync::Arc;
use gccjit::{Context, OutputKind};
use object::read::archive::ArchiveFile;
use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared};
use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule, ThinShared};
use rustc_codegen_ssa::back::symbol_export;
use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput};
use rustc_codegen_ssa::traits::*;
@ -176,7 +176,7 @@ pub(crate) fn run_fat(
cgcx: &CodegenContext<GccCodegenBackend>,
modules: Vec<FatLtoInput<GccCodegenBackend>>,
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
) -> Result<LtoModuleCodegen<GccCodegenBackend>, FatalError> {
) -> Result<ModuleCodegen<GccContext>, FatalError> {
let dcx = cgcx.create_dcx();
let dcx = dcx.handle();
let lto_data = prepare_lto(cgcx, dcx)?;
@ -201,7 +201,7 @@ fn fat_lto(
mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
tmp_path: TempDir,
//symbols_below_threshold: &[String],
) -> Result<LtoModuleCodegen<GccCodegenBackend>, FatalError> {
) -> Result<ModuleCodegen<GccContext>, FatalError> {
let _timer = cgcx.prof.generic_activity("GCC_fat_lto_build_monolithic_module");
info!("going for a fat lto");
@ -334,7 +334,7 @@ fn fat_lto(
// of now.
module.module_llvm.temp_dir = Some(tmp_path);
Ok(LtoModuleCodegen::Fat(module))
Ok(module)
}
pub struct ModuleBuffer(PathBuf);
@ -358,7 +358,7 @@ pub(crate) fn run_thin(
cgcx: &CodegenContext<GccCodegenBackend>,
modules: Vec<(String, ThinBuffer)>,
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
) -> Result<(Vec<LtoModuleCodegen<GccCodegenBackend>>, Vec<WorkProduct>), FatalError> {
) -> Result<(Vec<ThinModule<GccCodegenBackend>>, Vec<WorkProduct>), FatalError> {
let dcx = cgcx.create_dcx();
let dcx = dcx.handle();
let lto_data = prepare_lto(cgcx, dcx)?;
@ -427,7 +427,7 @@ fn thin_lto(
tmp_path: TempDir,
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
//_symbols_below_threshold: &[String],
) -> Result<(Vec<LtoModuleCodegen<GccCodegenBackend>>, Vec<WorkProduct>), FatalError> {
) -> Result<(Vec<ThinModule<GccCodegenBackend>>, Vec<WorkProduct>), FatalError> {
let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis");
info!("going for that thin, thin LTO");
@ -573,8 +573,7 @@ fn thin_lto(
}*/
info!(" - {}: re-compiled", module_name);
opt_jobs
.push(LtoModuleCodegen::Thin(ThinModule { shared: shared.clone(), idx: module_index }));
opt_jobs.push(ThinModule { shared: shared.clone(), idx: module_index });
}
// Save the current ThinLTO import information for the next compilation

View file

@ -16,10 +16,12 @@ use crate::{GccCodegenBackend, GccContext};
pub(crate) fn codegen(
cgcx: &CodegenContext<GccCodegenBackend>,
dcx: DiagCtxtHandle<'_>,
module: ModuleCodegen<GccContext>,
config: &ModuleConfig,
) -> Result<CompiledModule, FatalError> {
let dcx = cgcx.create_dcx();
let dcx = dcx.handle();
let _timer = cgcx.prof.generic_activity_with_arg("GCC_module_codegen", &*module.name);
{
let context = &module.module_llvm.context;

View file

@ -93,7 +93,7 @@ use gccjit::{CType, Context, OptimizationLevel};
use gccjit::{TargetInfo, Version};
use rustc_ast::expand::allocator::AllocatorKind;
use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule};
use rustc_codegen_ssa::back::write::{
CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryFn,
};
@ -353,11 +353,16 @@ impl WriteBackendMethods for GccCodegenBackend {
type ThinData = ThinData;
type ThinBuffer = ThinBuffer;
fn run_fat_lto(
fn run_and_optimize_fat_lto(
cgcx: &CodegenContext<Self>,
modules: Vec<FatLtoInput<Self>>,
cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
) -> Result<LtoModuleCodegen<Self>, FatalError> {
diff_fncs: Vec<AutoDiffItem>,
) -> Result<ModuleCodegen<Self::Module>, FatalError> {
if !diff_fncs.is_empty() {
unimplemented!();
}
back::lto::run_fat(cgcx, modules, cached_modules)
}
@ -365,7 +370,7 @@ impl WriteBackendMethods for GccCodegenBackend {
cgcx: &CodegenContext<Self>,
modules: Vec<(String, Self::ThinBuffer)>,
cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError> {
) -> Result<(Vec<ThinModule<Self>>, Vec<WorkProduct>), FatalError> {
back::lto::run_thin(cgcx, modules, cached_modules)
}
@ -387,14 +392,6 @@ impl WriteBackendMethods for GccCodegenBackend {
Ok(())
}
fn optimize_fat(
_cgcx: &CodegenContext<Self>,
_module: &mut ModuleCodegen<Self::Module>,
) -> Result<(), FatalError> {
// TODO(antoyo)
Ok(())
}
fn optimize_thin(
cgcx: &CodegenContext<Self>,
thin: ThinModule<Self>,
@ -404,11 +401,10 @@ impl WriteBackendMethods for GccCodegenBackend {
fn codegen(
cgcx: &CodegenContext<Self>,
dcx: DiagCtxtHandle<'_>,
module: ModuleCodegen<Self::Module>,
config: &ModuleConfig,
) -> Result<CompiledModule, FatalError> {
back::write::codegen(cgcx, dcx, module, config)
back::write::codegen(cgcx, module, config)
}
fn prepare_thin(
@ -429,15 +425,6 @@ impl WriteBackendMethods for GccCodegenBackend {
) -> Result<ModuleCodegen<Self::Module>, FatalError> {
back::write::link(cgcx, dcx, modules)
}
fn autodiff(
_cgcx: &CodegenContext<Self>,
_module: &ModuleCodegen<Self::Module>,
_diff_functions: Vec<AutoDiffItem>,
_config: &ModuleConfig,
) -> Result<(), FatalError> {
unimplemented!()
}
}
/// This is the entrypoint for a hot plugged rustc_codegen_gccjit

View file

@ -1,5 +1,4 @@
codegen_llvm_autodiff_without_enable = using the autodiff feature requires -Z autodiff=Enable
codegen_llvm_autodiff_without_lto = using the autodiff feature requires using fat-lto
codegen_llvm_copy_bitcode = failed to copy bitcode to object file: {$err}

View file

@ -7,7 +7,7 @@ use std::sync::Arc;
use std::{io, iter, slice};
use object::read::archive::ArchiveFile;
use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared};
use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule, ThinShared};
use rustc_codegen_ssa::back::symbol_export;
use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput};
use rustc_codegen_ssa::traits::*;
@ -201,7 +201,7 @@ pub(crate) fn run_fat(
cgcx: &CodegenContext<LlvmCodegenBackend>,
modules: Vec<FatLtoInput<LlvmCodegenBackend>>,
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> {
) -> Result<ModuleCodegen<ModuleLlvm>, FatalError> {
let dcx = cgcx.create_dcx();
let dcx = dcx.handle();
let (symbols_below_threshold, upstream_modules) = prepare_lto(cgcx, dcx)?;
@ -217,7 +217,7 @@ pub(crate) fn run_thin(
cgcx: &CodegenContext<LlvmCodegenBackend>,
modules: Vec<(String, ThinBuffer)>,
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
) -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> {
) -> Result<(Vec<ThinModule<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> {
let dcx = cgcx.create_dcx();
let dcx = dcx.handle();
let (symbols_below_threshold, upstream_modules) = prepare_lto(cgcx, dcx)?;
@ -248,7 +248,7 @@ fn fat_lto(
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
symbols_below_threshold: &[*const libc::c_char],
) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> {
) -> Result<ModuleCodegen<ModuleLlvm>, FatalError> {
let _timer = cgcx.prof.generic_activity("LLVM_fat_lto_build_monolithic_module");
info!("going for a fat lto");
@ -366,7 +366,7 @@ fn fat_lto(
save_temp_bitcode(cgcx, &module, "lto.after-restriction");
}
Ok(LtoModuleCodegen::Fat(module))
Ok(module)
}
pub(crate) struct Linker<'a>(&'a mut llvm::Linker<'a>);
@ -436,7 +436,7 @@ fn thin_lto(
serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
symbols_below_threshold: &[*const libc::c_char],
) -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> {
) -> Result<(Vec<ThinModule<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> {
let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis");
unsafe {
info!("going for that thin, thin LTO");
@ -568,10 +568,7 @@ fn thin_lto(
}
info!(" - {}: re-compiled", module_name);
opt_jobs.push(LtoModuleCodegen::Thin(ThinModule {
shared: Arc::clone(&shared),
idx: module_index,
}));
opt_jobs.push(ThinModule { shared: Arc::clone(&shared), idx: module_index });
}
// Save the current ThinLTO import information for the next compilation

View file

@ -817,10 +817,12 @@ pub(crate) fn link(
pub(crate) fn codegen(
cgcx: &CodegenContext<LlvmCodegenBackend>,
dcx: DiagCtxtHandle<'_>,
module: ModuleCodegen<ModuleLlvm>,
config: &ModuleConfig,
) -> Result<CompiledModule, FatalError> {
let dcx = cgcx.create_dcx();
let dcx = dcx.handle();
let _timer = cgcx.prof.generic_activity_with_arg("LLVM_module_codegen", &*module.name);
{
let llmod = module.module_llvm.llmod();

View file

@ -2,7 +2,6 @@ use std::ptr;
use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, AutoDiffItem, DiffActivity, DiffMode};
use rustc_codegen_ssa::ModuleCodegen;
use rustc_codegen_ssa::back::write::ModuleConfig;
use rustc_codegen_ssa::common::TypeKind;
use rustc_codegen_ssa::traits::BaseTypeCodegenMethods;
use rustc_errors::FatalError;
@ -461,7 +460,6 @@ pub(crate) fn differentiate<'ll>(
module: &'ll ModuleCodegen<ModuleLlvm>,
cgcx: &CodegenContext<LlvmCodegenBackend>,
diff_items: Vec<AutoDiffItem>,
_config: &ModuleConfig,
) -> Result<(), FatalError> {
for item in &diff_items {
trace!("{}", item);

View file

@ -37,10 +37,6 @@ impl<G: EmissionGuarantee> Diagnostic<'_, G> for ParseTargetMachineConfig<'_> {
}
}
#[derive(Diagnostic)]
#[diag(codegen_llvm_autodiff_without_lto)]
pub(crate) struct AutoDiffWithoutLTO;
#[derive(Diagnostic)]
#[diag(codegen_llvm_autodiff_without_enable)]
pub(crate) struct AutoDiffWithoutEnable;

View file

@ -26,11 +26,11 @@ use std::mem::ManuallyDrop;
use back::owned_target_machine::OwnedTargetMachine;
use back::write::{create_informational_target_machine, create_target_machine};
use context::SimpleCx;
use errors::{AutoDiffWithoutLTO, ParseTargetMachineConfig};
use errors::ParseTargetMachineConfig;
use llvm_util::target_config;
use rustc_ast::expand::allocator::AllocatorKind;
use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule};
use rustc_codegen_ssa::back::write::{
CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryConfig, TargetMachineFactoryFn,
};
@ -43,7 +43,7 @@ use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
use rustc_middle::ty::TyCtxt;
use rustc_middle::util::Providers;
use rustc_session::Session;
use rustc_session::config::{Lto, OptLevel, OutputFilenames, PrintKind, PrintRequest};
use rustc_session::config::{OptLevel, OutputFilenames, PrintKind, PrintRequest};
use rustc_span::Symbol;
mod back {
@ -174,18 +174,29 @@ impl WriteBackendMethods for LlvmCodegenBackend {
) -> Result<ModuleCodegen<Self::Module>, FatalError> {
back::write::link(cgcx, dcx, modules)
}
fn run_fat_lto(
fn run_and_optimize_fat_lto(
cgcx: &CodegenContext<Self>,
modules: Vec<FatLtoInput<Self>>,
cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
) -> Result<LtoModuleCodegen<Self>, FatalError> {
back::lto::run_fat(cgcx, modules, cached_modules)
diff_fncs: Vec<AutoDiffItem>,
) -> Result<ModuleCodegen<Self::Module>, FatalError> {
let mut module = back::lto::run_fat(cgcx, modules, cached_modules)?;
if !diff_fncs.is_empty() {
builder::autodiff::differentiate(&module, cgcx, diff_fncs)?;
}
let dcx = cgcx.create_dcx();
let dcx = dcx.handle();
back::lto::run_pass_manager(cgcx, dcx, &mut module, false)?;
Ok(module)
}
fn run_thin_lto(
cgcx: &CodegenContext<Self>,
modules: Vec<(String, Self::ThinBuffer)>,
cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError> {
) -> Result<(Vec<ThinModule<Self>>, Vec<WorkProduct>), FatalError> {
back::lto::run_thin(cgcx, modules, cached_modules)
}
fn optimize(
@ -196,14 +207,6 @@ impl WriteBackendMethods for LlvmCodegenBackend {
) -> Result<(), FatalError> {
back::write::optimize(cgcx, dcx, module, config)
}
fn optimize_fat(
cgcx: &CodegenContext<Self>,
module: &mut ModuleCodegen<Self::Module>,
) -> Result<(), FatalError> {
let dcx = cgcx.create_dcx();
let dcx = dcx.handle();
back::lto::run_pass_manager(cgcx, dcx, module, false)
}
fn optimize_thin(
cgcx: &CodegenContext<Self>,
thin: ThinModule<Self>,
@ -212,11 +215,10 @@ impl WriteBackendMethods for LlvmCodegenBackend {
}
fn codegen(
cgcx: &CodegenContext<Self>,
dcx: DiagCtxtHandle<'_>,
module: ModuleCodegen<Self::Module>,
config: &ModuleConfig,
) -> Result<CompiledModule, FatalError> {
back::write::codegen(cgcx, dcx, module, config)
back::write::codegen(cgcx, module, config)
}
fn prepare_thin(
module: ModuleCodegen<Self::Module>,
@ -227,19 +229,6 @@ impl WriteBackendMethods for LlvmCodegenBackend {
fn serialize_module(module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer) {
(module.name, back::lto::ModuleBuffer::new(module.module_llvm.llmod()))
}
/// Generate autodiff rules
fn autodiff(
cgcx: &CodegenContext<Self>,
module: &ModuleCodegen<Self::Module>,
diff_fncs: Vec<AutoDiffItem>,
config: &ModuleConfig,
) -> Result<(), FatalError> {
if cgcx.lto != Lto::Fat {
let dcx = cgcx.create_dcx();
return Err(dcx.handle().emit_almost_fatal(AutoDiffWithoutLTO));
}
builder::autodiff::differentiate(module, cgcx, diff_fncs, config)
}
}
impl LlvmCodegenBackend {

View file

@ -1,13 +1,8 @@
use std::ffi::CString;
use std::sync::Arc;
use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
use rustc_data_structures::memmap::Mmap;
use rustc_errors::FatalError;
use super::write::CodegenContext;
use crate::ModuleCodegen;
use crate::back::write::ModuleConfig;
use crate::traits::*;
pub struct ThinModule<B: WriteBackendMethods> {
@ -42,61 +37,6 @@ pub struct ThinShared<B: WriteBackendMethods> {
pub module_names: Vec<CString>,
}
pub enum LtoModuleCodegen<B: WriteBackendMethods> {
Fat(ModuleCodegen<B::Module>),
Thin(ThinModule<B>),
}
impl<B: WriteBackendMethods> LtoModuleCodegen<B> {
pub fn name(&self) -> &str {
match *self {
LtoModuleCodegen::Fat(_) => "everything",
LtoModuleCodegen::Thin(ref m) => m.name(),
}
}
/// Optimize this module within the given codegen context.
pub fn optimize(
self,
cgcx: &CodegenContext<B>,
) -> Result<ModuleCodegen<B::Module>, FatalError> {
match self {
LtoModuleCodegen::Fat(mut module) => {
B::optimize_fat(cgcx, &mut module)?;
Ok(module)
}
LtoModuleCodegen::Thin(thin) => B::optimize_thin(cgcx, thin),
}
}
/// A "gauge" of how costly it is to optimize this module, used to sort
/// biggest modules first.
pub fn cost(&self) -> u64 {
match *self {
// Only one module with fat LTO, so the cost doesn't matter.
LtoModuleCodegen::Fat(_) => 0,
LtoModuleCodegen::Thin(ref m) => m.cost(),
}
}
/// Run autodiff on Fat LTO module
pub fn autodiff(
self,
cgcx: &CodegenContext<B>,
diff_fncs: Vec<AutoDiffItem>,
config: &ModuleConfig,
) -> Result<LtoModuleCodegen<B>, FatalError> {
match &self {
LtoModuleCodegen::Fat(module) => {
B::autodiff(cgcx, &module, diff_fncs, config)?;
}
_ => panic!("autodiff called with non-fat LTO module"),
}
Ok(self)
}
}
pub enum SerializedModule<M: ModuleBufferMethods> {
Local(M),
FromRlib(Vec<u8>),

View file

@ -397,50 +397,31 @@ impl<B: WriteBackendMethods> CodegenContext<B> {
}
}
fn generate_lto_work<B: ExtraBackendMethods>(
fn generate_thin_lto_work<B: ExtraBackendMethods>(
cgcx: &CodegenContext<B>,
autodiff: Vec<AutoDiffItem>,
needs_fat_lto: Vec<FatLtoInput<B>>,
needs_thin_lto: Vec<(String, B::ThinBuffer)>,
import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
) -> Vec<(WorkItem<B>, u64)> {
let _prof_timer = cgcx.prof.generic_activity("codegen_generate_lto_work");
let _prof_timer = cgcx.prof.generic_activity("codegen_thin_generate_lto_work");
if !needs_fat_lto.is_empty() {
assert!(needs_thin_lto.is_empty());
let mut module =
B::run_fat_lto(cgcx, needs_fat_lto, import_only_modules).unwrap_or_else(|e| e.raise());
if cgcx.lto == Lto::Fat && !autodiff.is_empty() {
let config = cgcx.config(ModuleKind::Regular);
module = module.autodiff(cgcx, autodiff, config).unwrap_or_else(|e| e.raise());
}
// We are adding a single work item, so the cost doesn't matter.
vec![(WorkItem::LTO(module), 0)]
} else {
if !autodiff.is_empty() {
let dcx = cgcx.create_dcx();
dcx.handle().emit_fatal(AutodiffWithoutLto {});
}
assert!(needs_fat_lto.is_empty());
let (lto_modules, copy_jobs) = B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules)
.unwrap_or_else(|e| e.raise());
lto_modules
.into_iter()
.map(|module| {
let cost = module.cost();
(WorkItem::LTO(module), cost)
})
.chain(copy_jobs.into_iter().map(|wp| {
(
WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
name: wp.cgu_name.clone(),
source: wp,
}),
0, // copying is very cheap
)
}))
.collect()
}
let (lto_modules, copy_jobs) =
B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules).unwrap_or_else(|e| e.raise());
lto_modules
.into_iter()
.map(|module| {
let cost = module.cost();
(WorkItem::ThinLto(module), cost)
})
.chain(copy_jobs.into_iter().map(|wp| {
(
WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
name: wp.cgu_name.clone(),
source: wp,
}),
0, // copying is very cheap
)
}))
.collect()
}
struct CompiledModules {
@ -470,6 +451,7 @@ pub(crate) fn start_async_codegen<B: ExtraBackendMethods>(
backend: B,
tcx: TyCtxt<'_>,
target_cpu: String,
autodiff_items: &[AutoDiffItem],
) -> OngoingCodegen<B> {
let (coordinator_send, coordinator_receive) = channel();
@ -488,6 +470,7 @@ pub(crate) fn start_async_codegen<B: ExtraBackendMethods>(
backend.clone(),
tcx,
&crate_info,
autodiff_items,
shared_emitter,
codegen_worker_send,
coordinator_receive,
@ -736,15 +719,23 @@ pub(crate) enum WorkItem<B: WriteBackendMethods> {
/// Copy the post-LTO artifacts from the incremental cache to the output
/// directory.
CopyPostLtoArtifacts(CachedModuleCodegen),
/// Performs (Thin)LTO on the given module.
LTO(lto::LtoModuleCodegen<B>),
/// Performs fat LTO on the given module.
FatLto {
needs_fat_lto: Vec<FatLtoInput<B>>,
import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
autodiff: Vec<AutoDiffItem>,
},
/// Performs thin-LTO on the given module.
ThinLto(lto::ThinModule<B>),
}
impl<B: WriteBackendMethods> WorkItem<B> {
fn module_kind(&self) -> ModuleKind {
match *self {
WorkItem::Optimize(ref m) => m.kind,
WorkItem::CopyPostLtoArtifacts(_) | WorkItem::LTO(_) => ModuleKind::Regular,
WorkItem::CopyPostLtoArtifacts(_) | WorkItem::FatLto { .. } | WorkItem::ThinLto(_) => {
ModuleKind::Regular
}
}
}
@ -792,7 +783,8 @@ impl<B: WriteBackendMethods> WorkItem<B> {
match self {
WorkItem::Optimize(m) => desc("opt", "optimize module", &m.name),
WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for", &m.name),
WorkItem::LTO(m) => desc("lto", "LTO module", m.name()),
WorkItem::FatLto { .. } => desc("lto", "fat LTO module", "everything"),
WorkItem::ThinLto(m) => desc("lto", "thin-LTO module", m.name()),
}
}
}
@ -996,12 +988,24 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
})
}
fn execute_lto_work_item<B: ExtraBackendMethods>(
fn execute_fat_lto_work_item<B: ExtraBackendMethods>(
cgcx: &CodegenContext<B>,
module: lto::LtoModuleCodegen<B>,
needs_fat_lto: Vec<FatLtoInput<B>>,
import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
autodiff: Vec<AutoDiffItem>,
module_config: &ModuleConfig,
) -> Result<WorkItemResult<B>, FatalError> {
let module = module.optimize(cgcx)?;
let module = B::run_and_optimize_fat_lto(cgcx, needs_fat_lto, import_only_modules, autodiff)?;
let module = B::codegen(cgcx, module, module_config)?;
Ok(WorkItemResult::Finished(module))
}
fn execute_thin_lto_work_item<B: ExtraBackendMethods>(
cgcx: &CodegenContext<B>,
module: lto::ThinModule<B>,
module_config: &ModuleConfig,
) -> Result<WorkItemResult<B>, FatalError> {
let module = B::optimize_thin(cgcx, module)?;
finish_intra_module_work(cgcx, module, module_config)
}
@ -1010,11 +1014,8 @@ fn finish_intra_module_work<B: ExtraBackendMethods>(
module: ModuleCodegen<B::Module>,
module_config: &ModuleConfig,
) -> Result<WorkItemResult<B>, FatalError> {
let dcx = cgcx.create_dcx();
let dcx = dcx.handle();
if !cgcx.opts.unstable_opts.combine_cgu || module.kind == ModuleKind::Allocator {
let module = B::codegen(cgcx, dcx, module, module_config)?;
let module = B::codegen(cgcx, module, module_config)?;
Ok(WorkItemResult::Finished(module))
} else {
Ok(WorkItemResult::NeedsLink(module))
@ -1031,9 +1032,6 @@ pub(crate) enum Message<B: WriteBackendMethods> {
/// Sent from a backend worker thread.
WorkItem { result: Result<WorkItemResult<B>, Option<WorkerFatalError>>, worker_id: usize },
/// A vector containing all the AutoDiff tasks that we have to pass to Enzyme.
AddAutoDiffItems(Vec<AutoDiffItem>),
/// The frontend has finished generating something (backend IR or a
/// post-LTO artifact) for a codegen unit, and it should be passed to the
/// backend. Sent from the main thread.
@ -1100,6 +1098,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
backend: B,
tcx: TyCtxt<'_>,
crate_info: &CrateInfo,
autodiff_items: &[AutoDiffItem],
shared_emitter: SharedEmitter,
codegen_worker_send: Sender<CguMessage>,
coordinator_receive: Receiver<Box<dyn Any + Send>>,
@ -1109,6 +1108,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
) -> thread::JoinHandle<Result<CompiledModules, ()>> {
let coordinator_send = tx_to_llvm_workers;
let sess = tcx.sess;
let autodiff_items = autodiff_items.to_vec();
let mut each_linked_rlib_for_lto = Vec::new();
drop(link::each_linked_rlib(crate_info, None, &mut |cnum, path| {
@ -1362,7 +1362,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
// This is where we collect codegen units that have gone all the way
// through codegen and LLVM.
let mut autodiff_items = Vec::new();
let mut compiled_modules = vec![];
let mut compiled_allocator_module = None;
let mut needs_link = Vec::new();
@ -1474,20 +1473,37 @@ fn start_executing_work<B: ExtraBackendMethods>(
let needs_thin_lto = mem::take(&mut needs_thin_lto);
let import_only_modules = mem::take(&mut lto_import_only_modules);
for (work, cost) in generate_lto_work(
&cgcx,
autodiff_items.clone(),
needs_fat_lto,
needs_thin_lto,
import_only_modules,
) {
let insertion_index = work_items
.binary_search_by_key(&cost, |&(_, cost)| cost)
.unwrap_or_else(|e| e);
work_items.insert(insertion_index, (work, cost));
if !needs_fat_lto.is_empty() {
assert!(needs_thin_lto.is_empty());
work_items.push((
WorkItem::FatLto {
needs_fat_lto,
import_only_modules,
autodiff: autodiff_items.clone(),
},
0,
));
if cgcx.parallel {
helper.request_token();
}
} else {
if !autodiff_items.is_empty() {
let dcx = cgcx.create_dcx();
dcx.handle().emit_fatal(AutodiffWithoutLto {});
}
for (work, cost) in
generate_thin_lto_work(&cgcx, needs_thin_lto, import_only_modules)
{
let insertion_index = work_items
.binary_search_by_key(&cost, |&(_, cost)| cost)
.unwrap_or_else(|e| e);
work_items.insert(insertion_index, (work, cost));
if cgcx.parallel {
helper.request_token();
}
}
}
}
@ -1616,10 +1632,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
main_thread_state = MainThreadState::Idle;
}
Message::AddAutoDiffItems(mut items) => {
autodiff_items.append(&mut items);
}
Message::CodegenComplete => {
if codegen_state != Aborted {
codegen_state = Completed;
@ -1702,7 +1714,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
let dcx = dcx.handle();
let module = B::run_link(&cgcx, dcx, needs_link).map_err(|_| ())?;
let module =
B::codegen(&cgcx, dcx, module, cgcx.config(ModuleKind::Regular)).map_err(|_| ())?;
B::codegen(&cgcx, module, cgcx.config(ModuleKind::Regular)).map_err(|_| ())?;
compiled_modules.push(module);
}
@ -1842,10 +1854,22 @@ fn spawn_work<'a, B: ExtraBackendMethods>(
);
Ok(execute_copy_from_cache_work_item(&cgcx, m, module_config))
}
WorkItem::LTO(m) => {
WorkItem::FatLto { needs_fat_lto, import_only_modules, autodiff } => {
let _timer = cgcx
.prof
.generic_activity_with_arg("codegen_module_perform_lto", "everything");
execute_fat_lto_work_item(
&cgcx,
needs_fat_lto,
import_only_modules,
autodiff,
module_config,
)
}
WorkItem::ThinLto(m) => {
let _timer =
cgcx.prof.generic_activity_with_arg("codegen_module_perform_lto", m.name());
execute_lto_work_item(&cgcx, m, module_config)
execute_thin_lto_work_item(&cgcx, m, module_config)
}
})
};
@ -2082,10 +2106,6 @@ impl<B: ExtraBackendMethods> OngoingCodegen<B> {
drop(self.coordinator.sender.send(Box::new(Message::CodegenComplete::<B>)));
}
pub(crate) fn submit_autodiff_items(&self, items: Vec<AutoDiffItem>) {
drop(self.coordinator.sender.send(Box::new(Message::<B>::AddAutoDiffItems(items))));
}
pub(crate) fn check_for_errors(&self, sess: &Session) {
self.shared_emitter_main.check(sess, false);
}

View file

@ -647,7 +647,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
) -> OngoingCodegen<B> {
// Skip crate items and just output metadata in -Z no-codegen mode.
if tcx.sess.opts.unstable_opts.no_codegen || !tcx.sess.opts.output_types.should_codegen() {
let ongoing_codegen = start_async_codegen(backend, tcx, target_cpu);
let ongoing_codegen = start_async_codegen(backend, tcx, target_cpu, &[]);
ongoing_codegen.codegen_finished(tcx);
@ -667,7 +667,6 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
// codegen units.
let MonoItemPartitions { codegen_units, autodiff_items, .. } =
tcx.collect_and_partition_mono_items(());
let autodiff_fncs = autodiff_items.to_vec();
// Force all codegen_unit queries so they are already either red or green
// when compile_codegen_unit accesses them. We are not able to re-execute
@ -680,7 +679,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
}
}
let ongoing_codegen = start_async_codegen(backend.clone(), tcx, target_cpu);
let ongoing_codegen = start_async_codegen(backend.clone(), tcx, target_cpu, autodiff_items);
// Codegen an allocator shim, if necessary.
if let Some(kind) = allocator_kind_for_codegen(tcx) {
@ -710,10 +709,6 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
);
}
if !autodiff_fncs.is_empty() {
ongoing_codegen.submit_autodiff_items(autodiff_fncs);
}
// For better throughput during parallel processing by LLVM, we used to sort
// CGUs largest to smallest. This would lead to better thread utilization
// by, for example, preventing a large CGU from being processed last and

View file

@ -2,7 +2,7 @@ use rustc_ast::expand::autodiff_attrs::AutoDiffItem;
use rustc_errors::{DiagCtxtHandle, FatalError};
use rustc_middle::dep_graph::WorkProduct;
use crate::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
use crate::back::lto::{SerializedModule, ThinModule};
use crate::back::write::{CodegenContext, FatLtoInput, ModuleConfig};
use crate::{CompiledModule, ModuleCodegen};
@ -20,13 +20,14 @@ pub trait WriteBackendMethods: Clone + 'static {
dcx: DiagCtxtHandle<'_>,
modules: Vec<ModuleCodegen<Self::Module>>,
) -> Result<ModuleCodegen<Self::Module>, FatalError>;
/// Performs fat LTO by merging all modules into a single one and returning it
/// for further optimization.
fn run_fat_lto(
/// Performs fat LTO by merging all modules into a single one, running autodiff
/// if necessary and running any further optimizations
fn run_and_optimize_fat_lto(
cgcx: &CodegenContext<Self>,
modules: Vec<FatLtoInput<Self>>,
cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
) -> Result<LtoModuleCodegen<Self>, FatalError>;
diff_fncs: Vec<AutoDiffItem>,
) -> Result<ModuleCodegen<Self::Module>, FatalError>;
/// Performs thin LTO by performing necessary global analysis and returning two
/// lists, one of the modules that need optimization and another for modules that
/// can simply be copied over from the incr. comp. cache.
@ -34,7 +35,7 @@ pub trait WriteBackendMethods: Clone + 'static {
cgcx: &CodegenContext<Self>,
modules: Vec<(String, Self::ThinBuffer)>,
cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
) -> Result<(Vec<LtoModuleCodegen<Self>>, Vec<WorkProduct>), FatalError>;
) -> Result<(Vec<ThinModule<Self>>, Vec<WorkProduct>), FatalError>;
fn print_pass_timings(&self);
fn print_statistics(&self);
fn optimize(
@ -43,17 +44,12 @@ pub trait WriteBackendMethods: Clone + 'static {
module: &mut ModuleCodegen<Self::Module>,
config: &ModuleConfig,
) -> Result<(), FatalError>;
fn optimize_fat(
cgcx: &CodegenContext<Self>,
llmod: &mut ModuleCodegen<Self::Module>,
) -> Result<(), FatalError>;
fn optimize_thin(
cgcx: &CodegenContext<Self>,
thin: ThinModule<Self>,
) -> Result<ModuleCodegen<Self::Module>, FatalError>;
fn codegen(
cgcx: &CodegenContext<Self>,
dcx: DiagCtxtHandle<'_>,
module: ModuleCodegen<Self::Module>,
config: &ModuleConfig,
) -> Result<CompiledModule, FatalError>;
@ -62,12 +58,6 @@ pub trait WriteBackendMethods: Clone + 'static {
want_summary: bool,
) -> (String, Self::ThinBuffer);
fn serialize_module(module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer);
fn autodiff(
cgcx: &CodegenContext<Self>,
module: &ModuleCodegen<Self::Module>,
diff_fncs: Vec<AutoDiffItem>,
config: &ModuleConfig,
) -> Result<(), FatalError>;
}
pub trait ThinBufferMethods: Send + Sync {