Rollup merge of #152566 - bjorn3:cg_gcc_no_thin_lto, r=antoyo
Remove code for ThinLTO from cg_gcc. It was just a dummy implementation to work around the fact that thin local LTO is the default in rustc. By adding a `thin_lto_supported` method to the codegen backend, thin local LTO can be automatically disabled for cg_gcc, removing the need for this dummy implementation. This makes improvements to the LTO handling on the cg_ssa side a lot easier. cc [#rustc-codegen-gcc > thin LTO implementation](https://rust-lang.zulipchat.com/#narrow/channel/386786-rustc-codegen-gcc/topic/thin.20LTO.20implementation/with/573625132) This should make the work on https://github.com/rust-lang/compiler-team/issues/908 easier. r? rust-lang/wg-gcc-backend
This commit is contained in:
commit
4a4ea14148
7 changed files with 56 additions and 409 deletions
|
|
@ -17,15 +17,13 @@
|
||||||
// /usr/bin/ld: warning: type of symbol `_RNvNvNvNvNtNtNtCsAj5i4SGTR7_3std4sync4mpmc5waker17current_thread_id5DUMMY7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o
|
// /usr/bin/ld: warning: type of symbol `_RNvNvNvNvNtNtNtCsAj5i4SGTR7_3std4sync4mpmc5waker17current_thread_id5DUMMY7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o
|
||||||
// /usr/bin/ld: warning: incremental linking of LTO and non-LTO objects; using -flinker-output=nolto-rel which will bypass whole program optimization
|
// /usr/bin/ld: warning: incremental linking of LTO and non-LTO objects; using -flinker-output=nolto-rel which will bypass whole program optimization
|
||||||
// cSpell:enable
|
// cSpell:enable
|
||||||
use std::ffi::{CStr, CString};
|
use std::ffi::CString;
|
||||||
use std::fs::{self, File};
|
use std::fs::{self, File};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::Arc;
|
|
||||||
use std::sync::atomic::Ordering;
|
|
||||||
|
|
||||||
use gccjit::{Context, OutputKind};
|
use gccjit::OutputKind;
|
||||||
use object::read::archive::ArchiveFile;
|
use object::read::archive::ArchiveFile;
|
||||||
use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule, ThinShared};
|
use rustc_codegen_ssa::back::lto::SerializedModule;
|
||||||
use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput, SharedEmitter};
|
use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput, SharedEmitter};
|
||||||
use rustc_codegen_ssa::traits::*;
|
use rustc_codegen_ssa::traits::*;
|
||||||
use rustc_codegen_ssa::{ModuleCodegen, ModuleKind, looks_like_rust_object_file};
|
use rustc_codegen_ssa::{ModuleCodegen, ModuleKind, looks_like_rust_object_file};
|
||||||
|
|
@ -33,15 +31,12 @@ use rustc_data_structures::memmap::Mmap;
|
||||||
use rustc_data_structures::profiling::SelfProfilerRef;
|
use rustc_data_structures::profiling::SelfProfilerRef;
|
||||||
use rustc_errors::{DiagCtxt, DiagCtxtHandle};
|
use rustc_errors::{DiagCtxt, DiagCtxtHandle};
|
||||||
use rustc_log::tracing::info;
|
use rustc_log::tracing::info;
|
||||||
use rustc_middle::bug;
|
|
||||||
use rustc_middle::dep_graph::WorkProduct;
|
|
||||||
use rustc_session::config::Lto;
|
use rustc_session::config::Lto;
|
||||||
use rustc_target::spec::RelocModel;
|
|
||||||
use tempfile::{TempDir, tempdir};
|
use tempfile::{TempDir, tempdir};
|
||||||
|
|
||||||
use crate::back::write::save_temp_bitcode;
|
use crate::back::write::save_temp_bitcode;
|
||||||
use crate::errors::LtoBitcodeFromRlib;
|
use crate::errors::LtoBitcodeFromRlib;
|
||||||
use crate::{GccCodegenBackend, GccContext, LTO_SUPPORTED, LtoMode, SyncContext, to_gcc_opt_level};
|
use crate::{GccCodegenBackend, GccContext, LtoMode, to_gcc_opt_level};
|
||||||
|
|
||||||
struct LtoData {
|
struct LtoData {
|
||||||
// TODO(antoyo): use symbols_below_threshold.
|
// TODO(antoyo): use symbols_below_threshold.
|
||||||
|
|
@ -281,385 +276,3 @@ impl ModuleBufferMethods for ModuleBuffer {
|
||||||
&[]
|
&[]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Performs thin LTO by performing necessary global analysis and returning two
|
|
||||||
/// lists, one of the modules that need optimization and another for modules that
|
|
||||||
/// can simply be copied over from the incr. comp. cache.
|
|
||||||
pub(crate) fn run_thin(
|
|
||||||
cgcx: &CodegenContext,
|
|
||||||
prof: &SelfProfilerRef,
|
|
||||||
dcx: DiagCtxtHandle<'_>,
|
|
||||||
each_linked_rlib_for_lto: &[PathBuf],
|
|
||||||
modules: Vec<(String, ThinBuffer)>,
|
|
||||||
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
|
|
||||||
) -> (Vec<ThinModule<GccCodegenBackend>>, Vec<WorkProduct>) {
|
|
||||||
let lto_data = prepare_lto(cgcx, each_linked_rlib_for_lto, dcx);
|
|
||||||
if cgcx.use_linker_plugin_lto {
|
|
||||||
unreachable!(
|
|
||||||
"We should never reach this case if the LTO step \
|
|
||||||
is deferred to the linker"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
thin_lto(
|
|
||||||
cgcx,
|
|
||||||
prof,
|
|
||||||
dcx,
|
|
||||||
modules,
|
|
||||||
lto_data.upstream_modules,
|
|
||||||
lto_data.tmp_path,
|
|
||||||
cached_modules,
|
|
||||||
//<o_data.symbols_below_threshold,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn prepare_thin(module: ModuleCodegen<GccContext>) -> (String, ThinBuffer) {
|
|
||||||
let name = module.name;
|
|
||||||
//let buffer = ThinBuffer::new(module.module_llvm.context, true);
|
|
||||||
let buffer = ThinBuffer::new(&module.module_llvm.context);
|
|
||||||
(name, buffer)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Prepare "thin" LTO to get run on these modules.
|
|
||||||
///
|
|
||||||
/// The general structure of ThinLTO is quite different from the structure of
|
|
||||||
/// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
|
|
||||||
/// one giant LLVM module, and then we run more optimization passes over this
|
|
||||||
/// big module after internalizing most symbols. Thin LTO, on the other hand,
|
|
||||||
/// avoid this large bottleneck through more targeted optimization.
|
|
||||||
///
|
|
||||||
/// At a high level Thin LTO looks like:
|
|
||||||
///
|
|
||||||
/// 1. Prepare a "summary" of each LLVM module in question which describes
|
|
||||||
/// the values inside, cost of the values, etc.
|
|
||||||
/// 2. Merge the summaries of all modules in question into one "index"
|
|
||||||
/// 3. Perform some global analysis on this index
|
|
||||||
/// 4. For each module, use the index and analysis calculated previously to
|
|
||||||
/// perform local transformations on the module, for example inlining
|
|
||||||
/// small functions from other modules.
|
|
||||||
/// 5. Run thin-specific optimization passes over each module, and then code
|
|
||||||
/// generate everything at the end.
|
|
||||||
///
|
|
||||||
/// The summary for each module is intended to be quite cheap, and the global
|
|
||||||
/// index is relatively quite cheap to create as well. As a result, the goal of
|
|
||||||
/// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
|
|
||||||
/// situations. For example one cheap optimization is that we can parallelize
|
|
||||||
/// all codegen modules, easily making use of all the cores on a machine.
|
|
||||||
///
|
|
||||||
/// With all that in mind, the function here is designed at specifically just
|
|
||||||
/// calculating the *index* for ThinLTO. This index will then be shared amongst
|
|
||||||
/// all of the `LtoModuleCodegen` units returned below and destroyed once
|
|
||||||
/// they all go out of scope.
|
|
||||||
fn thin_lto(
|
|
||||||
_cgcx: &CodegenContext,
|
|
||||||
prof: &SelfProfilerRef,
|
|
||||||
_dcx: DiagCtxtHandle<'_>,
|
|
||||||
modules: Vec<(String, ThinBuffer)>,
|
|
||||||
serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
|
|
||||||
tmp_path: TempDir,
|
|
||||||
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
|
|
||||||
//_symbols_below_threshold: &[String],
|
|
||||||
) -> (Vec<ThinModule<GccCodegenBackend>>, Vec<WorkProduct>) {
|
|
||||||
let _timer = prof.generic_activity("LLVM_thin_lto_global_analysis");
|
|
||||||
info!("going for that thin, thin LTO");
|
|
||||||
|
|
||||||
/*let green_modules: FxHashMap<_, _> =
|
|
||||||
cached_modules.iter().map(|(_, wp)| (wp.cgu_name.clone(), wp.clone())).collect();*/
|
|
||||||
|
|
||||||
let full_scope_len = modules.len() + serialized_modules.len() + cached_modules.len();
|
|
||||||
let mut thin_buffers = Vec::with_capacity(modules.len());
|
|
||||||
let mut module_names = Vec::with_capacity(full_scope_len);
|
|
||||||
//let mut thin_modules = Vec::with_capacity(full_scope_len);
|
|
||||||
|
|
||||||
for (i, (name, buffer)) in modules.into_iter().enumerate() {
|
|
||||||
info!("local module: {} - {}", i, name);
|
|
||||||
let cname = CString::new(name.as_bytes()).unwrap();
|
|
||||||
/*thin_modules.push(llvm::ThinLTOModule {
|
|
||||||
identifier: cname.as_ptr(),
|
|
||||||
data: buffer.data().as_ptr(),
|
|
||||||
len: buffer.data().len(),
|
|
||||||
});*/
|
|
||||||
thin_buffers.push(buffer);
|
|
||||||
module_names.push(cname);
|
|
||||||
}
|
|
||||||
|
|
||||||
// FIXME: All upstream crates are deserialized internally in the
|
|
||||||
// function below to extract their summary and modules. Note that
|
|
||||||
// unlike the loop above we *must* decode and/or read something
|
|
||||||
// here as these are all just serialized files on disk. An
|
|
||||||
// improvement, however, to make here would be to store the
|
|
||||||
// module summary separately from the actual module itself. Right
|
|
||||||
// now this is store in one large bitcode file, and the entire
|
|
||||||
// file is deflate-compressed. We could try to bypass some of the
|
|
||||||
// decompression by storing the index uncompressed and only
|
|
||||||
// lazily decompressing the bytecode if necessary.
|
|
||||||
//
|
|
||||||
// Note that truly taking advantage of this optimization will
|
|
||||||
// likely be further down the road. We'd have to implement
|
|
||||||
// incremental ThinLTO first where we could actually avoid
|
|
||||||
// looking at upstream modules entirely sometimes (the contents,
|
|
||||||
// we must always unconditionally look at the index).
|
|
||||||
let mut serialized = Vec::with_capacity(serialized_modules.len() + cached_modules.len());
|
|
||||||
|
|
||||||
let cached_modules =
|
|
||||||
cached_modules.into_iter().map(|(sm, wp)| (sm, CString::new(wp.cgu_name).unwrap()));
|
|
||||||
|
|
||||||
for (module, name) in serialized_modules.into_iter().chain(cached_modules) {
|
|
||||||
info!("upstream or cached module {:?}", name);
|
|
||||||
/*thin_modules.push(llvm::ThinLTOModule {
|
|
||||||
identifier: name.as_ptr(),
|
|
||||||
data: module.data().as_ptr(),
|
|
||||||
len: module.data().len(),
|
|
||||||
});*/
|
|
||||||
|
|
||||||
match module {
|
|
||||||
SerializedModule::Local(_) => {
|
|
||||||
//let path = module_buffer.0.to_str().expect("path");
|
|
||||||
//let my_path = PathBuf::from(path);
|
|
||||||
//let exists = my_path.exists();
|
|
||||||
/*module.module_llvm.should_combine_object_files = true;
|
|
||||||
module
|
|
||||||
.module_llvm
|
|
||||||
.context
|
|
||||||
.add_driver_option(module_buffer.0.to_str().expect("path"));*/
|
|
||||||
}
|
|
||||||
SerializedModule::FromRlib(_) => unimplemented!("from rlib"),
|
|
||||||
SerializedModule::FromUncompressedFile(_) => {
|
|
||||||
unimplemented!("from uncompressed file")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
serialized.push(module);
|
|
||||||
module_names.push(name);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sanity check
|
|
||||||
//assert_eq!(thin_modules.len(), module_names.len());
|
|
||||||
|
|
||||||
// Delegate to the C++ bindings to create some data here. Once this is a
|
|
||||||
// tried-and-true interface we may wish to try to upstream some of this
|
|
||||||
// to LLVM itself, right now we reimplement a lot of what they do
|
|
||||||
// upstream...
|
|
||||||
/*let data = llvm::LLVMRustCreateThinLTOData(
|
|
||||||
thin_modules.as_ptr(),
|
|
||||||
thin_modules.len() as u32,
|
|
||||||
symbols_below_threshold.as_ptr(),
|
|
||||||
symbols_below_threshold.len() as u32,
|
|
||||||
)
|
|
||||||
.ok_or_else(|| write::llvm_err(dcx, LlvmError::PrepareThinLtoContext))?;
|
|
||||||
*/
|
|
||||||
|
|
||||||
let data = ThinData; //(Arc::new(tmp_path))/*(data)*/;
|
|
||||||
|
|
||||||
info!("thin LTO data created");
|
|
||||||
|
|
||||||
/*let (key_map_path, prev_key_map, curr_key_map) =
|
|
||||||
if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
|
|
||||||
let path = incr_comp_session_dir.join(THIN_LTO_KEYS_INCR_COMP_FILE_NAME);
|
|
||||||
// If the previous file was deleted, or we get an IO error
|
|
||||||
// reading the file, then we'll just use `None` as the
|
|
||||||
// prev_key_map, which will force the code to be recompiled.
|
|
||||||
let prev =
|
|
||||||
if path.exists() { ThinLTOKeysMap::load_from_file(&path).ok() } else { None };
|
|
||||||
let curr = ThinLTOKeysMap::from_thin_lto_modules(&data, &thin_modules, &module_names);
|
|
||||||
(Some(path), prev, curr)
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// If we don't compile incrementally, we don't need to load the
|
|
||||||
// import data from LLVM.
|
|
||||||
assert!(green_modules.is_empty());
|
|
||||||
let curr = ThinLTOKeysMap::default();
|
|
||||||
(None, None, curr)
|
|
||||||
};
|
|
||||||
info!("thin LTO cache key map loaded");
|
|
||||||
info!("prev_key_map: {:#?}", prev_key_map);
|
|
||||||
info!("curr_key_map: {:#?}", curr_key_map);*/
|
|
||||||
|
|
||||||
// Throw our data in an `Arc` as we'll be sharing it across threads. We
|
|
||||||
// also put all memory referenced by the C++ data (buffers, ids, etc)
|
|
||||||
// into the arc as well. After this we'll create a thin module
|
|
||||||
// codegen per module in this data.
|
|
||||||
let shared =
|
|
||||||
Arc::new(ThinShared { data, thin_buffers, serialized_modules: serialized, module_names });
|
|
||||||
|
|
||||||
let copy_jobs = vec![];
|
|
||||||
let mut opt_jobs = vec![];
|
|
||||||
|
|
||||||
info!("checking which modules can be-reused and which have to be re-optimized.");
|
|
||||||
for (module_index, module_name) in shared.module_names.iter().enumerate() {
|
|
||||||
let module_name = module_name_to_str(module_name);
|
|
||||||
/*if let (Some(prev_key_map), true) =
|
|
||||||
(prev_key_map.as_ref(), green_modules.contains_key(module_name))
|
|
||||||
{
|
|
||||||
assert!(cgcx.incr_comp_session_dir.is_some());
|
|
||||||
|
|
||||||
// If a module exists in both the current and the previous session,
|
|
||||||
// and has the same LTO cache key in both sessions, then we can re-use it
|
|
||||||
if prev_key_map.keys.get(module_name) == curr_key_map.keys.get(module_name) {
|
|
||||||
let work_product = green_modules[module_name].clone();
|
|
||||||
copy_jobs.push(work_product);
|
|
||||||
info!(" - {}: re-used", module_name);
|
|
||||||
assert!(cgcx.incr_comp_session_dir.is_some());
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
|
|
||||||
info!(" - {}: re-compiled", module_name);
|
|
||||||
opt_jobs.push(ThinModule { shared: shared.clone(), idx: module_index });
|
|
||||||
}
|
|
||||||
|
|
||||||
// Save the current ThinLTO import information for the next compilation
|
|
||||||
// session, overwriting the previous serialized data (if any).
|
|
||||||
/*if let Some(path) = key_map_path {
|
|
||||||
if let Err(err) = curr_key_map.save_to_file(&path) {
|
|
||||||
return Err(write::llvm_err(dcx, LlvmError::WriteThinLtoKey { err }));
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
|
|
||||||
// NOTE: save the temporary directory used by LTO so that it gets deleted after linking instead
|
|
||||||
// of now.
|
|
||||||
//module.module_llvm.temp_dir = Some(tmp_path);
|
|
||||||
// TODO: save the directory so that it gets deleted later.
|
|
||||||
std::mem::forget(tmp_path);
|
|
||||||
|
|
||||||
(opt_jobs, copy_jobs)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn optimize_thin_module(
|
|
||||||
thin_module: ThinModule<GccCodegenBackend>,
|
|
||||||
_cgcx: &CodegenContext,
|
|
||||||
) -> ModuleCodegen<GccContext> {
|
|
||||||
//let module_name = &thin_module.shared.module_names[thin_module.idx];
|
|
||||||
|
|
||||||
// Right now the implementation we've got only works over serialized
|
|
||||||
// modules, so we create a fresh new LLVM context and parse the module
|
|
||||||
// into that context. One day, however, we may do this for upstream
|
|
||||||
// crates but for locally codegened modules we may be able to reuse
|
|
||||||
// that LLVM Context and Module.
|
|
||||||
//let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
|
|
||||||
//let llmod_raw = parse_module(llcx, module_name, thin_module.data(), &dcx)? as *const _;
|
|
||||||
let mut lto_mode = LtoMode::None;
|
|
||||||
let context = match thin_module.shared.thin_buffers.get(thin_module.idx) {
|
|
||||||
Some(thin_buffer) => Arc::clone(&thin_buffer.context),
|
|
||||||
None => {
|
|
||||||
let context = Context::default();
|
|
||||||
let len = thin_module.shared.thin_buffers.len();
|
|
||||||
let module = &thin_module.shared.serialized_modules[thin_module.idx - len];
|
|
||||||
match *module {
|
|
||||||
SerializedModule::Local(ref module_buffer) => {
|
|
||||||
let path = module_buffer.0.to_str().expect("path");
|
|
||||||
context.add_driver_option(path);
|
|
||||||
lto_mode = LtoMode::Thin;
|
|
||||||
/*module.module_llvm.should_combine_object_files = true;
|
|
||||||
module
|
|
||||||
.module_llvm
|
|
||||||
.context
|
|
||||||
.add_driver_option(module_buffer.0.to_str().expect("path"));*/
|
|
||||||
}
|
|
||||||
SerializedModule::FromRlib(_) => unimplemented!("from rlib"),
|
|
||||||
SerializedModule::FromUncompressedFile(_) => {
|
|
||||||
unimplemented!("from uncompressed file")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Arc::new(SyncContext::new(context))
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let lto_supported = LTO_SUPPORTED.load(Ordering::SeqCst);
|
|
||||||
let module = ModuleCodegen::new_regular(
|
|
||||||
thin_module.name().to_string(),
|
|
||||||
GccContext {
|
|
||||||
context,
|
|
||||||
lto_mode,
|
|
||||||
lto_supported,
|
|
||||||
// TODO(antoyo): use the correct relocation model here.
|
|
||||||
relocation_model: RelocModel::Pic,
|
|
||||||
temp_dir: None,
|
|
||||||
},
|
|
||||||
);
|
|
||||||
/*{
|
|
||||||
let target = &*module.module_llvm.tm;
|
|
||||||
let llmod = module.module_llvm.llmod();
|
|
||||||
save_temp_bitcode(cgcx, &module, "thin-lto-input");
|
|
||||||
|
|
||||||
// Up next comes the per-module local analyses that we do for Thin LTO.
|
|
||||||
// Each of these functions is basically copied from the LLVM
|
|
||||||
// implementation and then tailored to suit this implementation. Ideally
|
|
||||||
// each of these would be supported by upstream LLVM but that's perhaps
|
|
||||||
// a patch for another day!
|
|
||||||
//
|
|
||||||
// You can find some more comments about these functions in the LLVM
|
|
||||||
// bindings we've got (currently `PassWrapper.cpp`)
|
|
||||||
{
|
|
||||||
let _timer =
|
|
||||||
cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
|
|
||||||
unsafe { llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) };
|
|
||||||
save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
let _timer = cgcx
|
|
||||||
.prof
|
|
||||||
.generic_activity_with_arg("LLVM_thin_lto_resolve_weak", thin_module.name());
|
|
||||||
if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
|
|
||||||
return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule));
|
|
||||||
}
|
|
||||||
save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
let _timer = cgcx
|
|
||||||
.prof
|
|
||||||
.generic_activity_with_arg("LLVM_thin_lto_internalize", thin_module.name());
|
|
||||||
if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
|
|
||||||
return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule));
|
|
||||||
}
|
|
||||||
save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
let _timer =
|
|
||||||
cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_import", thin_module.name());
|
|
||||||
if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target) {
|
|
||||||
return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule));
|
|
||||||
}
|
|
||||||
save_temp_bitcode(cgcx, &module, "thin-lto-after-import");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Alright now that we've done everything related to the ThinLTO
|
|
||||||
// analysis it's time to run some optimizations! Here we use the same
|
|
||||||
// `run_pass_manager` as the "fat" LTO above except that we tell it to
|
|
||||||
// populate a thin-specific pass manager, which presumably LLVM treats a
|
|
||||||
// little differently.
|
|
||||||
{
|
|
||||||
info!("running thin lto passes over {}", module.name);
|
|
||||||
run_pass_manager(cgcx, &dcx, &mut module, true)?;
|
|
||||||
save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
|
|
||||||
}
|
|
||||||
}*/
|
|
||||||
// FIXME: switch to #[expect] when the clippy bug is fixed.
|
|
||||||
#[allow(clippy::let_and_return)]
|
|
||||||
module
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct ThinBuffer {
|
|
||||||
context: Arc<SyncContext>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ThinBuffer {
|
|
||||||
pub(crate) fn new(context: &Arc<SyncContext>) -> Self {
|
|
||||||
Self { context: Arc::clone(context) }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ThinBufferMethods for ThinBuffer {
|
|
||||||
fn data(&self) -> &[u8] {
|
|
||||||
&[]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct ThinData; //(Arc<TempDir>);
|
|
||||||
|
|
||||||
fn module_name_to_str(c_str: &CStr) -> &str {
|
|
||||||
c_str.to_str().unwrap_or_else(|e| {
|
|
||||||
bug!("Encountered non-utf8 GCC module name `{}`: {}", c_str.to_string_lossy(), e)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -76,7 +76,6 @@ use std::path::{Path, PathBuf};
|
||||||
use std::sync::atomic::{AtomicBool, Ordering};
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
use back::lto::{ThinBuffer, ThinData};
|
|
||||||
use gccjit::{CType, Context, OptimizationLevel};
|
use gccjit::{CType, Context, OptimizationLevel};
|
||||||
#[cfg(feature = "master")]
|
#[cfg(feature = "master")]
|
||||||
use gccjit::{TargetInfo, Version};
|
use gccjit::{TargetInfo, Version};
|
||||||
|
|
@ -87,7 +86,9 @@ use rustc_codegen_ssa::back::write::{
|
||||||
};
|
};
|
||||||
use rustc_codegen_ssa::base::codegen_crate;
|
use rustc_codegen_ssa::base::codegen_crate;
|
||||||
use rustc_codegen_ssa::target_features::cfg_target_feature;
|
use rustc_codegen_ssa::target_features::cfg_target_feature;
|
||||||
use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, WriteBackendMethods};
|
use rustc_codegen_ssa::traits::{
|
||||||
|
CodegenBackend, ExtraBackendMethods, ThinBufferMethods, WriteBackendMethods,
|
||||||
|
};
|
||||||
use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen, TargetConfig};
|
use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen, TargetConfig};
|
||||||
use rustc_data_structures::fx::FxIndexMap;
|
use rustc_data_structures::fx::FxIndexMap;
|
||||||
use rustc_data_structures::profiling::SelfProfilerRef;
|
use rustc_data_structures::profiling::SelfProfilerRef;
|
||||||
|
|
@ -177,8 +178,6 @@ pub struct GccCodegenBackend {
|
||||||
lto_supported: Arc<AtomicBool>,
|
lto_supported: Arc<AtomicBool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
static LTO_SUPPORTED: AtomicBool = AtomicBool::new(false);
|
|
||||||
|
|
||||||
fn load_libgccjit_if_needed(libgccjit_target_lib_file: &Path) {
|
fn load_libgccjit_if_needed(libgccjit_target_lib_file: &Path) {
|
||||||
if gccjit::is_loaded() {
|
if gccjit::is_loaded() {
|
||||||
// Do not load a libgccjit second time.
|
// Do not load a libgccjit second time.
|
||||||
|
|
@ -251,7 +250,6 @@ impl CodegenBackend for GccCodegenBackend {
|
||||||
#[cfg(feature = "master")]
|
#[cfg(feature = "master")]
|
||||||
{
|
{
|
||||||
let lto_supported = gccjit::is_lto_supported();
|
let lto_supported = gccjit::is_lto_supported();
|
||||||
LTO_SUPPORTED.store(lto_supported, Ordering::SeqCst);
|
|
||||||
self.lto_supported.store(lto_supported, Ordering::SeqCst);
|
self.lto_supported.store(lto_supported, Ordering::SeqCst);
|
||||||
|
|
||||||
gccjit::set_global_personality_function_name(b"rust_eh_personality\0");
|
gccjit::set_global_personality_function_name(b"rust_eh_personality\0");
|
||||||
|
|
@ -281,6 +279,10 @@ impl CodegenBackend for GccCodegenBackend {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn thin_lto_supported(&self) -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
fn provide(&self, providers: &mut Providers) {
|
fn provide(&self, providers: &mut Providers) {
|
||||||
providers.queries.global_backend_features =
|
providers.queries.global_backend_features =
|
||||||
|tcx, ()| gcc_util::global_gcc_features(tcx.sess)
|
|tcx, ()| gcc_util::global_gcc_features(tcx.sess)
|
||||||
|
|
@ -421,11 +423,19 @@ unsafe impl Send for SyncContext {}
|
||||||
// FIXME(antoyo): that shouldn't be Sync. Parallel compilation is currently disabled with "CodegenBackend::supports_parallel()".
|
// FIXME(antoyo): that shouldn't be Sync. Parallel compilation is currently disabled with "CodegenBackend::supports_parallel()".
|
||||||
unsafe impl Sync for SyncContext {}
|
unsafe impl Sync for SyncContext {}
|
||||||
|
|
||||||
|
pub struct ThinBuffer;
|
||||||
|
|
||||||
|
impl ThinBufferMethods for ThinBuffer {
|
||||||
|
fn data(&self) -> &[u8] {
|
||||||
|
&[]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl WriteBackendMethods for GccCodegenBackend {
|
impl WriteBackendMethods for GccCodegenBackend {
|
||||||
type Module = GccContext;
|
type Module = GccContext;
|
||||||
type TargetMachine = ();
|
type TargetMachine = ();
|
||||||
type ModuleBuffer = ModuleBuffer;
|
type ModuleBuffer = ModuleBuffer;
|
||||||
type ThinData = ThinData;
|
type ThinData = ();
|
||||||
type ThinBuffer = ThinBuffer;
|
type ThinBuffer = ThinBuffer;
|
||||||
|
|
||||||
fn run_and_optimize_fat_lto(
|
fn run_and_optimize_fat_lto(
|
||||||
|
|
@ -442,16 +452,16 @@ impl WriteBackendMethods for GccCodegenBackend {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn run_thin_lto(
|
fn run_thin_lto(
|
||||||
cgcx: &CodegenContext,
|
_cgcx: &CodegenContext,
|
||||||
prof: &SelfProfilerRef,
|
_prof: &SelfProfilerRef,
|
||||||
dcx: DiagCtxtHandle<'_>,
|
_dcx: DiagCtxtHandle<'_>,
|
||||||
// FIXME(bjorn3): Limit LTO exports to these symbols
|
// FIXME(bjorn3): Limit LTO exports to these symbols
|
||||||
_exported_symbols_for_lto: &[String],
|
_exported_symbols_for_lto: &[String],
|
||||||
each_linked_rlib_for_lto: &[PathBuf],
|
_each_linked_rlib_for_lto: &[PathBuf],
|
||||||
modules: Vec<(String, Self::ThinBuffer)>,
|
_modules: Vec<(String, Self::ThinBuffer)>,
|
||||||
cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
|
_cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
|
||||||
) -> (Vec<ThinModule<Self>>, Vec<WorkProduct>) {
|
) -> (Vec<ThinModule<Self>>, Vec<WorkProduct>) {
|
||||||
back::lto::run_thin(cgcx, prof, dcx, each_linked_rlib_for_lto, modules, cached_modules)
|
unreachable!()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn print_pass_timings(&self) {
|
fn print_pass_timings(&self) {
|
||||||
|
|
@ -473,13 +483,13 @@ impl WriteBackendMethods for GccCodegenBackend {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn optimize_thin(
|
fn optimize_thin(
|
||||||
cgcx: &CodegenContext,
|
_cgcx: &CodegenContext,
|
||||||
_prof: &SelfProfilerRef,
|
_prof: &SelfProfilerRef,
|
||||||
_shared_emitter: &SharedEmitter,
|
_shared_emitter: &SharedEmitter,
|
||||||
_tm_factory: TargetMachineFactoryFn<Self>,
|
_tm_factory: TargetMachineFactoryFn<Self>,
|
||||||
thin: ThinModule<Self>,
|
_thin: ThinModule<Self>,
|
||||||
) -> ModuleCodegen<Self::Module> {
|
) -> ModuleCodegen<Self::Module> {
|
||||||
back::lto::optimize_thin_module(thin, cgcx)
|
unreachable!()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn codegen(
|
fn codegen(
|
||||||
|
|
@ -492,8 +502,8 @@ impl WriteBackendMethods for GccCodegenBackend {
|
||||||
back::write::codegen(cgcx, prof, shared_emitter, module, config)
|
back::write::codegen(cgcx, prof, shared_emitter, module, config)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn prepare_thin(module: ModuleCodegen<Self::Module>) -> (String, Self::ThinBuffer) {
|
fn prepare_thin(_module: ModuleCodegen<Self::Module>) -> (String, Self::ThinBuffer) {
|
||||||
back::lto::prepare_thin(module)
|
unreachable!()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn serialize_module(_module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer) {
|
fn serialize_module(_module: ModuleCodegen<Self::Module>) -> (String, Self::ModuleBuffer) {
|
||||||
|
|
|
||||||
|
|
@ -80,6 +80,11 @@ pub trait CodegenBackend {
|
||||||
vec![]
|
vec![]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Is ThinLTO supported by this backend?
|
||||||
|
fn thin_lto_supported(&self) -> bool {
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
/// Value printed by `--print=backend-has-zstd`.
|
/// Value printed by `--print=backend-has-zstd`.
|
||||||
///
|
///
|
||||||
/// Used by compiletest to determine whether tests involving zstd compression
|
/// Used by compiletest to determine whether tests involving zstd compression
|
||||||
|
|
|
||||||
|
|
@ -463,6 +463,7 @@ pub fn run_compiler<R: Send>(config: Config, f: impl FnOnce(&Compiler) -> R + Se
|
||||||
|
|
||||||
codegen_backend.init(&sess);
|
codegen_backend.init(&sess);
|
||||||
sess.replaced_intrinsics = FxHashSet::from_iter(codegen_backend.replaced_intrinsics());
|
sess.replaced_intrinsics = FxHashSet::from_iter(codegen_backend.replaced_intrinsics());
|
||||||
|
sess.thin_lto_supported = codegen_backend.thin_lto_supported();
|
||||||
|
|
||||||
let cfg = parse_cfg(sess.dcx(), config.crate_cfg);
|
let cfg = parse_cfg(sess.dcx(), config.crate_cfg);
|
||||||
let mut cfg = config::build_configuration(&sess, cfg);
|
let mut cfg = config::build_configuration(&sess, cfg);
|
||||||
|
|
|
||||||
|
|
@ -101,6 +101,7 @@ const ALLOWED_CAPITALIZED_WORDS: &[&str] = &[
|
||||||
"NaNs",
|
"NaNs",
|
||||||
"OK",
|
"OK",
|
||||||
"Rust",
|
"Rust",
|
||||||
|
"ThinLTO",
|
||||||
"Unicode",
|
"Unicode",
|
||||||
"VS",
|
"VS",
|
||||||
// tidy-alphabetical-end
|
// tidy-alphabetical-end
|
||||||
|
|
|
||||||
|
|
@ -537,3 +537,7 @@ pub(crate) struct UnexpectedBuiltinCfg {
|
||||||
pub(crate) cfg_name: Symbol,
|
pub(crate) cfg_name: Symbol,
|
||||||
pub(crate) controlled_by: &'static str,
|
pub(crate) controlled_by: &'static str,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Diagnostic)]
|
||||||
|
#[diag("ThinLTO is not supported by the codegen backend")]
|
||||||
|
pub(crate) struct ThinLtoNotSupportedByBackend;
|
||||||
|
|
|
||||||
|
|
@ -158,6 +158,9 @@ pub struct Session {
|
||||||
/// The names of intrinsics that the current codegen backend replaces
|
/// The names of intrinsics that the current codegen backend replaces
|
||||||
/// with its own implementations.
|
/// with its own implementations.
|
||||||
pub replaced_intrinsics: FxHashSet<Symbol>,
|
pub replaced_intrinsics: FxHashSet<Symbol>,
|
||||||
|
|
||||||
|
/// Does the codegen backend support ThinLTO?
|
||||||
|
pub thin_lto_supported: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
|
|
@ -606,10 +609,19 @@ impl Session {
|
||||||
}
|
}
|
||||||
config::LtoCli::Thin => {
|
config::LtoCli::Thin => {
|
||||||
// The user explicitly asked for ThinLTO
|
// The user explicitly asked for ThinLTO
|
||||||
|
if !self.thin_lto_supported {
|
||||||
|
// Backend doesn't support ThinLTO, disable LTO.
|
||||||
|
self.dcx().emit_warn(errors::ThinLtoNotSupportedByBackend);
|
||||||
|
return config::Lto::No;
|
||||||
|
}
|
||||||
return config::Lto::Thin;
|
return config::Lto::Thin;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !self.thin_lto_supported {
|
||||||
|
return config::Lto::No;
|
||||||
|
}
|
||||||
|
|
||||||
// Ok at this point the target doesn't require anything and the user
|
// Ok at this point the target doesn't require anything and the user
|
||||||
// hasn't asked for anything. Our next decision is whether or not
|
// hasn't asked for anything. Our next decision is whether or not
|
||||||
// we enable "auto" ThinLTO where we use multiple codegen units and
|
// we enable "auto" ThinLTO where we use multiple codegen units and
|
||||||
|
|
@ -1088,6 +1100,7 @@ pub fn build_session(
|
||||||
host_filesearch,
|
host_filesearch,
|
||||||
invocation_temp,
|
invocation_temp,
|
||||||
replaced_intrinsics: FxHashSet::default(), // filled by `run_compiler`
|
replaced_intrinsics: FxHashSet::default(), // filled by `run_compiler`
|
||||||
|
thin_lto_supported: true, // filled by `run_compiler`
|
||||||
};
|
};
|
||||||
|
|
||||||
validate_commandline_args_with_session_available(&sess);
|
validate_commandline_args_with_session_available(&sess);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue