Auto merge of #52266 - michaelwoerister:incr-thinlto-preliminaries, r=alexcrichton

Preliminary work for incremental ThinLTO.

Since implementing incremental ThinLTO is a bit more involved than I initially thought, I'm splitting out some of the things that already work. This PR (1) adds a way of accessing some ThinLTO information in `rustc` and (2) does some cleanup around CGU/object file naming (which makes things quite a bit nicer).

This is probably best reviewed one commit at a time.
This commit is contained in:
bors 2018-07-13 22:06:38 +00:00
commit a14a361c2c
25 changed files with 457 additions and 171 deletions

View file

@ -45,13 +45,6 @@ use std::process::{Output, Stdio};
use std::str;
use syntax::attr;
/// The LLVM module name containing crate-metadata. This includes a `.` on
/// purpose, so it cannot clash with the name of a user-defined module.
pub const METADATA_MODULE_NAME: &'static str = "crate.metadata";
// same as for metadata above, but for allocator shim
pub const ALLOCATOR_MODULE_NAME: &'static str = "crate.allocator";
pub use rustc_codegen_utils::link::{find_crate_name, filename_for_input, default_output_for_target,
invalid_output_for_target, build_link_meta, out_filename,
check_file_is_writeable};

View file

@ -20,16 +20,23 @@ use rustc::hir::def_id::LOCAL_CRATE;
use rustc::middle::exported_symbols::SymbolExportLevel;
use rustc::session::config::{self, Lto};
use rustc::util::common::time_ext;
use rustc_data_structures::fx::FxHashMap;
use time_graph::Timeline;
use {ModuleCodegen, ModuleLlvm, ModuleKind, ModuleSource};
use libc;
use std::ffi::CString;
use std::ffi::{CString, CStr};
use std::fs::File;
use std::io;
use std::mem;
use std::path::Path;
use std::ptr;
use std::slice;
use std::sync::Arc;
pub const THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME: &str = "thin-lto-imports.bin";
pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool {
match crate_type {
config::CrateTypeExecutable |
@ -193,7 +200,7 @@ pub(crate) fn run(cgcx: &CodegenContext,
}
Lto::Thin |
Lto::ThinLocal => {
thin_lto(&diag_handler, modules, upstream_modules, &arr, timeline)
thin_lto(cgcx, &diag_handler, modules, upstream_modules, &arr, timeline)
}
Lto::No => unreachable!(),
}
@ -231,7 +238,7 @@ fn fat_lto(cgcx: &CodegenContext,
.expect("must be codegen'ing at least one module");
let module = modules.remove(costliest_module);
let llmod = module.llvm().expect("can't lto pre-codegened modules").llmod;
info!("using {:?} as a base module", module.llmod_id);
info!("using {:?} as a base module", module.name);
// For all other modules we codegened we'll need to link them into our own
// bitcode. All modules were codegened in their own LLVM context, however,
@ -241,7 +248,7 @@ fn fat_lto(cgcx: &CodegenContext,
for module in modules {
let llvm = module.llvm().expect("can't lto pre-codegened modules");
let buffer = ModuleBuffer::new(llvm.llmod);
let llmod_id = CString::new(&module.llmod_id[..]).unwrap();
let llmod_id = CString::new(&module.name[..]).unwrap();
serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
}
@ -346,7 +353,8 @@ impl Drop for Linker {
/// calculating the *index* for ThinLTO. This index will then be shared amongst
/// all of the `LtoModuleCodegen` units returned below and destroyed once
/// they all go out of scope.
fn thin_lto(diag_handler: &Handler,
fn thin_lto(cgcx: &CodegenContext,
diag_handler: &Handler,
modules: Vec<ModuleCodegen>,
serialized_modules: Vec<(SerializedModule, CString)>,
symbol_white_list: &[*const libc::c_char],
@ -368,9 +376,9 @@ fn thin_lto(diag_handler: &Handler,
// the most expensive portion of this small bit of global
// analysis!
for (i, module) in modules.iter().enumerate() {
info!("local module: {} - {}", i, module.llmod_id);
info!("local module: {} - {}", i, module.name);
let llvm = module.llvm().expect("can't lto precodegened module");
let name = CString::new(module.llmod_id.clone()).unwrap();
let name = CString::new(module.name.clone()).unwrap();
let buffer = ThinBuffer::new(llvm.llmod);
thin_modules.push(llvm::ThinLTOModule {
identifier: name.as_ptr(),
@ -379,7 +387,7 @@ fn thin_lto(diag_handler: &Handler,
});
thin_buffers.push(buffer);
module_names.push(name);
timeline.record(&module.llmod_id);
timeline.record(&module.name);
}
// FIXME: All upstream crates are deserialized internally in the
@ -424,6 +432,18 @@ fn thin_lto(diag_handler: &Handler,
let msg = format!("failed to prepare thin LTO context");
return Err(write::llvm_err(&diag_handler, msg))
}
// Save the ThinLTO import information for incremental compilation.
if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
let path = incr_comp_session_dir.join(THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME);
let imports = ThinLTOImports::from_thin_lto_data(data);
if let Err(err) = imports.save_to_file(&path) {
let msg = format!("Error while writing ThinLTO import data: {}",
err);
return Err(write::llvm_err(&diag_handler, msg));
}
}
let data = ThinData(data);
info!("thin LTO data created");
timeline.record("data");
@ -656,7 +676,6 @@ impl ThinModule {
llcx,
tm,
}),
llmod_id: self.name().to_string(),
name: self.name().to_string(),
kind: ModuleKind::Regular,
};
@ -776,3 +795,117 @@ impl ThinModule {
Ok(module)
}
}
/// Records, for every module that imports something during ThinLTO, the
/// names of the modules it imports from.
///
/// The map can be persisted with `save_to_file` and read back with
/// `load_from_file`. The on-disk format is line based: the importing module's
/// name on its own line, then one line per imported module prefixed with a
/// single space, then an empty line that terminates the record.
#[derive(Debug)]
pub struct ThinLTOImports {
    // key = llvm name of importing module, value = list of modules it imports from
    imports: FxHashMap<String, Vec<String>>,
}

impl ThinLTOImports {
    /// Creates an empty import map.
    pub fn new() -> ThinLTOImports {
        ThinLTOImports {
            imports: FxHashMap(),
        }
    }

    /// Load the ThinLTO import map from ThinLTOData.
    ///
    /// # Safety
    ///
    /// `data` is passed unchanged to `LLVMRustGetThinLTOModuleImports`.
    /// NOTE(review): presumably it has to point at live ThinLTO data created
    /// by the LLVM wrapper -- confirm against the FFI declaration.
    unsafe fn from_thin_lto_data(data: *const llvm::ThinLTOData) -> ThinLTOImports {
        // Module names are stored as Rust strings, so a name that is not
        // valid UTF-8 is reported as a compiler bug.
        fn module_name_to_str(c_str: &CStr) -> &str {
            match c_str.to_str() {
                Ok(s) => s,
                Err(e) => {
                    bug!("Encountered non-utf8 LLVM module name `{}`: {}",
                         c_str.to_string_lossy(),
                         e)
                }
            }
        }

        // Called from the LLVM side once per (importing, imported) pair.
        // `payload` is the `&mut ThinLTOImports` handed over below.
        unsafe extern "C" fn imported_module_callback(
            payload: *mut libc::c_void,
            importing_module_name: *const libc::c_char,
            imported_module_name: *const libc::c_char,
        ) {
            let map = &mut* (payload as *mut ThinLTOImports);
            let importing_module_name =
                module_name_to_str(CStr::from_ptr(importing_module_name));
            let imported_module_name =
                module_name_to_str(CStr::from_ptr(imported_module_name));

            // A single lookup through the entry API replaces the former
            // `contains_key` + `insert` + `get_mut` sequence.
            map.imports
               .entry(importing_module_name.to_owned())
               .or_insert_with(Vec::new)
               .push(imported_module_name.to_owned());
        }

        let mut map = ThinLTOImports::new();
        llvm::LLVMRustGetThinLTOModuleImports(data,
                                              imported_module_callback,
                                              &mut map as *mut _ as *mut libc::c_void);
        map
    }

    /// Writes the import map to `path`, replacing any existing file.
    ///
    /// Returns an error if the file cannot be created or if any write,
    /// including the final flush of the buffered data, fails.
    pub fn save_to_file(&self, path: &Path) -> io::Result<()> {
        use std::io::Write;

        let file = File::create(path)?;
        let mut writer = io::BufWriter::new(file);

        for (importing_module_name, imported_modules) in &self.imports {
            writeln!(writer, "{}", importing_module_name)?;
            for imported_module in imported_modules {
                writeln!(writer, " {}", imported_module)?;
            }
            // An empty line terminates the record.
            writeln!(writer)?;
        }

        // Dropping a `BufWriter` flushes it but discards any error, which
        // would let a truncated file pass as a successful save. Flush
        // explicitly so the failure reaches the caller.
        writer.flush()
    }

    /// Reads an import map previously written by `save_to_file`.
    ///
    /// Returns an error if the file cannot be opened or read. Content that
    /// does not follow the expected format (an import line or record
    /// terminator without a preceding module line, a module line inside an
    /// unterminated record, or a final record that is missing its terminating
    /// empty line) yields an `io::ErrorKind::InvalidData` error.
    pub fn load_from_file(path: &Path) -> io::Result<ThinLTOImports> {
        use std::io::BufRead;

        fn malformed(msg: &str) -> io::Error {
            io::Error::new(io::ErrorKind::InvalidData, msg)
        }

        let mut imports = FxHashMap();
        let mut current_module: Option<String> = None;
        let mut current_imports = vec![];

        let file = File::open(path)?;
        for line in io::BufReader::new(file).lines() {
            let line = line?;
            if line.is_empty() {
                // End of the current record.
                let importing_module = match current_module.take() {
                    Some(name) => name,
                    None => {
                        return Err(malformed("ThinLTO import data: record terminator \
                                              without an importing module"));
                    }
                };
                imports.insert(importing_module,
                               mem::replace(&mut current_imports, vec![]));
            } else if line.starts_with(" ") {
                // This is an imported module
                if current_module.is_none() {
                    return Err(malformed("ThinLTO import data: imported module listed \
                                          before any importing module"));
                }
                current_imports.push(line.trim().to_string());
            } else {
                // This is the beginning of a new module
                if current_module.is_some() {
                    return Err(malformed("ThinLTO import data: new importing module \
                                          started before the previous record ended"));
                }
                current_module = Some(line.trim().to_string());
            }
        }

        // `save_to_file` terminates every record with an empty line, so a
        // record still open here means the file was truncated. Report it
        // rather than silently dropping the record.
        if current_module.is_some() {
            return Err(malformed("ThinLTO import data: last record is not terminated"));
        }

        Ok(ThinLTOImports {
            imports
        })
    }
}

View file

@ -696,7 +696,7 @@ unsafe fn codegen(cgcx: &CodegenContext,
if config.emit_bc_compressed {
let dst = bc_out.with_extension(RLIB_BYTECODE_EXTENSION);
let data = bytecode::encode(&module.llmod_id, data);
let data = bytecode::encode(&module.name, data);
if let Err(e) = fs::write(&dst, data) {
diag_handler.err(&format!("failed to write bytecode: {}", e));
}
@ -1308,7 +1308,6 @@ fn execute_work_item(cgcx: &CodegenContext,
assert_eq!(bytecode_compressed.is_some(), config.emit_bc_compressed);
Ok(WorkItemResult::Compiled(CompiledModule {
llmod_id: module.llmod_id.clone(),
name: module_name,
kind: ModuleKind::Regular,
pre_existing: true,

View file

@ -29,7 +29,7 @@ use super::ModuleCodegen;
use super::ModuleKind;
use abi;
use back::link;
use back::{link, lto};
use back::write::{self, OngoingCodegen, create_target_machine};
use llvm::{ContextRef, ModuleRef, ValueRef, Vector, get_param};
use llvm;
@ -739,15 +739,18 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
let link_meta = link::build_link_meta(crate_hash);
// Codegen the metadata.
let llmod_id = "metadata";
let metadata_cgu_name = CodegenUnit::build_cgu_name(tcx,
LOCAL_CRATE,
&["crate"],
Some("metadata")).as_str()
.to_string();
let (metadata_llcx, metadata_llmod, metadata) =
time(tcx.sess, "write metadata", || {
write_metadata(tcx, llmod_id, &link_meta)
write_metadata(tcx, &metadata_cgu_name, &link_meta)
});
let metadata_module = ModuleCodegen {
name: link::METADATA_MODULE_NAME.to_string(),
llmod_id: llmod_id.to_string(),
name: metadata_cgu_name,
source: ModuleSource::Codegened(ModuleLlvm {
llcx: metadata_llcx,
llmod: metadata_llmod,
@ -810,26 +813,30 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
// Codegen an allocator shim, if any
let allocator_module = if let Some(kind) = *tcx.sess.allocator_kind.get() {
unsafe {
let llmod_id = "allocator";
let (llcx, llmod) =
context::create_context_and_module(tcx.sess, llmod_id);
let modules = ModuleLlvm {
llmod,
llcx,
tm: create_target_machine(tcx.sess, false),
};
time(tcx.sess, "write allocator module", || {
let llmod_id = CodegenUnit::build_cgu_name(tcx,
LOCAL_CRATE,
&["crate"],
Some("allocator")).as_str()
.to_string();
let (llcx, llmod) = unsafe {
context::create_context_and_module(tcx.sess, &llmod_id)
};
let modules = ModuleLlvm {
llmod,
llcx,
tm: create_target_machine(tcx.sess, false),
};
time(tcx.sess, "write allocator module", || {
unsafe {
allocator::codegen(tcx, &modules, kind)
});
}
});
Some(ModuleCodegen {
name: link::ALLOCATOR_MODULE_NAME.to_string(),
llmod_id: llmod_id.to_string(),
source: ModuleSource::Codegened(modules),
kind: ModuleKind::Allocator,
})
}
Some(ModuleCodegen {
name: llmod_id,
source: ModuleSource::Codegened(modules),
kind: ModuleKind::Allocator,
})
} else {
None
};
@ -872,21 +879,10 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
// succeed it means that none of the dependencies has changed
// and we can safely re-use.
if let Some(dep_node_index) = tcx.dep_graph.try_mark_green(tcx, dep_node) {
// Append ".rs" to LLVM module identifier.
//
// LLVM code generator emits a ".file filename" directive
// for ELF backends. Value of the "filename" is set as the
// LLVM module identifier. Due to a LLVM MC bug[1], LLVM
// crashes if the module identifier is same as other symbols
// such as a function name in the module.
// 1. http://llvm.org/bugs/show_bug.cgi?id=11479
let llmod_id = format!("{}.rs", cgu.name());
let module = ModuleCodegen {
name: cgu.name().to_string(),
source: ModuleSource::Preexisting(buf),
kind: ModuleKind::Regular,
llmod_id,
};
tcx.dep_graph.mark_loaded_from_cache(dep_node_index, true);
write::submit_codegened_module_to_llvm(tcx, module, 0);
@ -1195,21 +1191,8 @@ fn compile_codegen_unit<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
{
let cgu_name = cgu.name().to_string();
// Append ".rs" to LLVM module identifier.
//
// LLVM code generator emits a ".file filename" directive
// for ELF backends. Value of the "filename" is set as the
// LLVM module identifier. Due to a LLVM MC bug[1], LLVM
// crashes if the module identifier is same as other symbols
// such as a function name in the module.
// 1. http://llvm.org/bugs/show_bug.cgi?id=11479
let llmod_id = format!("{}-{}.rs",
cgu.name(),
tcx.crate_disambiguator(LOCAL_CRATE)
.to_fingerprint().to_hex());
// Instantiate monomorphizations without filling out definitions yet...
let cx = CodegenCx::new(tcx, cgu, &llmod_id);
let cx = CodegenCx::new(tcx, cgu);
let module = {
let mono_items = cx.codegen_unit
.items_in_deterministic_order(cx.tcx);
@ -1267,7 +1250,6 @@ fn compile_codegen_unit<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
name: cgu_name,
source: ModuleSource::Codegened(llvm_module),
kind: ModuleKind::Regular,
llmod_id,
}
};
@ -1370,6 +1352,27 @@ mod temp_stable_hash_impls {
}
}
// NOTE(review): presumably `allow(unused)` is here because nothing calls this
// helper yet (the change is described as preliminary work) -- confirm and
// drop the attribute once a caller exists.
#[allow(unused)]
/// Loads the ThinLTO import map from the session's incremental compilation
/// directory.
///
/// Returns an empty map when the import file does not exist. Any error while
/// reading an existing file is reported through `sess.fatal`.
fn load_thin_lto_imports(sess: &Session) -> lto::ThinLTOImports {
    let imports_file = rustc_incremental::in_incr_comp_dir_sess(
        sess,
        lto::THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME,
    );

    if imports_file.exists() {
        lto::ThinLTOImports::load_from_file(&imports_file).unwrap_or_else(|e| {
            sess.fatal(&format!("Error while trying to load ThinLTO import data \
                                 for incremental compilation: {}", e))
        })
    } else {
        // Nothing was saved, so there is no import information to load.
        lto::ThinLTOImports::new()
    }
}
pub fn define_custom_section(cx: &CodegenCx, def_id: DefId) {
use rustc::mir::interpret::GlobalId;
@ -1408,3 +1411,4 @@ pub fn define_custom_section(cx: &CodegenCx, def_id: DefId) {
);
}
}

View file

@ -214,8 +214,7 @@ pub unsafe fn create_context_and_module(sess: &Session, mod_name: &str) -> (Cont
impl<'a, 'tcx> CodegenCx<'a, 'tcx> {
pub fn new(tcx: TyCtxt<'a, 'tcx, 'tcx>,
codegen_unit: Arc<CodegenUnit<'tcx>>,
llmod_id: &str)
codegen_unit: Arc<CodegenUnit<'tcx>>)
-> CodegenCx<'a, 'tcx> {
// An interesting part of Windows which MSVC forces our hand on (and
// apparently MinGW didn't) is the usage of `dllimport` and `dllexport`
@ -268,7 +267,7 @@ impl<'a, 'tcx> CodegenCx<'a, 'tcx> {
unsafe {
let (llcx, llmod) = create_context_and_module(&tcx.sess,
&llmod_id[..]);
&codegen_unit.name().as_str());
let dbg_cx = if tcx.sess.opts.debuginfo != NoDebugInfo {
let dctx = debuginfo::CrateDebugContext::new(llmod);

View file

@ -90,7 +90,7 @@ mod back {
mod command;
pub mod linker;
pub mod link;
mod lto;
pub mod lto;
pub mod symbol_export;
pub mod write;
mod rpath;
@ -258,8 +258,8 @@ struct ModuleCodegen {
/// unique amongst **all** crates. Therefore, it should contain
/// something unique to this crate (e.g., a module path) as well
/// as the crate name and disambiguator.
/// We currently generate these names via CodegenUnit::build_cgu_name().
name: String,
llmod_id: String,
source: ModuleSource,
kind: ModuleKind,
}
@ -306,7 +306,6 @@ impl ModuleCodegen {
};
CompiledModule {
llmod_id: self.llmod_id,
name: self.name.clone(),
kind: self.kind,
pre_existing,
@ -320,7 +319,6 @@ impl ModuleCodegen {
#[derive(Debug)]
struct CompiledModule {
name: String,
llmod_id: String,
kind: ModuleKind,
pre_existing: bool,
object: Option<PathBuf>,