Auto merge of #45032 - alexcrichton:target-cfu, r=michaelwoerister
rustc: Allow target-specific default cgus. Some targets, like msp430 and nvptx, don't work with multiple codegen units right now, either because of bugs or for fundamental reasons. To accommodate this, allow targets to specify their own default number of codegen units. Closes #45000
This commit is contained in:
commit
43d95e2ce9
7 changed files with 56 additions and 50 deletions
|
|
@ -352,7 +352,7 @@ top_level_options!(
|
|||
actually_rustdoc: bool [TRACKED],
|
||||
|
||||
// Number of object files/codegen units to produce on the backend
|
||||
codegen_units: usize [UNTRACKED],
|
||||
cli_forced_codegen_units: Option<usize> [UNTRACKED],
|
||||
}
|
||||
);
|
||||
|
||||
|
|
@ -505,7 +505,7 @@ pub fn basic_options() -> Options {
|
|||
unstable_features: UnstableFeatures::Disallow,
|
||||
debug_assertions: true,
|
||||
actually_rustdoc: false,
|
||||
codegen_units: 1,
|
||||
cli_forced_codegen_units: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1711,48 +1711,6 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
|
|||
|
||||
let incremental = debugging_opts.incremental.as_ref().map(|m| PathBuf::from(m));
|
||||
|
||||
let codegen_units = codegen_units.unwrap_or_else(|| {
|
||||
match opt_level {
|
||||
// If we're compiling at `-O0` then default to 16 codegen units.
|
||||
// The number here shouldn't matter too much as debug mode
|
||||
// builds don't rely on performance at all, meaning that lost
|
||||
// opportunities for inlining through multiple codegen units are
|
||||
// a non-issue.
|
||||
//
|
||||
// Note that the high number here doesn't mean that we'll be
|
||||
// spawning a large number of threads in parallel. The backend
|
||||
// of rustc contains global rate limiting through the
|
||||
// `jobserver` crate so we'll never overload the system with too
|
||||
// much work, but rather we'll only be optimizing when we're
|
||||
// otherwise cooperating with other instances of rustc.
|
||||
//
|
||||
// Rather the high number here means that we should be able to
|
||||
// keep a lot of idle cpus busy. By ensuring that no codegen
|
||||
// unit takes *too* long to build we'll be guaranteed that all
|
||||
// cpus will finish pretty closely to one another and we should
|
||||
// make relatively optimal use of system resources
|
||||
//
|
||||
// Another note worth mentioning here, however, is that this number
|
||||
// isn't *too* high. When codegen units are increased that means we
|
||||
// currently have to codegen `#[inline]` functions into each codegen
|
||||
// unit, which means the more codegen units we're using the more we
|
||||
// may be generating. In other words, increasing codegen units may
|
||||
// increase the overall work the compiler does. If we don't have
|
||||
// enough cores to make up for this loss then increasing the number
|
||||
// of codegen units could become an overall loss!
|
||||
//
|
||||
// As a result we choose a hopefully conservative value 16, which
|
||||
// should be more than the number of cpus of most hardware compiling
|
||||
// Rust but also not too much for 2-4 core machines to have too much
|
||||
// loss of compile time.
|
||||
OptLevel::No => 16,
|
||||
|
||||
// All other optimization levels default to using one codegen unit,
|
||||
// the historical default in Rust for a Long Time.
|
||||
_ => 1,
|
||||
}
|
||||
});
|
||||
|
||||
(Options {
|
||||
crate_types,
|
||||
optimize: opt_level,
|
||||
|
|
@ -1777,7 +1735,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
|
|||
unstable_features: UnstableFeatures::from_environment(),
|
||||
debug_assertions,
|
||||
actually_rustdoc: false,
|
||||
codegen_units,
|
||||
cli_forced_codegen_units: codegen_units,
|
||||
},
|
||||
cfg)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -636,6 +636,43 @@ impl Session {
|
|||
}
|
||||
ret
|
||||
}
|
||||
|
||||
/// Returns the number of codegen units that should be used for this
|
||||
/// compilation
|
||||
pub fn codegen_units(&self) -> usize {
|
||||
if let Some(n) = self.opts.cli_forced_codegen_units {
|
||||
return n
|
||||
}
|
||||
if let Some(n) = self.target.target.options.default_codegen_units {
|
||||
return n as usize
|
||||
}
|
||||
|
||||
match self.opts.optimize {
|
||||
// If we're compiling at `-O0` then default to 16 codegen units.
|
||||
// The number here shouldn't matter too much as debug mode
|
||||
// builds don't rely on performance at all, meaning that lost
|
||||
// opportunities for inlining through multiple codegen units are
|
||||
// a non-issue.
|
||||
//
|
||||
// Note that the high number here doesn't mean that we'll be
|
||||
// spawning a large number of threads in parallel. The backend
|
||||
// of rustc contains global rate limiting through the
|
||||
// `jobserver` crate so we'll never overload the system with too
|
||||
// much work, but rather we'll only be optimizing when we're
|
||||
// otherwise cooperating with other instances of rustc.
|
||||
//
|
||||
// Rather the high number here means that we should be able to
|
||||
// keep a lot of idle cpus busy. By ensuring that no codegen
|
||||
// unit takes *too* long to build we'll be guaranteed that all
|
||||
// cpus will finish pretty closely to one another and we should
|
||||
// make relatively optimal use of system resources
|
||||
config::OptLevel::No => 16,
|
||||
|
||||
// All other optimization levels default to using one codegen unit,
|
||||
// the historical default in Rust for a Long Time.
|
||||
_ => 1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn build_session(sopts: config::Options,
|
||||
|
|
|
|||
|
|
@ -430,6 +430,9 @@ pub struct TargetOptions {
|
|||
|
||||
/// The minimum alignment for global symbols.
|
||||
pub min_global_align: Option<u64>,
|
||||
|
||||
/// Default number of codegen units to use when none is forced on the command line
|
||||
pub default_codegen_units: Option<u64>,
|
||||
}
|
||||
|
||||
impl Default for TargetOptions {
|
||||
|
|
@ -492,6 +495,7 @@ impl Default for TargetOptions {
|
|||
crt_static_respected: false,
|
||||
stack_probes: false,
|
||||
min_global_align: None,
|
||||
default_codegen_units: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -732,6 +736,7 @@ impl Target {
|
|||
key!(crt_static_respected, bool);
|
||||
key!(stack_probes, bool);
|
||||
key!(min_global_align, Option<u64>);
|
||||
key!(default_codegen_units, Option<u64>);
|
||||
|
||||
if let Some(array) = obj.find("abi-blacklist").and_then(Json::as_array) {
|
||||
for name in array.iter().filter_map(|abi| abi.as_string()) {
|
||||
|
|
@ -924,6 +929,7 @@ impl ToJson for Target {
|
|||
target_option_val!(crt_static_respected);
|
||||
target_option_val!(stack_probes);
|
||||
target_option_val!(min_global_align);
|
||||
target_option_val!(default_codegen_units);
|
||||
|
||||
if default.abi_blacklist != self.options.abi_blacklist {
|
||||
d.insert("abi-blacklist".to_string(), self.options.abi_blacklist.iter()
|
||||
|
|
|
|||
|
|
@ -48,6 +48,11 @@ pub fn target() -> TargetResult {
|
|||
// code because of the extra costs it involves.
|
||||
relocation_model: "static".to_string(),
|
||||
|
||||
// Right now we invoke an external assembler and this isn't
|
||||
// compatible with multiple codegen units; besides, we probably
|
||||
// don't want to invoke that many gcc instances.
|
||||
default_codegen_units: Some(1),
|
||||
|
||||
.. Default::default( )
|
||||
}
|
||||
})
|
||||
|
|
|
|||
|
|
@ -467,7 +467,7 @@ fn link_rlib<'a>(sess: &'a Session,
|
|||
// of when we do and don't keep .#module-name#.bc files around.
|
||||
let user_wants_numbered_bitcode =
|
||||
sess.opts.output_types.contains_key(&OutputType::Bitcode) &&
|
||||
sess.opts.codegen_units > 1;
|
||||
sess.codegen_units() > 1;
|
||||
if !sess.opts.cg.save_temps && !user_wants_numbered_bitcode {
|
||||
remove(sess, &bc_filename);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1037,10 +1037,10 @@ fn produce_final_output_artifacts(sess: &Session,
|
|||
let needs_crate_object = crate_output.outputs.contains_key(&OutputType::Exe);
|
||||
|
||||
let keep_numbered_bitcode = needs_crate_bitcode ||
|
||||
(user_wants_bitcode && sess.opts.codegen_units > 1);
|
||||
(user_wants_bitcode && sess.codegen_units() > 1);
|
||||
|
||||
let keep_numbered_objects = needs_crate_object ||
|
||||
(user_wants_objects && sess.opts.codegen_units > 1);
|
||||
(user_wants_objects && sess.codegen_units() > 1);
|
||||
|
||||
for module in compiled_modules.modules.iter() {
|
||||
let module_name = Some(&module.name[..]);
|
||||
|
|
@ -2052,7 +2052,7 @@ impl OngoingCrateTranslation {
|
|||
|
||||
// FIXME: time_llvm_passes support - does this use a global context or
|
||||
// something?
|
||||
if sess.opts.codegen_units == 1 && sess.time_llvm_passes() {
|
||||
if sess.codegen_units() == 1 && sess.time_llvm_passes() {
|
||||
unsafe { llvm::LLVMRustPrintPassTimings(); }
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1218,7 +1218,7 @@ fn collect_and_partition_translation_items<'a, 'tcx>(
|
|||
let strategy = if tcx.sess.opts.debugging_opts.incremental.is_some() {
|
||||
PartitioningStrategy::PerModule
|
||||
} else {
|
||||
PartitioningStrategy::FixedUnitCount(tcx.sess.opts.codegen_units)
|
||||
PartitioningStrategy::FixedUnitCount(tcx.sess.codegen_units())
|
||||
};
|
||||
|
||||
let codegen_units = time(time_passes, "codegen unit partitioning", || {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue