From e60b0f802b438d350ccd1ec1b42f515994b1a4c0 Mon Sep 17 00:00:00 2001 From: varkor Date: Mon, 15 Jan 2018 18:28:34 +0000 Subject: [PATCH] Refactor CodegenUnit size estimates --- src/librustc/mir/mono.rs | 40 ++++++++++++++- src/librustc_mir/monomorphize/partitioning.rs | 50 ++++--------------- src/librustc_trans/base.rs | 4 +- 3 files changed, 50 insertions(+), 44 deletions(-) diff --git a/src/librustc/mir/mono.rs b/src/librustc/mir/mono.rs index efdf4066815f..bbef045a305a 100644 --- a/src/librustc/mir/mono.rs +++ b/src/librustc/mir/mono.rs @@ -10,7 +10,7 @@ use syntax::ast::NodeId; use syntax::symbol::InternedString; -use ty::Instance; +use ty::{Instance, TyCtxt}; use util::nodemap::FxHashMap; use rustc_data_structures::base_n; use rustc_data_structures::stable_hasher::{HashStable, StableHasherResult, @@ -25,6 +25,22 @@ pub enum MonoItem<'tcx> { GlobalAsm(NodeId), } +impl<'tcx> MonoItem<'tcx> { + pub fn size_estimate<'a>(&self, tcx: &TyCtxt<'a, 'tcx, 'tcx>) -> usize { + match *self { + MonoItem::Fn(instance) => { + // Estimate the size of a function based on how many statements + // it contains. + let mir = tcx.instance_mir(instance.def); + mir.basic_blocks().iter().map(|bb| bb.statements.len()).sum() + }, + // Conservatively estimate the size of a static declaration + // or assembly to be 1. + MonoItem::Static(_) | MonoItem::GlobalAsm(_) => 1, + } + } +} + impl<'tcx> HashStable> for MonoItem<'tcx> { fn hash_stable(&self, hcx: &mut StableHashingContext<'tcx>, @@ -52,6 +68,7 @@ pub struct CodegenUnit<'tcx> { /// as well as the crate name and disambiguator. name: InternedString, items: FxHashMap, (Linkage, Visibility)>, + size_estimate: Option, } #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] @@ -101,6 +118,7 @@ impl<'tcx> CodegenUnit<'tcx> { CodegenUnit { name: name, items: FxHashMap(), + size_estimate: None, } } @@ -131,6 +149,25 @@ impl<'tcx> CodegenUnit<'tcx> { let hash = hash & ((1u128 << 80) - 1); base_n::encode(hash, base_n::CASE_INSENSITIVE) } + + pub fn estimate_size<'a>(&mut self, tcx: &TyCtxt<'a, 'tcx, 'tcx>) { + // Estimate the size of a codegen unit as (approximately) the number of MIR + // statements it corresponds to. + self.size_estimate = Some(self.items.keys().map(|mi| mi.size_estimate(tcx)).sum()); + } + + pub fn size_estimate(&self) -> usize { + // Should only be called if `estimate_size` has previously been called. + assert!(self.size_estimate.is_some()); + self.size_estimate.unwrap() + } + + pub fn modify_size_estimate(&mut self, delta: usize) { + assert!(self.size_estimate.is_some()); + if let Some(size_estimate) = self.size_estimate { + self.size_estimate = Some(size_estimate + delta); + } + } } impl<'tcx> HashStable> for CodegenUnit<'tcx> { @@ -140,6 +177,7 @@ impl<'tcx> HashStable> for CodegenUnit<'tcx> { let CodegenUnit { ref items, name, + .. } = *self; name.hash_stable(hcx, hasher); diff --git a/src/librustc_mir/monomorphize/partitioning.rs b/src/librustc_mir/monomorphize/partitioning.rs index d8ec074b8a46..4150f9f95485 100644 --- a/src/librustc_mir/monomorphize/partitioning.rs +++ b/src/librustc_mir/monomorphize/partitioning.rs @@ -110,7 +110,7 @@ use rustc::mir::mono::{Linkage, Visibility}; use rustc::ty::{self, TyCtxt, InstanceDef}; use rustc::ty::item_path::characteristic_def_id_of_type; use rustc::util::nodemap::{FxHashMap, FxHashSet}; -use std::collections::hash_map::{HashMap, Entry}; +use std::collections::hash_map::Entry; use syntax::ast::NodeId; use syntax::symbol::{Symbol, InternedString}; use rustc::mir::mono::MonoItem; @@ -225,12 +225,14 @@ pub fn partition<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>, let mut initial_partitioning = place_root_translation_items(tcx, trans_items); + initial_partitioning.codegen_units.iter_mut().for_each(|cgu| cgu.estimate_size(&tcx)); + debug_dump(tcx, "INITIAL PARTITIONING:", initial_partitioning.codegen_units.iter()); // If the partitioning should produce a fixed count of codegen units, merge // until that count is reached. if let PartitioningStrategy::FixedUnitCount(count) = strategy { - merge_codegen_units(tcx, &mut initial_partitioning, count, &tcx.crate_name.as_str()); + merge_codegen_units(&mut initial_partitioning, count, &tcx.crate_name.as_str()); debug_dump(tcx, "POST MERGING:", initial_partitioning.codegen_units.iter()); } @@ -242,6 +244,8 @@ pub fn partition<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>, let mut post_inlining = place_inlined_translation_items(initial_partitioning, inlining_map); + post_inlining.codegen_units.iter_mut().for_each(|cgu| cgu.estimate_size(&tcx)); + debug_dump(tcx, "POST INLINING:", post_inlining.codegen_units.iter()); // Next we try to make as many symbols "internal" as possible, so LLVM has @@ -405,8 +409,7 @@ fn place_root_translation_items<'a, 'tcx, I>(tcx: TyCtxt<'a, 'tcx, 'tcx>, } } -fn merge_codegen_units<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, - initial_partitioning: &mut PreInliningPartitioning<'tcx>, +fn merge_codegen_units<'tcx>(initial_partitioning: &mut PreInliningPartitioning<'tcx>, target_cgu_count: usize, crate_name: &str) { assert!(target_cgu_count >= 1); @@ -423,51 +426,16 @@ fn merge_codegen_units<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, // the stable sort below will keep everything nice and deterministic. codegen_units.sort_by_key(|cgu| cgu.name().clone()); - // Estimate the size of a codegen unit as (approximately) the number of MIR - // statements it corresponds to. - fn codegen_unit_size_estimate<'a, 'tcx>(cgu: &CodegenUnit<'tcx>, - mono_item_sizes: &HashMap) - -> usize { - cgu.items().keys().map(|mi| mono_item_sizes.get(mi).unwrap()).sum() - } - - fn mono_item_size_estimate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, - item: &MonoItem<'tcx>) - -> usize { - match item { - MonoItem::Fn(instance) => { - // Estimate the size of a function based on how many statements - // it contains. - let mir = tcx.instance_mir(instance.def); - mir.basic_blocks().iter().map(|bb| bb.statements.len()).sum() - }, - // Conservatively estimate the size of a static declaration - // or assembly to be 1. - MonoItem::Static(_) | MonoItem::GlobalAsm(_) => 1, - } - } - - // Since `sort_by_key` currently recomputes the keys for each comparison, - // we can save unnecessary recomputations by storing size estimates for - // each `MonoItem`. Storing estimates for `CodegenUnit` might be preferable, - // but its structure makes it awkward to use as a key and additionally their - // sizes change as the merging occurs, requiring the map to be updated. - let mut sizes: HashMap = HashMap::new(); - for mis in codegen_units.iter().map(|cgu| cgu.items().keys()) { - mis.for_each(|mi| { - sizes.entry(*mi).or_insert_with(|| mono_item_size_estimate(tcx, mi)); - }); - } - // Merge the two smallest codegen units until the target size is reached. // Note that "size" is estimated here rather inaccurately as the number of // translation items in a given unit. This could be improved on. while codegen_units.len() > target_cgu_count { // Sort small cgus to the back - codegen_units.sort_by_key(|cgu| usize::MAX - codegen_unit_size_estimate(cgu, &sizes)); + codegen_units.sort_by_key(|cgu| usize::MAX - cgu.size_estimate()); let mut smallest = codegen_units.pop().unwrap(); let second_smallest = codegen_units.last_mut().unwrap(); + second_smallest.modify_size_estimate(smallest.size_estimate()); for (k, v) in smallest.items_mut().drain() { second_smallest.items_mut().insert(k, v); } diff --git a/src/librustc_trans/base.rs b/src/librustc_trans/base.rs index 633ed9b32cd1..e03b6ee794d6 100644 --- a/src/librustc_trans/base.rs +++ b/src/librustc_trans/base.rs @@ -79,7 +79,7 @@ use std::ffi::CString; use std::str; use std::sync::Arc; use std::time::{Instant, Duration}; -use std::i32; +use std::{i32, usize}; use std::iter; use std::sync::mpsc; use syntax_pos::Span; @@ -829,7 +829,7 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, // account the size of each TransItem. let codegen_units = { let mut codegen_units = codegen_units; - codegen_units.sort_by_key(|cgu| -(cgu.items().len() as isize)); + codegen_units.sort_by_key(|cgu| usize::MAX - cgu.size_estimate()); codegen_units };