From 7f482808f9700259da6585597486166148576d96 Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Fri, 9 Jun 2017 15:01:44 +0200 Subject: [PATCH] incr.comp.: Clean up and optimize dep-graph loading. --- src/librustc_incremental/persist/data.rs | 21 ++-- .../persist/dirty_clean.rs | 13 +- src/librustc_incremental/persist/load.rs | 114 ++++++++---------- src/librustc_incremental/persist/save.rs | 57 ++++++--- 4 files changed, 116 insertions(+), 89 deletions(-) diff --git a/src/librustc_incremental/persist/data.rs b/src/librustc_incremental/persist/data.rs index c3cd9656afe5..06acfb5d7780 100644 --- a/src/librustc_incremental/persist/data.rs +++ b/src/librustc_incremental/persist/data.rs @@ -26,7 +26,7 @@ pub struct SerializedDepGraph { /// For each DepNode, stores the list of edges originating from that /// DepNode. Encoded as a [start, end) pair indexing into edge_list_data, /// which holds the actual DepNodeIndices of the target nodes. - pub edge_list_indices: Vec<(u32, u32)>, + pub edge_list_indices: IndexVec<DepNodeIndex, (u32, u32)>, /// A flattened list of all edge targets in the graph. Edge sources are /// implicit in edge_list_indices. pub edge_list_data: Vec<DepNodeIndex>, @@ -55,7 +55,14 @@ pub struct SerializedDepGraph { /// will be different when we next compile) related to each node, /// but rather the `DefPathIndex`. This can then be retraced /// to find the current def-id. - pub hashes: Vec<SerializedHash>, + pub hashes: Vec<(DepNodeIndex, Fingerprint)>, +} + +impl SerializedDepGraph { + pub fn edge_targets_from(&self, source: DepNodeIndex) -> &[DepNodeIndex] { + let targets = self.edge_list_indices[source]; + &self.edge_list_data[targets.0 as usize .. targets.1 as usize] + } } /// The index of a DepNode in the SerializedDepGraph::nodes array. 
@@ -84,16 +91,6 @@ impl Idx for DepNodeIndex { } } -#[derive(Debug, RustcEncodable, RustcDecodable)] -pub struct SerializedHash { - /// def-id of thing being hashed - pub dep_node: DepNode, - - /// the hash as of previous compilation, computed by code in - /// `hash` module - pub hash: Fingerprint, -} - #[derive(Debug, RustcEncodable, RustcDecodable)] pub struct SerializedWorkProduct { /// node that produced the work-product diff --git a/src/librustc_incremental/persist/dirty_clean.rs b/src/librustc_incremental/persist/dirty_clean.rs index 01ec0d685268..3f3dc10365c6 100644 --- a/src/librustc_incremental/persist/dirty_clean.rs +++ b/src/librustc_incremental/persist/dirty_clean.rs @@ -40,6 +40,7 @@ //! previous revision to compare things to. //! +use super::data::DepNodeIndex; use super::load::DirtyNodes; use rustc::dep_graph::{DepGraphQuery, DepNode, DepKind}; use rustc::hir; @@ -50,6 +51,7 @@ use rustc::ich::{Fingerprint, ATTR_DIRTY, ATTR_CLEAN, ATTR_DIRTY_METADATA, ATTR_CLEAN_METADATA}; use syntax::ast::{self, Attribute, NestedMetaItem}; use rustc_data_structures::fx::{FxHashSet, FxHashMap}; +use rustc_data_structures::indexed_vec::IndexVec; use syntax_pos::Span; use rustc::ty::TyCtxt; @@ -57,6 +59,7 @@ const LABEL: &'static str = "label"; const CFG: &'static str = "cfg"; pub fn check_dirty_clean_annotations<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, + nodes: &IndexVec<DepNodeIndex, DepNode>, dirty_inputs: &DirtyNodes) { // can't add `#[rustc_dirty]` etc without opting in to this feature if !tcx.sess.features.borrow().rustc_attrs { @@ -66,8 +69,14 @@ pub fn check_dirty_clean_annotations<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, let _ignore = tcx.dep_graph.in_ignore(); let dirty_inputs: FxHashSet<DepNode> = dirty_inputs.keys() - .filter(|dep_node| dep_node.extract_def_id(tcx).is_some()) - .cloned() + .filter_map(|dep_node_index| { + let dep_node = nodes[*dep_node_index]; + if dep_node.extract_def_id(tcx).is_some() { + Some(dep_node) + } else { + None + } + }) .collect(); let query = 
tcx.dep_graph.query(); diff --git a/src/librustc_incremental/persist/load.rs b/src/librustc_incremental/persist/load.rs index 53fccfaa3929..28a00bf4aa6c 100644 --- a/src/librustc_incremental/persist/load.rs +++ b/src/librustc_incremental/persist/load.rs @@ -17,9 +17,9 @@ use rustc::ich::Fingerprint; use rustc::session::Session; use rustc::ty::TyCtxt; use rustc_data_structures::fx::{FxHashSet, FxHashMap}; +use rustc_data_structures::indexed_vec::IndexVec; use rustc_serialize::Decodable as RustcDecodable; use rustc_serialize::opaque::Decoder; -use std::default::Default; use std::path::{Path}; use IncrementalHashesMap; @@ -32,7 +32,7 @@ use super::work_product; // The key is a dirty node. The value is **some** base-input that we // can blame it on. -pub type DirtyNodes = FxHashMap<DepNode, DepNode>; +pub type DirtyNodes = FxHashMap<DepNodeIndex, DepNodeIndex>; /// If we are in incremental mode, and a previous dep-graph exists, /// then load up those nodes/edges that are still valid into the @@ -166,48 +166,35 @@ pub fn decode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, let serialized_dep_graph = SerializedDepGraph::decode(&mut dep_graph_decoder)?; - let edge_map: FxHashMap<DepNode, Vec<DepNode>> = { - let capacity = serialized_dep_graph.edge_list_data.len(); - let mut edge_map = FxHashMap::with_capacity_and_hasher(capacity, Default::default()); - - for (node_index, source) in serialized_dep_graph.nodes.iter().enumerate() { - let (start, end) = serialized_dep_graph.edge_list_indices[node_index]; - let targets = - (&serialized_dep_graph.edge_list_data[start as usize .. end as usize]) - .into_iter() - .map(|&node_index| serialized_dep_graph.nodes[node_index].clone()) - .collect(); - - edge_map.insert(source.clone(), targets); - } - - edge_map - }; - // Compute the set of nodes from the old graph where some input - // has changed or been removed. These are "raw" source nodes, - // which means that they still use the original `DefPathIndex` - // values from the encoding, rather than having been retraced to a - // `DefId`. 
The reason for this is that this way we can include - // nodes that have been removed (which no longer have a `DefId` in - // the current compilation). + // has changed or been removed. let dirty_raw_nodes = initial_dirty_nodes(tcx, incremental_hashes_map, + &serialized_dep_graph.nodes, &serialized_dep_graph.hashes); - let dirty_raw_nodes = transitive_dirty_nodes(&edge_map, dirty_raw_nodes); + let dirty_raw_nodes = transitive_dirty_nodes(&serialized_dep_graph, + dirty_raw_nodes); // Recreate the edges in the graph that are still clean. let mut clean_work_products = FxHashSet(); let mut dirty_work_products = FxHashSet(); // incomplete; just used to suppress debug output - for (source, targets) in &edge_map { - for target in targets { - process_edge(tcx, source, target, &dirty_raw_nodes, - &mut clean_work_products, &mut dirty_work_products); + for (source, targets) in serialized_dep_graph.edge_list_indices.iter_enumerated() { + let target_begin = targets.0 as usize; + let target_end = targets.1 as usize; + + for &target in &serialized_dep_graph.edge_list_data[target_begin .. target_end] { + process_edge(tcx, + source, + target, + &serialized_dep_graph.nodes, + &dirty_raw_nodes, + &mut clean_work_products, + &mut dirty_work_products); } } - // Recreate bootstrap outputs, which are outputs that have no incoming edges (and hence cannot - // be dirty). + // Recreate bootstrap outputs, which are outputs that have no incoming edges + // (and hence cannot be dirty). for bootstrap_output in &serialized_dep_graph.bootstrap_outputs { if let DepKind::WorkProduct = bootstrap_output.kind { let wp_id = WorkProductId::from_fingerprint(bootstrap_output.hash); @@ -225,7 +212,9 @@ pub fn decode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, // dirty. 
reconcile_work_products(tcx, work_products, &clean_work_products); - dirty_clean::check_dirty_clean_annotations(tcx, &dirty_raw_nodes); + dirty_clean::check_dirty_clean_annotations(tcx, + &serialized_dep_graph.nodes, + &dirty_raw_nodes); load_prev_metadata_hashes(tcx, &mut *incremental_hashes_map.prev_metadata_hashes.borrow_mut()); @@ -236,19 +225,20 @@ pub fn decode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, /// a bit vector where the index is the DefPathIndex. fn initial_dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, incremental_hashes_map: &IncrementalHashesMap, - serialized_hashes: &[SerializedHash]) + nodes: &IndexVec<DepNodeIndex, DepNode>, + serialized_hashes: &[(DepNodeIndex, Fingerprint)]) -> DirtyNodes { let mut hcx = HashContext::new(tcx, incremental_hashes_map); let mut dirty_nodes = FxHashMap(); - for hash in serialized_hashes { - let dep_node = hash.dep_node; + for &(dep_node_index, prev_hash) in serialized_hashes { + let dep_node = nodes[dep_node_index]; if does_still_exist(tcx, &dep_node) { let current_hash = hcx.hash(&dep_node).unwrap_or_else(|| { bug!("Cannot find current ICH for input that still exists?") }); - if current_hash == hash.hash { + if current_hash == prev_hash { debug!("initial_dirty_nodes: {:?} is clean (hash={:?})", dep_node, current_hash); @@ -259,13 +249,13 @@ fn initial_dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, println!("node {:?} is dirty as hash is {:?}, was {:?}", dep_node, current_hash, - hash.hash); + prev_hash); } debug!("initial_dirty_nodes: {:?} is dirty as hash is {:?}, was {:?}", dep_node, current_hash, - hash.hash); + prev_hash); } else { if tcx.sess.opts.debugging_opts.incremental_dump_hash { println!("node {:?} is dirty as it was removed", dep_node); @@ -273,30 +263,27 @@ fn initial_dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, debug!("initial_dirty_nodes: {:?} is dirty as it was removed", dep_node); } - - dirty_nodes.insert(hash.dep_node.clone(), hash.dep_node.clone()); + dirty_nodes.insert(dep_node_index, 
dep_node_index); } dirty_nodes } -fn transitive_dirty_nodes(edge_map: &FxHashMap<DepNode, Vec<DepNode>>, +fn transitive_dirty_nodes(serialized_dep_graph: &SerializedDepGraph, mut dirty_nodes: DirtyNodes) -> DirtyNodes { - let mut stack: Vec<(DepNode, DepNode)> = vec![]; - stack.extend(dirty_nodes.iter().map(|(s, b)| (s.clone(), b.clone()))); + let mut stack: Vec<(DepNodeIndex, DepNodeIndex)> = vec![]; + stack.extend(dirty_nodes.iter().map(|(&s, &b)| (s, b))); while let Some((source, blame)) = stack.pop() { // we know the source is dirty (because of the node `blame`)... - assert!(dirty_nodes.contains_key(&source)); + debug_assert!(dirty_nodes.contains_key(&source)); // ...so we dirty all the targets (with the same blame) - if let Some(targets) = edge_map.get(&source) { - for target in targets { - if !dirty_nodes.contains_key(target) { - dirty_nodes.insert(target.clone(), blame.clone()); - stack.push((target.clone(), blame.clone())); - } + for &target in serialized_dep_graph.edge_targets_from(source) { + if !dirty_nodes.contains_key(&target) { + dirty_nodes.insert(target, blame); + stack.push((target, blame)); } } } @@ -402,8 +389,9 @@ fn load_prev_metadata_hashes(tcx: TyCtxt, fn process_edge<'a, 'tcx, 'edges>( tcx: TyCtxt<'a, 'tcx, 'tcx>, - source: &'edges DepNode, - target: &'edges DepNode, + source: DepNodeIndex, + target: DepNodeIndex, + nodes: &IndexVec<DepNodeIndex, DepNode>, dirty_raw_nodes: &DirtyNodes, clean_work_products: &mut FxHashSet<WorkProductId>, dirty_work_products: &mut FxHashSet<WorkProductId>) @@ -411,7 +399,8 @@ fn process_edge<'a, 'tcx, 'edges>( // If the target is dirty, skip the edge. If this is an edge // that targets a work-product, we can print the blame // information now. 
- if let Some(blame) = dirty_raw_nodes.get(target) { + if let Some(&blame) = dirty_raw_nodes.get(&target) { + let target = nodes[target]; if let DepKind::WorkProduct = target.kind { if tcx.sess.opts.debugging_opts.incremental_info { let wp_id = WorkProductId::from_fingerprint(target.hash); @@ -420,6 +409,7 @@ fn process_edge<'a, 'tcx, 'edges>( // Try to reconstruct the human-readable version of the // DepNode. This cannot be done for things that where // removed. + let blame = nodes[blame]; let blame_str = if let Some(def_id) = blame.extract_def_id(tcx) { format!("{:?}({})", blame.kind, @@ -444,21 +434,23 @@ fn process_edge<'a, 'tcx, 'edges>( // We should never have an edge where the target is clean but the source // was dirty. Otherwise something was wrong with the dirtying pass above: - debug_assert!(!dirty_raw_nodes.contains_key(source)); + debug_assert!(!dirty_raw_nodes.contains_key(&source)); // We also never should encounter an edge going from a removed input to a // clean target because removing the input would have dirtied the input // node and transitively dirtied the target. 
- debug_assert!(match source.kind { + debug_assert!(match nodes[source].kind { DepKind::Hir | DepKind::HirBody | DepKind::MetaData => { - does_still_exist(tcx, source) + does_still_exist(tcx, &nodes[source]) } _ => true, }); - if !dirty_raw_nodes.contains_key(target) { - let _task = tcx.dep_graph.in_task(*target); - tcx.dep_graph.read(*source); + if !dirty_raw_nodes.contains_key(&target) { + let target = nodes[target]; + let source = nodes[source]; + let _task = tcx.dep_graph.in_task(target); + tcx.dep_graph.read(source); if let DepKind::WorkProduct = target.kind { let wp_id = WorkProductId::from_fingerprint(target.hash); diff --git a/src/librustc_incremental/persist/save.rs b/src/librustc_incremental/persist/save.rs index 48742b424f1e..867452d97e8f 100644 --- a/src/librustc_incremental/persist/save.rs +++ b/src/librustc_incremental/persist/save.rs @@ -174,14 +174,14 @@ pub fn encode_dep_graph(tcx: TyCtxt, tcx.sess.opts.dep_tracking_hash().encode(encoder)?; // NB: We rely on this Vec being indexable by reduced_graph's NodeIndex. - let nodes: IndexVec<DepNodeIndex, DepNode> = preds + let mut nodes: IndexVec<DepNodeIndex, DepNode> = preds .reduced_graph .all_nodes() .iter() .map(|node| node.data.clone()) .collect(); - let mut edge_list_indices = Vec::with_capacity(nodes.len()); + let mut edge_list_indices = IndexVec::with_capacity(nodes.len()); let mut edge_list_data = Vec::with_capacity(preds.reduced_graph.len_edges()); for node_index in 0 .. nodes.len() { @@ -196,7 +196,7 @@ pub fn encode_dep_graph(tcx: TyCtxt, edge_list_indices.push((start, end)); } - // Let's make we had no overflow there. + // Let's make sure we had no overflow there. assert!(edge_list_data.len() <= ::std::u32::MAX as usize); // Check that we have a consistent number of edges. 
assert_eq!(edge_list_data.len(), preds.reduced_graph.len_edges()); @@ -206,23 +206,52 @@ pub fn encode_dep_graph(tcx: TyCtxt, .map(|dep_node| (**dep_node).clone()) .collect(); - let hashes = preds - .hashes - .iter() - .map(|(&dep_node, &hash)| { - SerializedHash { - dep_node: dep_node.clone(), - hash: hash, - } - }) - .collect(); + // Next, build the map of content hashes. To this end, we need to transform + // the (DepNode -> Fingerprint) map that we have into a + // (DepNodeIndex -> Fingerprint) map. This may necessitate adding nodes back + // to the dep-graph that have been filtered out during reduction. + let content_hashes = { + // We have to build a (DepNode -> DepNodeIndex) map. We over-allocate a + // little because we expect some more nodes to be added. + let capacity = (nodes.len() * 120) / 100; + let mut node_to_index = FxHashMap::with_capacity_and_hasher(capacity, + Default::default()); + // Add the nodes we already have in the graph. + node_to_index.extend(nodes.iter_enumerated() + .map(|(index, &node)| (node, index))); + + let mut content_hashes = Vec::with_capacity(preds.hashes.len()); + + for (&&dep_node, &hash) in preds.hashes.iter() { + let dep_node_index = *node_to_index + .entry(dep_node) + .or_insert_with(|| { + // There is no DepNodeIndex for this DepNode yet. This + // happens when the DepNode got filtered out during graph + // reduction. Since we have a content hash for the DepNode, + // we add it back to the graph. + let next_index = nodes.len(); + nodes.push(dep_node); + + debug_assert_eq!(next_index, edge_list_indices.len()); + // Push an empty list of edges + edge_list_indices.push((0,0)); + + DepNodeIndex::new(next_index) + }); + + content_hashes.push((dep_node_index, hash)); + } + + content_hashes + }; let graph = SerializedDepGraph { nodes, edge_list_indices, edge_list_data, bootstrap_outputs, - hashes, + hashes: content_hashes, }; // Encode the graph data.