From 7f482808f9700259da6585597486166148576d96 Mon Sep 17 00:00:00 2001
From: Michael Woerister <michaelwoerister@posteo.net>
Date: Fri, 9 Jun 2017 15:01:44 +0200
Subject: [PATCH] incr.comp.: Clean up and optimize dep-graph loading.

---
 src/librustc_incremental/persist/data.rs      |  21 ++--
 .../persist/dirty_clean.rs                    |  13 +-
 src/librustc_incremental/persist/load.rs      | 114 ++++++++----------
 src/librustc_incremental/persist/save.rs      |  57 ++++++---
 4 files changed, 116 insertions(+), 89 deletions(-)
diff --git a/src/librustc_incremental/persist/data.rs b/src/librustc_incremental/persist/data.rs
index c3cd9656afe5..06acfb5d7780 100644
--- a/src/librustc_incremental/persist/data.rs
+++ b/src/librustc_incremental/persist/data.rs
@@ -26,7 +26,7 @@ pub struct SerializedDepGraph {
     /// For each DepNode, stores the list of edges originating from that
     /// DepNode. Encoded as a [start, end) pair indexing into edge_list_data,
     /// which holds the actual DepNodeIndices of the target nodes.
-    pub edge_list_indices: Vec<(u32, u32)>,
+    pub edge_list_indices: IndexVec<DepNodeIndex, (u32, u32)>,
     /// A flattened list of all edge targets in the graph. Edge sources are
     /// implicit in edge_list_indices.
     pub edge_list_data: Vec<DepNodeIndex>,
@@ -55,7 +55,14 @@ pub struct SerializedDepGraph {
     /// will be different when we next compile) related to each node,
     /// but rather the `DefPathIndex`. This can then be retraced
     /// to find the current def-id.
-    pub hashes: Vec<SerializedHash>,
+    pub hashes: Vec<(DepNodeIndex, Fingerprint)>,
+}
+
+impl SerializedDepGraph {
+    pub fn edge_targets_from(&self, source: DepNodeIndex) -> &[DepNodeIndex] {
+        let (start, end) = self.edge_list_indices[source]; // [start, end) into edge_list_data
+        &self.edge_list_data[start as usize .. end as usize]
+    }
 }
 
 /// The index of a DepNode in the SerializedDepGraph::nodes array.
@@ -84,16 +91,6 @@ impl Idx for DepNodeIndex {
     }
 }
 
-#[derive(Debug, RustcEncodable, RustcDecodable)]
-pub struct SerializedHash {
-    /// def-id of thing being hashed
-    pub dep_node: DepNode,
-
-    /// the hash as of previous compilation, computed by code in
-    /// `hash` module
-    pub hash: Fingerprint,
-}
-
 #[derive(Debug, RustcEncodable, RustcDecodable)]
 pub struct SerializedWorkProduct {
     /// node that produced the work-product
diff --git a/src/librustc_incremental/persist/dirty_clean.rs b/src/librustc_incremental/persist/dirty_clean.rs
index 01ec0d685268..3f3dc10365c6 100644
--- a/src/librustc_incremental/persist/dirty_clean.rs
+++ b/src/librustc_incremental/persist/dirty_clean.rs
@@ -40,6 +40,7 @@
 //! previous revision to compare things to.
 //!
 
+use super::data::DepNodeIndex;
 use super::load::DirtyNodes;
 use rustc::dep_graph::{DepGraphQuery, DepNode, DepKind};
 use rustc::hir;
@@ -50,6 +51,7 @@ use rustc::ich::{Fingerprint, ATTR_DIRTY, ATTR_CLEAN, ATTR_DIRTY_METADATA,
                  ATTR_CLEAN_METADATA};
 use syntax::ast::{self, Attribute, NestedMetaItem};
 use rustc_data_structures::fx::{FxHashSet, FxHashMap};
+use rustc_data_structures::indexed_vec::IndexVec;
 use syntax_pos::Span;
 use rustc::ty::TyCtxt;
 
@@ -57,6 +59,7 @@ const LABEL: &'static str = "label";
 const CFG: &'static str = "cfg";
 
 pub fn check_dirty_clean_annotations<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
+                                               nodes: &IndexVec<DepNodeIndex, DepNode>,
                                                dirty_inputs: &DirtyNodes) {
     // can't add `#[rustc_dirty]` etc without opting in to this feature
     if !tcx.sess.features.borrow().rustc_attrs {
@@ -66,8 +69,14 @@ pub fn check_dirty_clean_annotations<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
     let _ignore = tcx.dep_graph.in_ignore();
     let dirty_inputs: FxHashSet<DepNode> =
         dirty_inputs.keys()
-                    .filter(|dep_node| dep_node.extract_def_id(tcx).is_some())
-                    .cloned()
+                    .filter_map(|dep_node_index| {
+                        let dep_node = nodes[*dep_node_index];
+                        if dep_node.extract_def_id(tcx).is_some() {
+                            Some(dep_node)
+                        } else {
+                            None
+                        }
+                    })
                     .collect();
 
     let query = tcx.dep_graph.query();
diff --git a/src/librustc_incremental/persist/load.rs b/src/librustc_incremental/persist/load.rs
index 53fccfaa3929..28a00bf4aa6c 100644
--- a/src/librustc_incremental/persist/load.rs
+++ b/src/librustc_incremental/persist/load.rs
@@ -17,9 +17,9 @@ use rustc::ich::Fingerprint;
 use rustc::session::Session;
 use rustc::ty::TyCtxt;
 use rustc_data_structures::fx::{FxHashSet, FxHashMap};
+use rustc_data_structures::indexed_vec::IndexVec;
 use rustc_serialize::Decodable as RustcDecodable;
 use rustc_serialize::opaque::Decoder;
-use std::default::Default;
 use std::path::{Path};
 
 use IncrementalHashesMap;
@@ -32,7 +32,7 @@ use super::work_product;
 
 // The key is a dirty node. The value is **some** base-input that we
 // can blame it on.
-pub type DirtyNodes = FxHashMap<DepNode, DepNode>;
+pub type DirtyNodes = FxHashMap<DepNodeIndex, DepNodeIndex>;
 
 /// If we are in incremental mode, and a previous dep-graph exists,
 /// then load up those nodes/edges that are still valid into the
@@ -166,48 +166,35 @@ pub fn decode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
 
     let serialized_dep_graph = SerializedDepGraph::decode(&mut dep_graph_decoder)?;
 
-    let edge_map: FxHashMap<DepNode, Vec<DepNode>> = {
-        let capacity = serialized_dep_graph.edge_list_data.len();
-        let mut edge_map = FxHashMap::with_capacity_and_hasher(capacity, Default::default());
-
-        for (node_index, source) in serialized_dep_graph.nodes.iter().enumerate() {
-            let (start, end) = serialized_dep_graph.edge_list_indices[node_index];
-            let targets =
-                (&serialized_dep_graph.edge_list_data[start as usize .. end as usize])
-                .into_iter()
-                .map(|&node_index| serialized_dep_graph.nodes[node_index].clone())
-                .collect();
-
-            edge_map.insert(source.clone(), targets);
-        }
-
-        edge_map
-    };
-
     // Compute the set of nodes from the old graph where some input
-    // has changed or been removed. These are "raw" source nodes,
-    // which means that they still use the original `DefPathIndex`
-    // values from the encoding, rather than having been retraced to a
-    // `DefId`. The reason for this is that this way we can include
-    // nodes that have been removed (which no longer have a `DefId` in
-    // the current compilation).
+    // has changed or been removed.
     let dirty_raw_nodes = initial_dirty_nodes(tcx,
                                               incremental_hashes_map,
+                                              &serialized_dep_graph.nodes,
                                               &serialized_dep_graph.hashes);
-    let dirty_raw_nodes = transitive_dirty_nodes(&edge_map, dirty_raw_nodes);
+    let dirty_raw_nodes = transitive_dirty_nodes(&serialized_dep_graph,
+                                                 dirty_raw_nodes);
 
     // Recreate the edges in the graph that are still clean.
     let mut clean_work_products = FxHashSet();
     let mut dirty_work_products = FxHashSet(); // incomplete; just used to suppress debug output
-    for (source, targets) in &edge_map {
-        for target in targets {
-            process_edge(tcx, source, target, &dirty_raw_nodes,
-                         &mut clean_work_products, &mut dirty_work_products);
+    // Iterate the targets of each source node through
+    // `SerializedDepGraph::edge_targets_from`, so that the [start, end)
+    // encoding of `edge_list_indices` is decoded in exactly one place.
+    for (source, _) in serialized_dep_graph.edge_list_indices.iter_enumerated() {
+        for &target in serialized_dep_graph.edge_targets_from(source) {
+            process_edge(tcx,
+                         source,
+                         target,
+                         &serialized_dep_graph.nodes,
+                         &dirty_raw_nodes,
+                         &mut clean_work_products,
+                         &mut dirty_work_products);
         }
     }
 
-    // Recreate bootstrap outputs, which are outputs that have no incoming edges (and hence cannot
-    // be dirty).
+    // Recreate bootstrap outputs, which are outputs that have no incoming edges
+    // (and hence cannot be dirty).
     for bootstrap_output in &serialized_dep_graph.bootstrap_outputs {
         if let DepKind::WorkProduct = bootstrap_output.kind {
             let wp_id = WorkProductId::from_fingerprint(bootstrap_output.hash);
@@ -225,7 +212,9 @@ pub fn decode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
     // dirty.
     reconcile_work_products(tcx, work_products, &clean_work_products);
 
-    dirty_clean::check_dirty_clean_annotations(tcx, &dirty_raw_nodes);
+    dirty_clean::check_dirty_clean_annotations(tcx,
+                                               &serialized_dep_graph.nodes,
+                                               &dirty_raw_nodes);
 
     load_prev_metadata_hashes(tcx,
                               &mut *incremental_hashes_map.prev_metadata_hashes.borrow_mut());
@@ -236,19 +225,20 @@ pub fn decode_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
 /// a bit vector where the index is the DefPathIndex.
 fn initial_dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
                                  incremental_hashes_map: &IncrementalHashesMap,
-                                 serialized_hashes: &[SerializedHash])
+                                 nodes: &IndexVec<DepNodeIndex, DepNode>,
+                                 serialized_hashes: &[(DepNodeIndex, Fingerprint)])
                                  -> DirtyNodes {
     let mut hcx = HashContext::new(tcx, incremental_hashes_map);
     let mut dirty_nodes = FxHashMap();
 
-    for hash in serialized_hashes {
-        let dep_node = hash.dep_node;
+    for &(dep_node_index, prev_hash) in serialized_hashes {
+        let dep_node = nodes[dep_node_index];
         if does_still_exist(tcx, &dep_node) {
             let current_hash = hcx.hash(&dep_node).unwrap_or_else(|| {
                 bug!("Cannot find current ICH for input that still exists?")
             });
 
-            if current_hash == hash.hash {
+            if current_hash == prev_hash {
                 debug!("initial_dirty_nodes: {:?} is clean (hash={:?})",
                        dep_node,
                        current_hash);
@@ -259,13 +249,13 @@ fn initial_dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
                 println!("node {:?} is dirty as hash is {:?}, was {:?}",
                          dep_node,
                          current_hash,
-                         hash.hash);
+                         prev_hash);
             }
 
             debug!("initial_dirty_nodes: {:?} is dirty as hash is {:?}, was {:?}",
                    dep_node,
                    current_hash,
-                   hash.hash);
+                   prev_hash);
         } else {
             if tcx.sess.opts.debugging_opts.incremental_dump_hash {
                 println!("node {:?} is dirty as it was removed", dep_node);
@@ -273,30 +263,27 @@ fn initial_dirty_nodes<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
 
             debug!("initial_dirty_nodes: {:?} is dirty as it was removed", dep_node);
         }
-
-        dirty_nodes.insert(hash.dep_node.clone(), hash.dep_node.clone());
+        dirty_nodes.insert(dep_node_index, dep_node_index);
     }
 
     dirty_nodes
 }
 
-fn transitive_dirty_nodes(edge_map: &FxHashMap<DepNode, Vec<DepNode>>,
+fn transitive_dirty_nodes(serialized_dep_graph: &SerializedDepGraph,
                           mut dirty_nodes: DirtyNodes)
                           -> DirtyNodes
 {
-    let mut stack: Vec<(DepNode, DepNode)> = vec![];
-    stack.extend(dirty_nodes.iter().map(|(s, b)| (s.clone(), b.clone())));
+    let mut stack: Vec<(DepNodeIndex, DepNodeIndex)> = vec![];
+    stack.extend(dirty_nodes.iter().map(|(&s, &b)| (s, b)));
     while let Some((source, blame)) = stack.pop() {
         // we know the source is dirty (because of the node `blame`)...
-        assert!(dirty_nodes.contains_key(&source));
+        debug_assert!(dirty_nodes.contains_key(&source));
 
         // ...so we dirty all the targets (with the same blame)
-        if let Some(targets) = edge_map.get(&source) {
-            for target in targets {
-                if !dirty_nodes.contains_key(target) {
-                    dirty_nodes.insert(target.clone(), blame.clone());
-                    stack.push((target.clone(), blame.clone()));
-                }
+        for &target in serialized_dep_graph.edge_targets_from(source) {
+            if !dirty_nodes.contains_key(&target) {
+                dirty_nodes.insert(target, blame);
+                stack.push((target, blame));
             }
         }
     }
@@ -402,8 +389,9 @@ fn load_prev_metadata_hashes(tcx: TyCtxt,
 
 fn process_edge<'a, 'tcx, 'edges>(
     tcx: TyCtxt<'a, 'tcx, 'tcx>,
-    source: &'edges DepNode,
-    target: &'edges DepNode,
+    source: DepNodeIndex,
+    target: DepNodeIndex,
+    nodes: &IndexVec<DepNodeIndex, DepNode>,
     dirty_raw_nodes: &DirtyNodes,
     clean_work_products: &mut FxHashSet<WorkProductId>,
     dirty_work_products: &mut FxHashSet<WorkProductId>)
@@ -411,7 +399,8 @@ fn process_edge<'a, 'tcx, 'edges>(
     // If the target is dirty, skip the edge. If this is an edge
     // that targets a work-product, we can print the blame
     // information now.
-    if let Some(blame) = dirty_raw_nodes.get(target) {
+    if let Some(&blame) = dirty_raw_nodes.get(&target) {
+        let target = nodes[target];
         if let DepKind::WorkProduct = target.kind {
             if tcx.sess.opts.debugging_opts.incremental_info {
                 let wp_id = WorkProductId::from_fingerprint(target.hash);
@@ -420,6 +409,7 @@ fn process_edge<'a, 'tcx, 'edges>(
                     // Try to reconstruct the human-readable version of the
                     // DepNode. This cannot be done for things that where
                     // removed.
+                    let blame = nodes[blame];
                     let blame_str = if let Some(def_id) = blame.extract_def_id(tcx) {
                         format!("{:?}({})",
                                 blame.kind,
@@ -444,21 +434,23 @@ fn process_edge<'a, 'tcx, 'edges>(
 
     // We should never have an edge where the target is clean but the source
     // was dirty. Otherwise something was wrong with the dirtying pass above:
-    debug_assert!(!dirty_raw_nodes.contains_key(source));
+    debug_assert!(!dirty_raw_nodes.contains_key(&source));
 
     // We also never should encounter an edge going from a removed input to a
     // clean target because removing the input would have dirtied the input
     // node and transitively dirtied the target.
-    debug_assert!(match source.kind {
+    debug_assert!(match nodes[source].kind {
         DepKind::Hir | DepKind::HirBody | DepKind::MetaData => {
-            does_still_exist(tcx, source)
+            does_still_exist(tcx, &nodes[source])
         }
         _ => true,
     });
 
-    if !dirty_raw_nodes.contains_key(target) {
-        let _task = tcx.dep_graph.in_task(*target);
-        tcx.dep_graph.read(*source);
+    if !dirty_raw_nodes.contains_key(&target) {
+        let target = nodes[target];
+        let source = nodes[source];
+        let _task = tcx.dep_graph.in_task(target);
+        tcx.dep_graph.read(source);
 
         if let DepKind::WorkProduct = target.kind {
             let wp_id = WorkProductId::from_fingerprint(target.hash);
diff --git a/src/librustc_incremental/persist/save.rs b/src/librustc_incremental/persist/save.rs
index 48742b424f1e..867452d97e8f 100644
--- a/src/librustc_incremental/persist/save.rs
+++ b/src/librustc_incremental/persist/save.rs
@@ -174,14 +174,14 @@ pub fn encode_dep_graph(tcx: TyCtxt,
     tcx.sess.opts.dep_tracking_hash().encode(encoder)?;
 
     // NB: We rely on this Vec being indexable by reduced_graph's NodeIndex.
-    let nodes: IndexVec<DepNodeIndex, DepNode> = preds
+    let mut nodes: IndexVec<DepNodeIndex, DepNode> = preds
         .reduced_graph
         .all_nodes()
         .iter()
         .map(|node| node.data.clone())
         .collect();
 
-    let mut edge_list_indices = Vec::with_capacity(nodes.len());
+    let mut edge_list_indices = IndexVec::with_capacity(nodes.len());
     let mut edge_list_data = Vec::with_capacity(preds.reduced_graph.len_edges());
 
     for node_index in 0 .. nodes.len() {
@@ -196,7 +196,7 @@ pub fn encode_dep_graph(tcx: TyCtxt,
         edge_list_indices.push((start, end));
     }
 
-    // Let's make we had no overflow there.
+    // Let's make sure we had no overflow there.
     assert!(edge_list_data.len() <= ::std::u32::MAX as usize);
     // Check that we have a consistent number of edges.
     assert_eq!(edge_list_data.len(), preds.reduced_graph.len_edges());
@@ -206,23 +206,52 @@ pub fn encode_dep_graph(tcx: TyCtxt,
                                  .map(|dep_node| (**dep_node).clone())
                                  .collect();
 
-    let hashes = preds
-        .hashes
-        .iter()
-        .map(|(&dep_node, &hash)| {
-            SerializedHash {
-                dep_node: dep_node.clone(),
-                hash: hash,
-            }
-        })
-        .collect();
+    // Next, build the map of content hashes. To this end, we need to transform
+    // the (DepNode -> Fingerprint) map that we have into a
+    // (DepNodeIndex -> Fingerprint) map. This may necessitate adding nodes back
+    // to the dep-graph that have been filtered out during reduction.
+    let content_hashes = {
+        // We have to build a (DepNode -> DepNodeIndex) map. We over-allocate by
+        // 20% because we expect some more nodes to be added below.
+        let capacity = (nodes.len() * 120) / 100;
+        let mut node_to_index = FxHashMap::with_capacity_and_hasher(capacity,
+                                                                    Default::default());
+        // Add the nodes we already have in the graph.
+        node_to_index.extend(nodes.iter_enumerated()
+                                  .map(|(index, &node)| (node, index)));
+
+        let mut content_hashes = Vec::with_capacity(preds.hashes.len());
+
+        for (&&dep_node, &hash) in preds.hashes.iter() {
+            let dep_node_index = *node_to_index
+                .entry(dep_node)
+                .or_insert_with(|| {
+                    // There is no DepNodeIndex for this DepNode yet. This
+                    // happens when the DepNode got filtered out during graph
+                    // reduction. Since we have a content hash for the DepNode,
+                    // we add it back to the graph.
+                    let next_index = nodes.len();
+                    nodes.push(dep_node);
+
+                    debug_assert_eq!(next_index, edge_list_indices.len());
+                    // Push an empty [start, end) range: this node has no edges.
+                    edge_list_indices.push((0,0));
+
+                    DepNodeIndex::new(next_index)
+                });
+
+            content_hashes.push((dep_node_index, hash));
+        }
+
+        content_hashes
+    };
 
     let graph = SerializedDepGraph {
         nodes,
         edge_list_indices,
         edge_list_data,
         bootstrap_outputs,
-        hashes,
+        hashes: content_hashes,
     };
 
     // Encode the graph data.