From cc69b425195156dfa6609ded54814db0b221d4fc Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 9 Oct 2025 11:18:49 +1100 Subject: [PATCH 1/4] Use `contains` with `task_deps.reads`. --- compiler/rustc_query_system/src/dep_graph/graph.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs index 5e62dab0722c..eb0b1f1313e1 100644 --- a/compiler/rustc_query_system/src/dep_graph/graph.rs +++ b/compiler/rustc_query_system/src/dep_graph/graph.rs @@ -484,7 +484,7 @@ impl DepGraph { // As long as we only have a low number of reads we can avoid doing a hash // insert and potentially allocating/reallocating the hashmap let new_read = if task_deps.reads.len() < EdgesVec::INLINE_CAPACITY { - task_deps.reads.iter().all(|other| *other != dep_node_index) + !task_deps.reads.contains(&dep_node_index) } else { task_deps.read_set.insert(dep_node_index) }; From 38d7e8b502bfe23cc83b261ddd713c82eac75b10 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 9 Oct 2025 13:22:43 +1100 Subject: [PATCH 2/4] Rename a badly-named variable. --- compiler/rustc_query_system/src/dep_graph/graph.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs index eb0b1f1313e1..32d66a87c7f5 100644 --- a/compiler/rustc_query_system/src/dep_graph/graph.rs +++ b/compiler/rustc_query_system/src/dep_graph/graph.rs @@ -390,9 +390,9 @@ impl DepGraphData { let task_deps = Lock::new(TaskDeps::default()); let result = D::with_deps(TaskDepsRef::Allow(&task_deps), op); let task_deps = task_deps.into_inner(); - let task_deps = task_deps.reads; + let reads = task_deps.reads; - let dep_node_index = match task_deps.len() { + let dep_node_index = match reads.len() { 0 => { // Because the dep-node id of anon nodes is computed from the sets of its // dependencies we already know what the ID of this dependency-less node is @@ -403,7 +403,7 @@ impl DepGraphData { } 1 => { // When there is only one dependency, don't bother creating a node. - task_deps[0] + reads[0] } _ => { // The dep node indices are hashed here instead of hashing the dep nodes of the @@ -412,7 +412,7 @@ impl DepGraphData { // combining it with the per session random number `anon_id_seed`. This hash only need // to map the dependencies to a single value on a per session basis. let mut hasher = StableHasher::new(); - task_deps.hash(&mut hasher); + reads.hash(&mut hasher); let target_dep_node = DepNode { kind: dep_kind, @@ -430,7 +430,7 @@ impl DepGraphData { // memory impact of this `anon_node_to_index` map remains tolerable, and helps // us avoid useless growth of the graph with almost-equivalent nodes. self.current.anon_node_to_index.get_or_insert_with(target_dep_node, || { - self.current.alloc_new_node(target_dep_node, task_deps, Fingerprint::ZERO) + self.current.alloc_new_node(target_dep_node, reads, Fingerprint::ZERO) }) } }; From 43e813c559b6db0eb1517bcf49a0c13b4f24729e Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 9 Oct 2025 13:28:34 +1100 Subject: [PATCH 3/4] Replace `TaskDeps::default()` with `TaskDeps::new()`. There are only two places that create a `TaskDeps`. One constructs it manually, the other uses `default`. It's weird that `default()` uses a capacity of 128. This commit just gets rid of `default` and introduces `new` so that both construction sites can be equivalent. --- .../rustc_query_system/src/dep_graph/graph.rs | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs index 32d66a87c7f5..4f12380ca786 100644 --- a/compiler/rustc_query_system/src/dep_graph/graph.rs +++ b/compiler/rustc_query_system/src/dep_graph/graph.rs @@ -349,13 +349,11 @@ impl DepGraphData { let (result, edges) = if cx.dep_context().is_eval_always(key.kind) { (with_deps(TaskDepsRef::EvalAlways), EdgesVec::new()) } else { - let task_deps = Lock::new(TaskDeps { + let task_deps = Lock::new(TaskDeps::new( #[cfg(debug_assertions)] - node: Some(key), - reads: EdgesVec::new(), - read_set: Default::default(), - phantom_data: PhantomData, - }); + Some(key), + 0, + )); (with_deps(TaskDepsRef::Allow(&task_deps)), task_deps.into_inner().reads) }; @@ -387,7 +385,11 @@ impl DepGraphData { { debug_assert!(!cx.is_eval_always(dep_kind)); - let task_deps = Lock::new(TaskDeps::default()); + let task_deps = Lock::new(TaskDeps::new( + #[cfg(debug_assertions)] + None, + 128, + )); let result = D::with_deps(TaskDepsRef::Allow(&task_deps), op); let task_deps = task_deps.into_inner(); let reads = task_deps.reads; @@ -1307,17 +1309,19 @@ pub struct TaskDeps { phantom_data: PhantomData, } -impl Default for TaskDeps { - fn default() -> Self { - Self { +impl TaskDeps { + #[inline] + fn new(#[cfg(debug_assertions)] node: Option, read_set_capacity: usize) -> Self { + TaskDeps { #[cfg(debug_assertions)] - node: None, + node, reads: EdgesVec::new(), - read_set: FxHashSet::with_capacity_and_hasher(128, Default::default()), + read_set: FxHashSet::with_capacity_and_hasher(read_set_capacity, Default::default()), phantom_data: PhantomData, } } } + // A data structure that stores Option values as a contiguous // array, using one u32 per entry. pub(super) struct DepNodeColorMap { From 5d0007627a308daf5c7bb228a1edc009a0134e4a Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 9 Oct 2025 16:01:24 +1100 Subject: [PATCH 4/4] Clarify and improve `EdgesVec::INLINE_CAPACITY` use. `INLINE_CAPACITY` has two different uses: - It dictates the inline capacity of `EdgesVec::edges`, which is a `SmallVec`. - It dictates when `TaskDeps` switches from a linear scan lookup to a hashset lookup to determine if an edge has been seen before. These two uses are in the same part of the code, but they're fundamentally separate and don't need to use the same constant. This commit separates the two uses, and adds some helpful comments, making the code clearer. It also changes the value used for the linear/hashset threshold from 8 to 16, which gives slightly better perf. --- .../rustc_query_system/src/dep_graph/edges.rs | 4 +--- .../rustc_query_system/src/dep_graph/graph.rs | 24 ++++++++++++++----- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/compiler/rustc_query_system/src/dep_graph/edges.rs b/compiler/rustc_query_system/src/dep_graph/edges.rs index 9a3763bd4eeb..092e9c1ceb75 100644 --- a/compiler/rustc_query_system/src/dep_graph/edges.rs +++ b/compiler/rustc_query_system/src/dep_graph/edges.rs @@ -8,7 +8,7 @@ use crate::dep_graph::DepNodeIndex; #[derive(Default, Debug)] pub(crate) struct EdgesVec { max: u32, - edges: SmallVec<[DepNodeIndex; EdgesVec::INLINE_CAPACITY]>, + edges: SmallVec<[DepNodeIndex; 8]>, } impl Hash for EdgesVec { @@ -19,8 +19,6 @@ impl Hash for EdgesVec { } impl EdgesVec { - pub(crate) const INLINE_CAPACITY: usize = 8; - #[inline] pub(crate) fn new() -> Self { Self::default() diff --git a/compiler/rustc_query_system/src/dep_graph/graph.rs b/compiler/rustc_query_system/src/dep_graph/graph.rs index 4f12380ca786..d962db3585f2 100644 --- a/compiler/rustc_query_system/src/dep_graph/graph.rs +++ b/compiler/rustc_query_system/src/dep_graph/graph.rs @@ -385,6 +385,8 @@ impl DepGraphData { { debug_assert!(!cx.is_eval_always(dep_kind)); + // Large numbers of reads are common enough here that pre-sizing `read_set` + // to 128 actually helps perf on some benchmarks. let task_deps = Lock::new(TaskDeps::new( #[cfg(debug_assertions)] None, @@ -483,18 +485,17 @@ impl DepGraph { data.current.total_read_count.fetch_add(1, Ordering::Relaxed); } - // As long as we only have a low number of reads we can avoid doing a hash - // insert and potentially allocating/reallocating the hashmap - let new_read = if task_deps.reads.len() < EdgesVec::INLINE_CAPACITY { + // Has `dep_node_index` been seen before? Use either a linear scan or a hashset + // lookup to determine this. See `TaskDeps::read_set` for details. + let new_read = if task_deps.reads.len() <= TaskDeps::LINEAR_SCAN_MAX { !task_deps.reads.contains(&dep_node_index) } else { task_deps.read_set.insert(dep_node_index) }; if new_read { task_deps.reads.push(dep_node_index); - if task_deps.reads.len() == EdgesVec::INLINE_CAPACITY { - // Fill `read_set` with what we have so far so we can use the hashset - // next time + if task_deps.reads.len() == TaskDeps::LINEAR_SCAN_MAX + 1 { + // Fill `read_set` with what we have so far. Future lookups will use it. task_deps.read_set.extend(task_deps.reads.iter().copied()); } @@ -1304,12 +1305,23 @@ pub enum TaskDepsRef<'a> { pub struct TaskDeps { #[cfg(debug_assertions)] node: Option, + + /// A vector of `DepNodeIndex`, basically. reads: EdgesVec, + + /// When adding new edges to `reads` in `DepGraph::read_index` we need to determine if the edge + /// has been seen before. If the number of elements in `reads` is small, we just do a linear + /// scan. If the number is higher, a hashset has better perf. This field is that hashset. It's + /// only used if the number of elements in `reads` exceeds `LINEAR_SCAN_MAX`. read_set: FxHashSet, + phantom_data: PhantomData, } impl TaskDeps { + /// See `TaskDeps::read_set` above. + const LINEAR_SCAN_MAX: usize = 16; + #[inline] fn new(#[cfg(debug_assertions)] node: Option, read_set_capacity: usize) -> Self { TaskDeps {