Auto merge of #152318 - nnethercote:streamline-HashStableContext, r=cjgillot

Streamline `rustc_span::HashStableContext`. Currently this trait has five methods. But it only really needs three. For example, currently stable hashing of spans is implemented in `rustc_span`, except a couple of sub-operations are delegated to `rustc_query_system`: `def_span` and `span_data_to_lines_and_cols`. These two delegated sub-operations can be reduced to a single delegated operation that does the full hash computation. Likewise, `assert_default_hashing_controls` depends on two delegated sub-operations, `hashing_controls` and `unstable_opts_incremental_ignore_spans`, and can be simplified. I find the resulting code simpler and clearer -- when necessary, we do a whole operation in `rustc_query_system` instead of doing it partly in `rustc_span` and partly in `rustc_query_system`. r? @cjgillot
2026-02-09 02:31:13 +00:00 · 2026-02-09 02:31:13 +00:00 · 39219ceb97
commit 39219ceb97
parent 1c316d3461 0f3108327b
3 changed files with 121 additions and 136 deletions
--- a/compiler/rustc_query_system/src/ich/hcx.rs
+++ b/compiler/rustc_query_system/src/ich/hcx.rs
@ -1,10 +1,12 @@
-use rustc_data_structures::stable_hasher::HashingControls;
+use std::hash::Hash;
+
+use rustc_data_structures::stable_hasher::{HashStable, HashingControls, StableHasher};
 use rustc_hir::def_id::{DefId, LocalDefId};
 use rustc_hir::definitions::DefPathHash;
 use rustc_session::Session;
 use rustc_session::cstore::Untracked;
 use rustc_span::source_map::SourceMap;
-use rustc_span::{BytePos, CachingSourceMapView, DUMMY_SP, SourceFile, Span, SpanData};
+use rustc_span::{CachingSourceMapView, DUMMY_SP, Pos, Span};

 // Very often, we are hashing something that does not need the `CachingSourceMapView`, so we
 // initialize it lazily.
@ -60,6 +62,11 @@ impl<'a> StableHashingContext<'a> {
        }
    }

+    #[inline]
+    fn def_span(&self, def_id: LocalDefId) -> Span {
+        self.untracked.source_span.get(def_id).unwrap_or(DUMMY_SP)
+    }
+
    #[inline]
    pub fn hashing_controls(&self) -> HashingControls {
        self.hashing_controls
@ -67,9 +74,88 @@ impl<'a> StableHashingContext<'a> {
 }

 impl<'a> rustc_span::HashStableContext for StableHashingContext<'a> {
+    /// Hashes a span in a stable way. We can't directly hash the span's `BytePos` fields (that
+    /// would be similar to hashing pointers, since those are just offsets into the `SourceMap`).
+    /// Instead, we hash the (file name, line, column) triple, which stays the same even if the
+    /// containing `SourceFile` has moved within the `SourceMap`.
+    ///
+    /// Also note that we are hashing byte offsets for the column, not unicode codepoint offsets.
+    /// For the purpose of the hash that's sufficient. Also, hashing filenames is expensive so we
+    /// avoid doing it twice when the span starts and ends in the same file, which is almost always
+    /// the case.
+    ///
+    /// IMPORTANT: changes to this method should be reflected in implementations of `SpanEncoder`.
    #[inline]
-    fn unstable_opts_incremental_ignore_spans(&self) -> bool {
-        self.incremental_ignore_spans
+    fn span_hash_stable(&mut self, span: Span, hasher: &mut StableHasher) {
+        const TAG_VALID_SPAN: u8 = 0;
+        const TAG_INVALID_SPAN: u8 = 1;
+        const TAG_RELATIVE_SPAN: u8 = 2;
+
+        if !self.hashing_controls().hash_spans {
+            return;
+        }
+
+        let span = span.data_untracked();
+        span.ctxt.hash_stable(self, hasher);
+        span.parent.hash_stable(self, hasher);
+
+        if span.is_dummy() {
+            Hash::hash(&TAG_INVALID_SPAN, hasher);
+            return;
+        }
+
+        let parent = span.parent.map(|parent| self.def_span(parent).data_untracked());
+        if let Some(parent) = parent
+            && parent.contains(span)
+        {
+            // This span is enclosed in a definition: only hash the relative position. This catches
+            // a subset of the cases from the `file.contains(parent.lo)`. But we can do this check
+            // cheaply without the expensive `span_data_to_lines_and_cols` query.
+            Hash::hash(&TAG_RELATIVE_SPAN, hasher);
+            (span.lo - parent.lo).to_u32().hash_stable(self, hasher);
+            (span.hi - parent.lo).to_u32().hash_stable(self, hasher);
+            return;
+        }
+
+        // If this is not an empty or invalid span, we want to hash the last position that belongs
+        // to it, as opposed to hashing the first position past it.
+        let Some((file, line_lo, col_lo, line_hi, col_hi)) =
+            self.source_map().span_data_to_lines_and_cols(&span)
+        else {
+            Hash::hash(&TAG_INVALID_SPAN, hasher);
+            return;
+        };
+
+        if let Some(parent) = parent
+            && file.contains(parent.lo)
+        {
+            // This span is relative to another span in the same file,
+            // only hash the relative position.
+            Hash::hash(&TAG_RELATIVE_SPAN, hasher);
+            Hash::hash(&(span.lo.0.wrapping_sub(parent.lo.0)), hasher);
+            Hash::hash(&(span.hi.0.wrapping_sub(parent.lo.0)), hasher);
+            return;
+        }
+
+        Hash::hash(&TAG_VALID_SPAN, hasher);
+        Hash::hash(&file.stable_id, hasher);
+
+        // Hash both the length and the end location (line/column) of a span. If we hash only the
+        // length, for example, then two otherwise equal spans with different end locations will
+        // have the same hash. This can cause a problem during incremental compilation wherein a
+        // previous result for a query that depends on the end location of a span will be
+        // incorrectly reused when the end location of the span it depends on has changed (see
+        // issue #74890). A similar analysis applies if some query depends specifically on the
+        // length of the span, but we only hash the end location. So hash both.
+
+        let col_lo_trunc = (col_lo.0 as u64) & 0xFF;
+        let line_lo_trunc = ((line_lo as u64) & 0xFF_FF_FF) << 8;
+        let col_hi_trunc = (col_hi.0 as u64) & 0xFF << 32;
+        let line_hi_trunc = ((line_hi as u64) & 0xFF_FF_FF) << 40;
+        let col_line = col_lo_trunc | line_lo_trunc | col_hi_trunc | line_hi_trunc;
+        let len = (span.hi - span.lo).0;
+        Hash::hash(&col_line, hasher);
+        Hash::hash(&len, hasher);
    }

    #[inline]
@ -81,22 +167,26 @@ impl<'a> rustc_span::HashStableContext for StableHashingContext<'a> {
        }
    }

+    /// Assert that the provided `HashStableContext` is configured with the default
+    /// `HashingControls`. We should always have bailed out before getting to here with a
+    /// non-default mode. With this check in place, we can avoid the need to maintain separate
+    /// versions of `ExpnData` hashes for each permutation of `HashingControls` settings.
    #[inline]
-    fn def_span(&self, def_id: LocalDefId) -> Span {
-        self.untracked.source_span.get(def_id).unwrap_or(DUMMY_SP)
-    }
+    fn assert_default_hashing_controls(&self, msg: &str) {
+        let hashing_controls = self.hashing_controls;
+        let HashingControls { hash_spans } = hashing_controls;

-    #[inline]
-    fn span_data_to_lines_and_cols(
-        &mut self,
-        span: &SpanData,
-    ) -> Option<(&SourceFile, usize, BytePos, usize, BytePos)> {
-        self.source_map().span_data_to_lines_and_cols(span)
-    }
-
-    #[inline]
-    fn hashing_controls(&self) -> HashingControls {
-        self.hashing_controls
+        // Note that we require that `hash_spans` be the inverse of the global `-Z
+        // incremental-ignore-spans` option. Normally, this option is disabled, in which case
+        // `hash_spans` must be true.
+        //
+        // Span hashing can also be disabled without `-Z incremental-ignore-spans`. This is the
+        // case for instance when building a hash for name mangling. Such configuration must not be
+        // used for metadata.
+        assert_eq!(
+            hash_spans, !self.incremental_ignore_spans,
+            "Attempted hashing of {msg} with non-default HashingControls: {hashing_controls:?}"
+        );
    }
 }

--- a/compiler/rustc_span/src/hygiene.rs
+++ b/compiler/rustc_span/src/hygiene.rs
@ -30,7 +30,7 @@ use std::{fmt, iter, mem};

 use rustc_data_structures::fingerprint::Fingerprint;
 use rustc_data_structures::fx::{FxHashMap, FxHashSet};
-use rustc_data_structures::stable_hasher::{HashStable, HashingControls, StableHasher};
+use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::sync::Lock;
 use rustc_data_structures::unhash::UnhashMap;
 use rustc_hashes::Hash64;
@ -126,29 +126,6 @@ rustc_index::newtype_index! {
 impl !Ord for LocalExpnId {}
 impl !PartialOrd for LocalExpnId {}

-/// Assert that the provided `HashStableContext` is configured with the 'default'
-/// `HashingControls`. We should always have bailed out before getting to here
-/// with a non-default mode. With this check in place, we can avoid the need
-/// to maintain separate versions of `ExpnData` hashes for each permutation
-/// of `HashingControls` settings.
-fn assert_default_hashing_controls(ctx: &impl HashStableContext, msg: &str) {
-    let hashing_controls = ctx.hashing_controls();
-    let HashingControls { hash_spans } = hashing_controls;
-
-    // Note that we require that `hash_spans` be the inverse of the global
-    // `-Z incremental-ignore-spans` option. Normally, this option is disabled,
-    // in which case `hash_spans` must be true.
-    //
-    // Span hashing can also be disabled without `-Z incremental-ignore-spans`.
-    // This is the case for instance when building a hash for name mangling.
-    // Such configuration must not be used for metadata.
-    assert_eq!(
-        hash_spans,
-        !ctx.unstable_opts_incremental_ignore_spans(),
-        "Attempted hashing of {msg} with non-default HashingControls: {hashing_controls:?}"
-    );
-}
-
 /// A unique hash value associated to an expansion.
 #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Encodable, Decodable, HashStable_Generic)]
 pub struct ExpnHash(Fingerprint);
@ -1508,7 +1485,7 @@ pub fn raw_encode_syntax_context(
 fn update_disambiguator(expn_data: &mut ExpnData, mut ctx: impl HashStableContext) -> ExpnHash {
    // This disambiguator should not have been set yet.
    assert_eq!(expn_data.disambiguator, 0, "Already set disambiguator for ExpnData: {expn_data:?}");
-    assert_default_hashing_controls(&ctx, "ExpnData (disambiguator)");
+    ctx.assert_default_hashing_controls("ExpnData (disambiguator)");
    let mut expn_hash = expn_data.hash_expn(&mut ctx);

    let disambiguator = HygieneData::with(|data| {
@ -1558,7 +1535,7 @@ impl<CTX: HashStableContext> HashStable<CTX> for SyntaxContext {

 impl<CTX: HashStableContext> HashStable<CTX> for ExpnId {
    fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) {
-        assert_default_hashing_controls(ctx, "ExpnId");
+        ctx.assert_default_hashing_controls("ExpnId");
        let hash = if *self == ExpnId::root() {
            // Avoid fetching TLS storage for a trivial often-used value.
            Fingerprint::ZERO
--- a/compiler/rustc_span/src/lib.rs
+++ b/compiler/rustc_span/src/lib.rs
@ -54,7 +54,6 @@ use hygiene::Transparency;
 pub use hygiene::{
    DesugaringKind, ExpnData, ExpnHash, ExpnId, ExpnKind, LocalExpnId, MacroKind, SyntaxContext,
 };
-use rustc_data_structures::stable_hasher::HashingControls;
 pub mod def_id;
 use def_id::{CrateNum, DefId, DefIndex, DefPathHash, LOCAL_CRATE, LocalDefId, StableCrateId};
 pub mod edit_distance;
@ -2796,105 +2795,24 @@ impl InnerSpan {
 /// This is a hack to allow using the [`HashStable_Generic`] derive macro
 /// instead of implementing everything in rustc_middle.
 pub trait HashStableContext {
+    /// The main event: stable hashing of a span.
+    fn span_hash_stable(&mut self, span: Span, hasher: &mut StableHasher);
+
+    /// Compute a `DefPathHash`.
    fn def_path_hash(&self, def_id: DefId) -> DefPathHash;
-    /// Accesses `sess.opts.unstable_opts.incremental_ignore_spans` since
-    /// we don't have easy access to a `Session`
-    fn unstable_opts_incremental_ignore_spans(&self) -> bool;
-    fn def_span(&self, def_id: LocalDefId) -> Span;
-    fn span_data_to_lines_and_cols(
-        &mut self,
-        span: &SpanData,
-    ) -> Option<(&SourceFile, usize, BytePos, usize, BytePos)>;
-    fn hashing_controls(&self) -> HashingControls;
+
+    /// Assert that the provided `HashStableContext` is configured with the default
+    /// `HashingControls`. We should always have bailed out before getting to here with a
+    fn assert_default_hashing_controls(&self, msg: &str);
 }

 impl<CTX> HashStable<CTX> for Span
 where
    CTX: HashStableContext,
 {
-    /// Hashes a span in a stable way. We can't directly hash the span's `BytePos`
-    /// fields (that would be similar to hashing pointers, since those are just
-    /// offsets into the `SourceMap`). Instead, we hash the (file name, line, column)
-    /// triple, which stays the same even if the containing `SourceFile` has moved
-    /// within the `SourceMap`.
-    ///
-    /// Also note that we are hashing byte offsets for the column, not unicode
-    /// codepoint offsets. For the purpose of the hash that's sufficient.
-    /// Also, hashing filenames is expensive so we avoid doing it twice when the
-    /// span starts and ends in the same file, which is almost always the case.
-    ///
-    /// IMPORTANT: changes to this method should be reflected in implementations of `SpanEncoder`.
    fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) {
-        const TAG_VALID_SPAN: u8 = 0;
-        const TAG_INVALID_SPAN: u8 = 1;
-        const TAG_RELATIVE_SPAN: u8 = 2;
-
-        if !ctx.hashing_controls().hash_spans {
-            return;
-        }
-
-        let span = self.data_untracked();
-        span.ctxt.hash_stable(ctx, hasher);
-        span.parent.hash_stable(ctx, hasher);
-
-        if span.is_dummy() {
-            Hash::hash(&TAG_INVALID_SPAN, hasher);
-            return;
-        }
-
-        let parent = span.parent.map(|parent| ctx.def_span(parent).data_untracked());
-        if let Some(parent) = parent
-            && parent.contains(span)
-        {
-            // This span is enclosed in a definition: only hash the relative position.
-            // This catches a subset of the cases from the `file.contains(parent.lo)`,
-            // But we can do this check cheaply without the expensive `span_data_to_lines_and_cols` query.
-            Hash::hash(&TAG_RELATIVE_SPAN, hasher);
-            (span.lo - parent.lo).to_u32().hash_stable(ctx, hasher);
-            (span.hi - parent.lo).to_u32().hash_stable(ctx, hasher);
-            return;
-        }
-
-        // If this is not an empty or invalid span, we want to hash the last
-        // position that belongs to it, as opposed to hashing the first
-        // position past it.
-        let Some((file, line_lo, col_lo, line_hi, col_hi)) = ctx.span_data_to_lines_and_cols(&span)
-        else {
-            Hash::hash(&TAG_INVALID_SPAN, hasher);
-            return;
-        };
-
-        if let Some(parent) = parent
-            && file.contains(parent.lo)
-        {
-            // This span is relative to another span in the same file,
-            // only hash the relative position.
-            Hash::hash(&TAG_RELATIVE_SPAN, hasher);
-            Hash::hash(&(span.lo.0.wrapping_sub(parent.lo.0)), hasher);
-            Hash::hash(&(span.hi.0.wrapping_sub(parent.lo.0)), hasher);
-            return;
-        }
-
-        Hash::hash(&TAG_VALID_SPAN, hasher);
-        Hash::hash(&file.stable_id, hasher);
-
-        // Hash both the length and the end location (line/column) of a span. If we
-        // hash only the length, for example, then two otherwise equal spans with
-        // different end locations will have the same hash. This can cause a problem
-        // during incremental compilation wherein a previous result for a query that
-        // depends on the end location of a span will be incorrectly reused when the
-        // end location of the span it depends on has changed (see issue #74890). A
-        // similar analysis applies if some query depends specifically on the length
-        // of the span, but we only hash the end location. So hash both.
-
-        let col_lo_trunc = (col_lo.0 as u64) & 0xFF;
-        let line_lo_trunc = ((line_lo as u64) & 0xFF_FF_FF) << 8;
-        let col_hi_trunc = (col_hi.0 as u64) & 0xFF << 32;
-        let line_hi_trunc = ((line_hi as u64) & 0xFF_FF_FF) << 40;
-        let col_line = col_lo_trunc | line_lo_trunc | col_hi_trunc | line_hi_trunc;
-        let len = (span.hi - span.lo).0;
-        Hash::hash(&col_line, hasher);
-        Hash::hash(&len, hasher);
+        // `span_hash_stable` does all the work.
+        ctx.span_hash_stable(*self, hasher)
    }
 }