Auto merge of #152318 - nnethercote:streamline-HashStableContext, r=cjgillot
Streamline `rustc_span::HashStableContext`. Currently this trait has five methods. But it only really needs three. For example, currently stable hashing of spans is implemented in `rustc_span`, except a couple of sub-operations are delegated to `rustc_query_system`: `def_span` and `span_data_to_lines_and_cols`. These two delegated sub-operations can be reduced to a single delegated operation that does the full hash computation. Likewise, `assert_default_hashing_controls` depends on two delegated sub-operations, `hashing_controls` and `unstable_opts_incremental_ignore_spans`, and can be simplified. I find the resulting code simpler and clearer -- when necessary, we do a whole operation in `rustc_query_system` instead of doing it partly in `rustc_span` and partly in `rustc_query_system`. r? @cjgillot
This commit is contained in:
commit
39219ceb97
3 changed files with 121 additions and 136 deletions
|
|
@ -1,10 +1,12 @@
|
|||
use rustc_data_structures::stable_hasher::HashingControls;
|
||||
use std::hash::Hash;
|
||||
|
||||
use rustc_data_structures::stable_hasher::{HashStable, HashingControls, StableHasher};
|
||||
use rustc_hir::def_id::{DefId, LocalDefId};
|
||||
use rustc_hir::definitions::DefPathHash;
|
||||
use rustc_session::Session;
|
||||
use rustc_session::cstore::Untracked;
|
||||
use rustc_span::source_map::SourceMap;
|
||||
use rustc_span::{BytePos, CachingSourceMapView, DUMMY_SP, SourceFile, Span, SpanData};
|
||||
use rustc_span::{CachingSourceMapView, DUMMY_SP, Pos, Span};
|
||||
|
||||
// Very often, we are hashing something that does not need the `CachingSourceMapView`, so we
|
||||
// initialize it lazily.
|
||||
|
|
@ -60,6 +62,11 @@ impl<'a> StableHashingContext<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn def_span(&self, def_id: LocalDefId) -> Span {
|
||||
self.untracked.source_span.get(def_id).unwrap_or(DUMMY_SP)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn hashing_controls(&self) -> HashingControls {
|
||||
self.hashing_controls
|
||||
|
|
@ -67,9 +74,88 @@ impl<'a> StableHashingContext<'a> {
|
|||
}
|
||||
|
||||
impl<'a> rustc_span::HashStableContext for StableHashingContext<'a> {
|
||||
/// Hashes a span in a stable way. We can't directly hash the span's `BytePos` fields (that
|
||||
/// would be similar to hashing pointers, since those are just offsets into the `SourceMap`).
|
||||
/// Instead, we hash the (file name, line, column) triple, which stays the same even if the
|
||||
/// containing `SourceFile` has moved within the `SourceMap`.
|
||||
///
|
||||
/// Also note that we are hashing byte offsets for the column, not unicode codepoint offsets.
|
||||
/// For the purpose of the hash that's sufficient. Also, hashing filenames is expensive so we
|
||||
/// avoid doing it twice when the span starts and ends in the same file, which is almost always
|
||||
/// the case.
|
||||
///
|
||||
/// IMPORTANT: changes to this method should be reflected in implementations of `SpanEncoder`.
|
||||
#[inline]
|
||||
fn unstable_opts_incremental_ignore_spans(&self) -> bool {
|
||||
self.incremental_ignore_spans
|
||||
fn span_hash_stable(&mut self, span: Span, hasher: &mut StableHasher) {
|
||||
const TAG_VALID_SPAN: u8 = 0;
|
||||
const TAG_INVALID_SPAN: u8 = 1;
|
||||
const TAG_RELATIVE_SPAN: u8 = 2;
|
||||
|
||||
if !self.hashing_controls().hash_spans {
|
||||
return;
|
||||
}
|
||||
|
||||
let span = span.data_untracked();
|
||||
span.ctxt.hash_stable(self, hasher);
|
||||
span.parent.hash_stable(self, hasher);
|
||||
|
||||
if span.is_dummy() {
|
||||
Hash::hash(&TAG_INVALID_SPAN, hasher);
|
||||
return;
|
||||
}
|
||||
|
||||
let parent = span.parent.map(|parent| self.def_span(parent).data_untracked());
|
||||
if let Some(parent) = parent
|
||||
&& parent.contains(span)
|
||||
{
|
||||
// This span is enclosed in a definition: only hash the relative position. This catches
|
||||
// a subset of the cases from the `file.contains(parent.lo)`. But we can do this check
|
||||
// cheaply without the expensive `span_data_to_lines_and_cols` query.
|
||||
Hash::hash(&TAG_RELATIVE_SPAN, hasher);
|
||||
(span.lo - parent.lo).to_u32().hash_stable(self, hasher);
|
||||
(span.hi - parent.lo).to_u32().hash_stable(self, hasher);
|
||||
return;
|
||||
}
|
||||
|
||||
// If this is not an empty or invalid span, we want to hash the last position that belongs
|
||||
// to it, as opposed to hashing the first position past it.
|
||||
let Some((file, line_lo, col_lo, line_hi, col_hi)) =
|
||||
self.source_map().span_data_to_lines_and_cols(&span)
|
||||
else {
|
||||
Hash::hash(&TAG_INVALID_SPAN, hasher);
|
||||
return;
|
||||
};
|
||||
|
||||
if let Some(parent) = parent
|
||||
&& file.contains(parent.lo)
|
||||
{
|
||||
// This span is relative to another span in the same file,
|
||||
// only hash the relative position.
|
||||
Hash::hash(&TAG_RELATIVE_SPAN, hasher);
|
||||
Hash::hash(&(span.lo.0.wrapping_sub(parent.lo.0)), hasher);
|
||||
Hash::hash(&(span.hi.0.wrapping_sub(parent.lo.0)), hasher);
|
||||
return;
|
||||
}
|
||||
|
||||
Hash::hash(&TAG_VALID_SPAN, hasher);
|
||||
Hash::hash(&file.stable_id, hasher);
|
||||
|
||||
// Hash both the length and the end location (line/column) of a span. If we hash only the
|
||||
// length, for example, then two otherwise equal spans with different end locations will
|
||||
// have the same hash. This can cause a problem during incremental compilation wherein a
|
||||
// previous result for a query that depends on the end location of a span will be
|
||||
// incorrectly reused when the end location of the span it depends on has changed (see
|
||||
// issue #74890). A similar analysis applies if some query depends specifically on the
|
||||
// length of the span, but we only hash the end location. So hash both.
|
||||
|
||||
let col_lo_trunc = (col_lo.0 as u64) & 0xFF;
|
||||
let line_lo_trunc = ((line_lo as u64) & 0xFF_FF_FF) << 8;
|
||||
let col_hi_trunc = (col_hi.0 as u64) & 0xFF << 32;
|
||||
let line_hi_trunc = ((line_hi as u64) & 0xFF_FF_FF) << 40;
|
||||
let col_line = col_lo_trunc | line_lo_trunc | col_hi_trunc | line_hi_trunc;
|
||||
let len = (span.hi - span.lo).0;
|
||||
Hash::hash(&col_line, hasher);
|
||||
Hash::hash(&len, hasher);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
|
@ -81,22 +167,26 @@ impl<'a> rustc_span::HashStableContext for StableHashingContext<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Assert that the provided `HashStableContext` is configured with the default
|
||||
/// `HashingControls`. We should always have bailed out before getting to here with a
|
||||
/// non-default mode. With this check in place, we can avoid the need to maintain separate
|
||||
/// versions of `ExpnData` hashes for each permutation of `HashingControls` settings.
|
||||
#[inline]
|
||||
fn def_span(&self, def_id: LocalDefId) -> Span {
|
||||
self.untracked.source_span.get(def_id).unwrap_or(DUMMY_SP)
|
||||
}
|
||||
fn assert_default_hashing_controls(&self, msg: &str) {
|
||||
let hashing_controls = self.hashing_controls;
|
||||
let HashingControls { hash_spans } = hashing_controls;
|
||||
|
||||
#[inline]
|
||||
fn span_data_to_lines_and_cols(
|
||||
&mut self,
|
||||
span: &SpanData,
|
||||
) -> Option<(&SourceFile, usize, BytePos, usize, BytePos)> {
|
||||
self.source_map().span_data_to_lines_and_cols(span)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn hashing_controls(&self) -> HashingControls {
|
||||
self.hashing_controls
|
||||
// Note that we require that `hash_spans` be the inverse of the global `-Z
|
||||
// incremental-ignore-spans` option. Normally, this option is disabled, in which case
|
||||
// `hash_spans` must be true.
|
||||
//
|
||||
// Span hashing can also be disabled without `-Z incremental-ignore-spans`. This is the
|
||||
// case for instance when building a hash for name mangling. Such configuration must not be
|
||||
// used for metadata.
|
||||
assert_eq!(
|
||||
hash_spans, !self.incremental_ignore_spans,
|
||||
"Attempted hashing of {msg} with non-default HashingControls: {hashing_controls:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ use std::{fmt, iter, mem};
|
|||
|
||||
use rustc_data_structures::fingerprint::Fingerprint;
|
||||
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
|
||||
use rustc_data_structures::stable_hasher::{HashStable, HashingControls, StableHasher};
|
||||
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
|
||||
use rustc_data_structures::sync::Lock;
|
||||
use rustc_data_structures::unhash::UnhashMap;
|
||||
use rustc_hashes::Hash64;
|
||||
|
|
@ -126,29 +126,6 @@ rustc_index::newtype_index! {
|
|||
impl !Ord for LocalExpnId {}
|
||||
impl !PartialOrd for LocalExpnId {}
|
||||
|
||||
/// Assert that the provided `HashStableContext` is configured with the 'default'
|
||||
/// `HashingControls`. We should always have bailed out before getting to here
|
||||
/// with a non-default mode. With this check in place, we can avoid the need
|
||||
/// to maintain separate versions of `ExpnData` hashes for each permutation
|
||||
/// of `HashingControls` settings.
|
||||
fn assert_default_hashing_controls(ctx: &impl HashStableContext, msg: &str) {
|
||||
let hashing_controls = ctx.hashing_controls();
|
||||
let HashingControls { hash_spans } = hashing_controls;
|
||||
|
||||
// Note that we require that `hash_spans` be the inverse of the global
|
||||
// `-Z incremental-ignore-spans` option. Normally, this option is disabled,
|
||||
// in which case `hash_spans` must be true.
|
||||
//
|
||||
// Span hashing can also be disabled without `-Z incremental-ignore-spans`.
|
||||
// This is the case for instance when building a hash for name mangling.
|
||||
// Such configuration must not be used for metadata.
|
||||
assert_eq!(
|
||||
hash_spans,
|
||||
!ctx.unstable_opts_incremental_ignore_spans(),
|
||||
"Attempted hashing of {msg} with non-default HashingControls: {hashing_controls:?}"
|
||||
);
|
||||
}
|
||||
|
||||
/// A unique hash value associated to an expansion.
|
||||
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug, Encodable, Decodable, HashStable_Generic)]
|
||||
pub struct ExpnHash(Fingerprint);
|
||||
|
|
@ -1508,7 +1485,7 @@ pub fn raw_encode_syntax_context(
|
|||
fn update_disambiguator(expn_data: &mut ExpnData, mut ctx: impl HashStableContext) -> ExpnHash {
|
||||
// This disambiguator should not have been set yet.
|
||||
assert_eq!(expn_data.disambiguator, 0, "Already set disambiguator for ExpnData: {expn_data:?}");
|
||||
assert_default_hashing_controls(&ctx, "ExpnData (disambiguator)");
|
||||
ctx.assert_default_hashing_controls("ExpnData (disambiguator)");
|
||||
let mut expn_hash = expn_data.hash_expn(&mut ctx);
|
||||
|
||||
let disambiguator = HygieneData::with(|data| {
|
||||
|
|
@ -1558,7 +1535,7 @@ impl<CTX: HashStableContext> HashStable<CTX> for SyntaxContext {
|
|||
|
||||
impl<CTX: HashStableContext> HashStable<CTX> for ExpnId {
|
||||
fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) {
|
||||
assert_default_hashing_controls(ctx, "ExpnId");
|
||||
ctx.assert_default_hashing_controls("ExpnId");
|
||||
let hash = if *self == ExpnId::root() {
|
||||
// Avoid fetching TLS storage for a trivial often-used value.
|
||||
Fingerprint::ZERO
|
||||
|
|
|
|||
|
|
@ -54,7 +54,6 @@ use hygiene::Transparency;
|
|||
pub use hygiene::{
|
||||
DesugaringKind, ExpnData, ExpnHash, ExpnId, ExpnKind, LocalExpnId, MacroKind, SyntaxContext,
|
||||
};
|
||||
use rustc_data_structures::stable_hasher::HashingControls;
|
||||
pub mod def_id;
|
||||
use def_id::{CrateNum, DefId, DefIndex, DefPathHash, LOCAL_CRATE, LocalDefId, StableCrateId};
|
||||
pub mod edit_distance;
|
||||
|
|
@ -2796,105 +2795,24 @@ impl InnerSpan {
|
|||
/// This is a hack to allow using the [`HashStable_Generic`] derive macro
|
||||
/// instead of implementing everything in rustc_middle.
|
||||
pub trait HashStableContext {
|
||||
/// The main event: stable hashing of a span.
|
||||
fn span_hash_stable(&mut self, span: Span, hasher: &mut StableHasher);
|
||||
|
||||
/// Compute a `DefPathHash`.
|
||||
fn def_path_hash(&self, def_id: DefId) -> DefPathHash;
|
||||
/// Accesses `sess.opts.unstable_opts.incremental_ignore_spans` since
|
||||
/// we don't have easy access to a `Session`
|
||||
fn unstable_opts_incremental_ignore_spans(&self) -> bool;
|
||||
fn def_span(&self, def_id: LocalDefId) -> Span;
|
||||
fn span_data_to_lines_and_cols(
|
||||
&mut self,
|
||||
span: &SpanData,
|
||||
) -> Option<(&SourceFile, usize, BytePos, usize, BytePos)>;
|
||||
fn hashing_controls(&self) -> HashingControls;
|
||||
|
||||
/// Assert that the provided `HashStableContext` is configured with the default
|
||||
/// `HashingControls`. We should always have bailed out before getting to here with a
|
||||
fn assert_default_hashing_controls(&self, msg: &str);
|
||||
}
|
||||
|
||||
impl<CTX> HashStable<CTX> for Span
|
||||
where
|
||||
CTX: HashStableContext,
|
||||
{
|
||||
/// Hashes a span in a stable way. We can't directly hash the span's `BytePos`
|
||||
/// fields (that would be similar to hashing pointers, since those are just
|
||||
/// offsets into the `SourceMap`). Instead, we hash the (file name, line, column)
|
||||
/// triple, which stays the same even if the containing `SourceFile` has moved
|
||||
/// within the `SourceMap`.
|
||||
///
|
||||
/// Also note that we are hashing byte offsets for the column, not unicode
|
||||
/// codepoint offsets. For the purpose of the hash that's sufficient.
|
||||
/// Also, hashing filenames is expensive so we avoid doing it twice when the
|
||||
/// span starts and ends in the same file, which is almost always the case.
|
||||
///
|
||||
/// IMPORTANT: changes to this method should be reflected in implementations of `SpanEncoder`.
|
||||
fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) {
|
||||
const TAG_VALID_SPAN: u8 = 0;
|
||||
const TAG_INVALID_SPAN: u8 = 1;
|
||||
const TAG_RELATIVE_SPAN: u8 = 2;
|
||||
|
||||
if !ctx.hashing_controls().hash_spans {
|
||||
return;
|
||||
}
|
||||
|
||||
let span = self.data_untracked();
|
||||
span.ctxt.hash_stable(ctx, hasher);
|
||||
span.parent.hash_stable(ctx, hasher);
|
||||
|
||||
if span.is_dummy() {
|
||||
Hash::hash(&TAG_INVALID_SPAN, hasher);
|
||||
return;
|
||||
}
|
||||
|
||||
let parent = span.parent.map(|parent| ctx.def_span(parent).data_untracked());
|
||||
if let Some(parent) = parent
|
||||
&& parent.contains(span)
|
||||
{
|
||||
// This span is enclosed in a definition: only hash the relative position.
|
||||
// This catches a subset of the cases from the `file.contains(parent.lo)`,
|
||||
// But we can do this check cheaply without the expensive `span_data_to_lines_and_cols` query.
|
||||
Hash::hash(&TAG_RELATIVE_SPAN, hasher);
|
||||
(span.lo - parent.lo).to_u32().hash_stable(ctx, hasher);
|
||||
(span.hi - parent.lo).to_u32().hash_stable(ctx, hasher);
|
||||
return;
|
||||
}
|
||||
|
||||
// If this is not an empty or invalid span, we want to hash the last
|
||||
// position that belongs to it, as opposed to hashing the first
|
||||
// position past it.
|
||||
let Some((file, line_lo, col_lo, line_hi, col_hi)) = ctx.span_data_to_lines_and_cols(&span)
|
||||
else {
|
||||
Hash::hash(&TAG_INVALID_SPAN, hasher);
|
||||
return;
|
||||
};
|
||||
|
||||
if let Some(parent) = parent
|
||||
&& file.contains(parent.lo)
|
||||
{
|
||||
// This span is relative to another span in the same file,
|
||||
// only hash the relative position.
|
||||
Hash::hash(&TAG_RELATIVE_SPAN, hasher);
|
||||
Hash::hash(&(span.lo.0.wrapping_sub(parent.lo.0)), hasher);
|
||||
Hash::hash(&(span.hi.0.wrapping_sub(parent.lo.0)), hasher);
|
||||
return;
|
||||
}
|
||||
|
||||
Hash::hash(&TAG_VALID_SPAN, hasher);
|
||||
Hash::hash(&file.stable_id, hasher);
|
||||
|
||||
// Hash both the length and the end location (line/column) of a span. If we
|
||||
// hash only the length, for example, then two otherwise equal spans with
|
||||
// different end locations will have the same hash. This can cause a problem
|
||||
// during incremental compilation wherein a previous result for a query that
|
||||
// depends on the end location of a span will be incorrectly reused when the
|
||||
// end location of the span it depends on has changed (see issue #74890). A
|
||||
// similar analysis applies if some query depends specifically on the length
|
||||
// of the span, but we only hash the end location. So hash both.
|
||||
|
||||
let col_lo_trunc = (col_lo.0 as u64) & 0xFF;
|
||||
let line_lo_trunc = ((line_lo as u64) & 0xFF_FF_FF) << 8;
|
||||
let col_hi_trunc = (col_hi.0 as u64) & 0xFF << 32;
|
||||
let line_hi_trunc = ((line_hi as u64) & 0xFF_FF_FF) << 40;
|
||||
let col_line = col_lo_trunc | line_lo_trunc | col_hi_trunc | line_hi_trunc;
|
||||
let len = (span.hi - span.lo).0;
|
||||
Hash::hash(&col_line, hasher);
|
||||
Hash::hash(&len, hasher);
|
||||
// `span_hash_stable` does all the work.
|
||||
ctx.span_hash_stable(*self, hasher)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue