Auto merge of #145737 - cjgillot:gvn-valueset, r=saethlin

GVN: stop hashing opaque values

GVN generates values that are not meant to be unified with any other. For instance `Opaque` (aka we don't know anything), non-deterministic constants and borrows.

The current algorithm generates a unique index, so the generated `Value` will be different from all the existing. This is wasteful, as we should not hash that `Value` at all.

This PR proposes to do this. This involves partially reimplementing a `FxIndexSet`, but yields a small but consistent perf improvement (https://github.com/rust-lang/rust/pull/145737#issuecomment-3276951054).
This commit is contained in:
bors 2025-09-19 12:15:03 +00:00
commit e10aa88911
3 changed files with 180 additions and 44 deletions

View file

@ -4235,6 +4235,7 @@ name = "rustc_mir_transform"
version = "0.0.0"
dependencies = [
"either",
"hashbrown",
"itertools",
"rustc_abi",
"rustc_arena",

View file

@ -6,6 +6,7 @@ edition = "2024"
[dependencies]
# tidy-alphabetical-start
either = "1"
hashbrown = "0.15"
itertools = "0.12"
rustc_abi = { path = "../rustc_abi" }
rustc_arena = { path = "../rustc_arena" }

View file

@ -85,8 +85,10 @@
//! that contain `AllocId`s.
use std::borrow::Cow;
use std::hash::{Hash, Hasher};
use either::Either;
use hashbrown::hash_table::{Entry, HashTable};
use itertools::Itertools as _;
use rustc_abi::{self as abi, BackendRepr, FIRST_VARIANT, FieldIdx, Primitive, Size, VariantIdx};
use rustc_const_eval::const_eval::DummyMachine;
@ -94,7 +96,7 @@ use rustc_const_eval::interpret::{
ImmTy, Immediate, InterpCx, MemPlaceMeta, MemoryKind, OpTy, Projectable, Scalar,
intern_const_alloc_for_constprop,
};
use rustc_data_structures::fx::{FxIndexSet, MutableValues};
use rustc_data_structures::fx::FxHasher;
use rustc_data_structures::graph::dominators::Dominators;
use rustc_hir::def::DefKind;
use rustc_index::bit_set::DenseBitSet;
@ -152,9 +154,29 @@ impl<'tcx> crate::MirPass<'tcx> for GVN {
}
newtype_index! {
/// This represents a `Value` in the symbolic execution.
#[debug_format = "_v{}"]
struct VnIndex {}
}
/// Marker type to forbid hashing and comparing opaque values.
/// This struct should only be constructed by `ValueSet::insert_unique` to ensure we use that
/// method to create non-unifiable values. It will ICE if used in `ValueSet::insert`.
#[derive(Copy, Clone, Debug, Eq)]
struct VnOpaque;
impl PartialEq for VnOpaque {
fn eq(&self, _: &VnOpaque) -> bool {
// ICE if we try to compare unique values
unreachable!()
}
}
impl Hash for VnOpaque {
fn hash<T: Hasher>(&self, _: &mut T) {
// ICE if we try to hash unique values
unreachable!()
}
}
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
enum AddressKind {
Ref(BorrowKind),
@ -166,15 +188,17 @@ enum Value<'tcx> {
// Root values.
/// Used to represent values we know nothing about.
/// The `usize` is a counter incremented by `new_opaque`.
Opaque(usize),
Opaque(VnOpaque),
/// Evaluated or unevaluated constant value.
Constant {
value: Const<'tcx>,
/// Some constants do not have a deterministic value. To avoid merging two instances of the
/// same `Const`, we assign them an additional integer index.
// `disambiguator` is 0 iff the constant is deterministic.
disambiguator: usize,
// `disambiguator` is `None` iff the constant is deterministic.
disambiguator: Option<VnOpaque>,
},
// Aggregates.
/// An aggregate value, either tuple/closure/struct/enum.
/// This does not contain unions, as we cannot reason with the value.
Aggregate(VariantIdx, Vec<VnIndex>),
@ -192,7 +216,7 @@ enum Value<'tcx> {
place: Place<'tcx>,
kind: AddressKind,
/// Give each borrow and pointer a different provenance, so we don't merge them.
provenance: usize,
provenance: VnOpaque,
},
// Extractions.
@ -211,6 +235,107 @@ enum Value<'tcx> {
},
}
/// Stores and deduplicates pairs of `(Value, Ty)` into in `VnIndex` numbered values.
///
/// This data structure is mostly a partial reimplementation of `FxIndexMap<VnIndex, (Value, Ty)>`.
/// We do not use a regular `FxIndexMap` to skip hashing values that are unique by construction,
/// like opaque values, address with provenance and non-deterministic constants.
struct ValueSet<'tcx> {
indices: HashTable<VnIndex>,
hashes: IndexVec<VnIndex, u64>,
values: IndexVec<VnIndex, Value<'tcx>>,
types: IndexVec<VnIndex, Ty<'tcx>>,
}
impl<'tcx> ValueSet<'tcx> {
fn new(num_values: usize) -> ValueSet<'tcx> {
ValueSet {
indices: HashTable::with_capacity(num_values),
hashes: IndexVec::with_capacity(num_values),
values: IndexVec::with_capacity(num_values),
types: IndexVec::with_capacity(num_values),
}
}
/// Insert a `(Value, Ty)` pair without hashing or deduplication.
/// This always creates a new `VnIndex`.
#[inline]
fn insert_unique(
&mut self,
ty: Ty<'tcx>,
value: impl FnOnce(VnOpaque) -> Value<'tcx>,
) -> VnIndex {
let value = value(VnOpaque);
debug_assert!(match value {
Value::Opaque(_) | Value::Address { .. } => true,
Value::Constant { disambiguator, .. } => disambiguator.is_some(),
_ => false,
});
let index = self.hashes.push(0);
let _index = self.types.push(ty);
debug_assert_eq!(index, _index);
let _index = self.values.push(value);
debug_assert_eq!(index, _index);
index
}
/// Insert a `(Value, Ty)` pair to be deduplicated.
/// Returns `true` as second tuple field if this value did not exist previously.
#[allow(rustc::pass_by_value)] // closures take `&VnIndex`
fn insert(&mut self, ty: Ty<'tcx>, value: Value<'tcx>) -> (VnIndex, bool) {
debug_assert!(match value {
Value::Opaque(_) | Value::Address { .. } => false,
Value::Constant { disambiguator, .. } => disambiguator.is_none(),
_ => true,
});
let hash: u64 = {
let mut h = FxHasher::default();
value.hash(&mut h);
ty.hash(&mut h);
h.finish()
};
let eq = |index: &VnIndex| self.values[*index] == value && self.types[*index] == ty;
let hasher = |index: &VnIndex| self.hashes[*index];
match self.indices.entry(hash, eq, hasher) {
Entry::Occupied(entry) => {
let index = *entry.get();
(index, false)
}
Entry::Vacant(entry) => {
let index = self.hashes.push(hash);
entry.insert(index);
let _index = self.values.push(value);
debug_assert_eq!(index, _index);
let _index = self.types.push(ty);
debug_assert_eq!(index, _index);
(index, true)
}
}
}
/// Return the `Value` associated with the given `VnIndex`.
#[inline]
fn value(&self, index: VnIndex) -> &Value<'tcx> {
&self.values[index]
}
/// Return the type associated with the given `VnIndex`.
#[inline]
fn ty(&self, index: VnIndex) -> Ty<'tcx> {
self.types[index]
}
/// Replace the value associated with `index` with an opaque value.
#[inline]
fn forget(&mut self, index: VnIndex) {
self.values[index] = Value::Opaque(VnOpaque);
}
}
struct VnState<'body, 'tcx> {
tcx: TyCtxt<'tcx>,
ecx: InterpCx<'tcx, DummyMachine>,
@ -221,11 +346,9 @@ struct VnState<'body, 'tcx> {
/// Locals that are assigned that value.
// This vector does not hold all the values of `VnIndex` that we create.
rev_locals: IndexVec<VnIndex, SmallVec<[Local; 1]>>,
values: FxIndexSet<(Value<'tcx>, Ty<'tcx>)>,
values: ValueSet<'tcx>,
/// Values evaluated as constants if possible.
evaluated: IndexVec<VnIndex, Option<OpTy<'tcx>>>,
/// Counter to generate different values.
next_opaque: usize,
/// Cache the deref values.
derefs: Vec<VnIndex>,
ssa: &'body SsaLocals,
@ -256,9 +379,8 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
is_coroutine: body.coroutine.is_some(),
locals: IndexVec::from_elem(None, local_decls),
rev_locals: IndexVec::with_capacity(num_values),
values: FxIndexSet::with_capacity_and_hasher(num_values, Default::default()),
values: ValueSet::new(num_values),
evaluated: IndexVec::with_capacity(num_values),
next_opaque: 1,
derefs: Vec::new(),
ssa,
dominators,
@ -272,8 +394,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
#[instrument(level = "trace", skip(self), ret)]
fn insert(&mut self, ty: Ty<'tcx>, value: Value<'tcx>) -> VnIndex {
let (index, new) = self.values.insert_full((value, ty));
let index = VnIndex::from_usize(index);
let (index, new) = self.values.insert(ty, value);
if new {
// Grow `evaluated` and `rev_locals` here to amortize the allocations.
let evaluated = self.eval_to_const(index);
@ -285,18 +406,16 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
index
}
fn next_opaque(&mut self) -> usize {
let next_opaque = self.next_opaque;
self.next_opaque += 1;
next_opaque
}
/// Create a new `Value` for which we have no information at all, except that it is distinct
/// from all the others.
#[instrument(level = "trace", skip(self), ret)]
fn new_opaque(&mut self, ty: Ty<'tcx>) -> VnIndex {
let value = Value::Opaque(self.next_opaque());
self.insert(ty, value)
let index = self.values.insert_unique(ty, Value::Opaque);
let _index = self.evaluated.push(None);
debug_assert_eq!(index, _index);
let _index = self.rev_locals.push(SmallVec::new());
debug_assert_eq!(index, _index);
index
}
/// Create a new `Value::Address` distinct from all the others.
@ -309,18 +428,49 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
}
AddressKind::Address(mutbl) => Ty::new_ptr(self.tcx, pty, mutbl.to_mutbl_lossy()),
};
let value = Value::Address { place, kind, provenance: self.next_opaque() };
self.insert(ty, value)
let index =
self.values.insert_unique(ty, |provenance| Value::Address { place, kind, provenance });
let evaluated = self.eval_to_const(index);
let _index = self.evaluated.push(evaluated);
debug_assert_eq!(index, _index);
let _index = self.rev_locals.push(SmallVec::new());
debug_assert_eq!(index, _index);
index
}
#[instrument(level = "trace", skip(self), ret)]
fn insert_constant(&mut self, value: Const<'tcx>) -> VnIndex {
let (index, new) = if value.is_deterministic() {
// The constant is deterministic, no need to disambiguate.
let constant = Value::Constant { value, disambiguator: None };
self.values.insert(value.ty(), constant)
} else {
// Multiple mentions of this constant will yield different values,
// so assign a different `disambiguator` to ensure they do not get the same `VnIndex`.
let index = self.values.insert_unique(value.ty(), |disambiguator| Value::Constant {
value,
disambiguator: Some(disambiguator),
});
(index, true)
};
if new {
let evaluated = self.eval_to_const(index);
let _index = self.evaluated.push(evaluated);
debug_assert_eq!(index, _index);
let _index = self.rev_locals.push(SmallVec::new());
debug_assert_eq!(index, _index);
}
index
}
#[inline]
fn get(&self, index: VnIndex) -> &Value<'tcx> {
&self.values.get_index(index.as_usize()).unwrap().0
self.values.value(index)
}
#[inline]
fn ty(&self, index: VnIndex) -> Ty<'tcx> {
self.values.get_index(index.as_usize()).unwrap().1
self.values.ty(index)
}
/// Record that `local` is assigned `value`. `local` must be SSA.
@ -331,33 +481,18 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
self.rev_locals[value].push(local);
}
fn insert_constant(&mut self, value: Const<'tcx>) -> VnIndex {
let disambiguator = if value.is_deterministic() {
// The constant is deterministic, no need to disambiguate.
0
} else {
// Multiple mentions of this constant will yield different values,
// so assign a different `disambiguator` to ensure they do not get the same `VnIndex`.
let disambiguator = self.next_opaque();
// `disambiguator: 0` means deterministic.
debug_assert_ne!(disambiguator, 0);
disambiguator
};
self.insert(value.ty(), Value::Constant { value, disambiguator })
}
fn insert_bool(&mut self, flag: bool) -> VnIndex {
// Booleans are deterministic.
let value = Const::from_bool(self.tcx, flag);
debug_assert!(value.is_deterministic());
self.insert(self.tcx.types.bool, Value::Constant { value, disambiguator: 0 })
self.insert(self.tcx.types.bool, Value::Constant { value, disambiguator: None })
}
fn insert_scalar(&mut self, ty: Ty<'tcx>, scalar: Scalar) -> VnIndex {
// Scalars are deterministic.
let value = Const::from_scalar(self.tcx, scalar, ty);
debug_assert!(value.is_deterministic());
self.insert(ty, Value::Constant { value, disambiguator: 0 })
self.insert(ty, Value::Constant { value, disambiguator: None })
}
fn insert_tuple(&mut self, ty: Ty<'tcx>, values: Vec<VnIndex>) -> VnIndex {
@ -372,8 +507,7 @@ impl<'body, 'tcx> VnState<'body, 'tcx> {
fn invalidate_derefs(&mut self) {
for deref in std::mem::take(&mut self.derefs) {
let opaque = self.next_opaque();
self.values.get_index_mut2(deref.index()).unwrap().0 = Value::Opaque(opaque);
self.values.forget(deref);
}
}
@ -1572,7 +1706,7 @@ impl<'tcx> VnState<'_, 'tcx> {
// This was already constant in MIR, do not change it. If the constant is not
// deterministic, adding an additional mention of it in MIR will not give the same value as
// the former mention.
if let Value::Constant { value, disambiguator: 0 } = *self.get(index) {
if let Value::Constant { value, disambiguator: None } = *self.get(index) {
debug_assert!(value.is_deterministic());
return Some(ConstOperand { span: DUMMY_SP, user_ty: None, const_: value });
}