Use HybridBitSet for rows within SparseBitMatrix.

This requires adding a few extra methods to `HybridBitSet`. (These are
tested in a new unit test.)

This commit reduces the `max-rss` for `nll-check` builds of `html5ever`
by 46%, `ucd` by 45%, `clap-rs` by 23%, `inflate` by 14%. And the
results for the `unic-ucd-name` crate are even more impressive: a 21%
reduction in instructions, a 60% reduction in wall-time, a 96%
reduction in `max-rss`, and a 97% reduction in faults!

Fixes #52028.
This commit is contained in:
Nicholas Nethercote 2018-09-18 14:21:41 +10:00
parent 687cc292fd
commit 154be2c98c
3 changed files with 179 additions and 36 deletions

View file

@ -337,6 +337,10 @@ impl<T: Idx> SparseBitSet<T> {
self.0.len()
}
fn is_empty(&self) -> bool {
self.0.len() == 0
}
fn contains(&self, elem: T) -> bool {
self.0.contains(&elem)
}
@ -417,15 +421,28 @@ pub enum HybridBitSet<T: Idx> {
}
impl<T: Idx> HybridBitSet<T> {
// FIXME: This function is used in conjunction with `mem::replace()` in
// several pieces of awful code below. I can't work out how else to appease
// the borrow checker.
fn dummy() -> Self {
// The cheapest HybridBitSet to construct, which is only used to get
// around the borrow checker.
HybridBitSet::Sparse(SparseBitSet::new_empty(), 0)
}
pub fn new_empty(domain_size: usize) -> Self {
HybridBitSet::Sparse(SparseBitSet::new_empty(), domain_size)
}
pub fn clear(&mut self) {
let domain_size = match *self {
pub fn domain_size(&self) -> usize {
match *self {
HybridBitSet::Sparse(_, size) => size,
HybridBitSet::Dense(_, size) => size,
};
}
}
pub fn clear(&mut self) {
let domain_size = self.domain_size();
*self = HybridBitSet::new_empty(domain_size);
}
@ -436,6 +453,22 @@ impl<T: Idx> HybridBitSet<T> {
}
}
pub fn superset(&self, other: &HybridBitSet<T>) -> bool {
match (self, other) {
(HybridBitSet::Dense(self_dense, _), HybridBitSet::Dense(other_dense, _)) => {
self_dense.superset(other_dense)
}
_ => other.iter().all(|elem| self.contains(elem)),
}
}
pub fn is_empty(&self) -> bool {
match self {
HybridBitSet::Sparse(sparse, _) => sparse.is_empty(),
HybridBitSet::Dense(dense, _) => dense.is_empty(),
}
}
pub fn insert(&mut self, elem: T) -> bool {
match self {
HybridBitSet::Sparse(sparse, _) if sparse.len() < SPARSE_MAX => {
@ -449,19 +482,15 @@ impl<T: Idx> HybridBitSet<T> {
}
HybridBitSet::Sparse(_, _) => {
// The set is sparse and full. Convert to a dense set.
//
// FIXME: This code is awful, but I can't work out how else to
// appease the borrow checker.
let dummy = HybridBitSet::Sparse(SparseBitSet::new_empty(), 0);
match mem::replace(self, dummy) {
match mem::replace(self, HybridBitSet::dummy()) {
HybridBitSet::Sparse(sparse, domain_size) => {
let mut dense = sparse.to_dense(domain_size);
let changed = dense.insert(elem);
assert!(changed);
mem::replace(self, HybridBitSet::Dense(dense, domain_size));
*self = HybridBitSet::Dense(dense, domain_size);
changed
}
_ => panic!("impossible"),
_ => unreachable!()
}
}
@ -469,6 +498,17 @@ impl<T: Idx> HybridBitSet<T> {
}
}
pub fn insert_all(&mut self) {
let domain_size = self.domain_size();
match self {
HybridBitSet::Sparse(_, _) => {
let dense = BitSet::new_filled(domain_size);
*self = HybridBitSet::Dense(dense, domain_size);
}
HybridBitSet::Dense(dense, _) => dense.insert_all(),
}
}
pub fn remove(&mut self, elem: T) -> bool {
// Note: we currently don't bother going from Dense back to Sparse.
match self {
@ -477,6 +517,42 @@ impl<T: Idx> HybridBitSet<T> {
}
}
pub fn union(&mut self, other: &HybridBitSet<T>) -> bool {
match self {
HybridBitSet::Sparse(_, _) => {
match other {
HybridBitSet::Sparse(other_sparse, _) => {
// Both sets are sparse. Add the elements in
// `other_sparse` to `self_hybrid` one at a time. This
// may or may not cause `self_hybrid` to be densified.
let mut self_hybrid = mem::replace(self, HybridBitSet::dummy());
let mut changed = false;
for elem in other_sparse.iter() {
changed |= self_hybrid.insert(*elem);
}
*self = self_hybrid;
changed
}
HybridBitSet::Dense(other_dense, _) => {
// `self` is sparse and `other` is dense. Densify
// `self` and then do the bitwise union.
match mem::replace(self, HybridBitSet::dummy()) {
HybridBitSet::Sparse(self_sparse, self_domain_size) => {
let mut new_dense = self_sparse.to_dense(self_domain_size);
let changed = new_dense.union(other_dense);
*self = HybridBitSet::Dense(new_dense, self_domain_size);
changed
}
_ => unreachable!()
}
}
}
}
HybridBitSet::Dense(self_dense, _) => self_dense.union(other),
}
}
/// Converts to a dense set, consuming itself in the process.
pub fn to_dense(self) -> BitSet<T> {
match self {
@ -687,11 +763,10 @@ impl<R: Idx, C: Idx> BitMatrix<R, C> {
/// sparse representation.
///
/// Initially, every row has no explicit representation. If any bit within a
/// row is set, the entire row is instantiated as
/// `Some(<full-column-width-BitSet>)`. Furthermore, any previously
/// uninstantiated rows prior to it will be instantiated as `None`. Those prior
/// rows may themselves become fully instantiated later on if any of their bits
/// are set.
/// row is set, the entire row is instantiated as `Some(<HybridBitSet>)`.
/// Furthermore, any previously uninstantiated rows prior to it will be
/// instantiated as `None`. Those prior rows may themselves become fully
/// instantiated later on if any of their bits are set.
///
/// `R` and `C` are index types used to identify rows and columns respectively;
/// typically newtyped `usize` wrappers, but they can also just be `usize`.
@ -702,7 +777,7 @@ where
C: Idx,
{
num_columns: usize,
rows: IndexVec<R, Option<BitSet<C>>>,
rows: IndexVec<R, Option<HybridBitSet<C>>>,
}
impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
@ -714,14 +789,14 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
}
}
fn ensure_row(&mut self, row: R) -> &mut BitSet<C> {
fn ensure_row(&mut self, row: R) -> &mut HybridBitSet<C> {
// Instantiate any missing rows up to and including row `row` with an
// empty BitSet.
// empty HybridBitSet.
self.rows.ensure_contains_elem(row, || None);
// Then replace row `row` with a full BitSet if necessary.
// Then replace row `row` with a full HybridBitSet if necessary.
let num_columns = self.num_columns;
self.rows[row].get_or_insert_with(|| BitSet::new_empty(num_columns))
self.rows[row].get_or_insert_with(|| HybridBitSet::new_empty(num_columns))
}
/// Sets the cell at `(row, column)` to true. Put another way, insert
@ -760,8 +835,8 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
}
}
/// Merge a row, `from`, into the `into` row.
pub fn union_into_row(&mut self, into: R, from: &BitSet<C>) -> bool {
/// Union a row, `from`, into the `into` row.
pub fn union_into_row(&mut self, into: R, from: &HybridBitSet<C>) -> bool {
self.ensure_row(into).union(from)
}
@ -780,7 +855,7 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
self.row(row).into_iter().flat_map(|r| r.iter())
}
pub fn row(&self, row: R) -> Option<&BitSet<C>> {
pub fn row(&self, row: R) -> Option<&HybridBitSet<C>> {
if let Some(Some(row)) = self.rows.get(row) {
Some(row)
} else {
@ -871,7 +946,7 @@ fn bitset_iter_works_2() {
}
#[test]
fn union_two_vecs() {
fn union_two_sets() {
let mut set1: BitSet<usize> = BitSet::new_empty(65);
let mut set2: BitSet<usize> = BitSet::new_empty(65);
assert!(set1.insert(3));
@ -887,6 +962,74 @@ fn union_two_vecs() {
assert!(set1.contains(64));
}
#[test]
fn hybrid_bitset() {
let mut sparse038: HybridBitSet<usize> = HybridBitSet::new_empty(256);
assert!(sparse038.is_empty());
assert!(sparse038.insert(0));
assert!(sparse038.insert(1));
assert!(sparse038.insert(8));
assert!(sparse038.insert(3));
assert!(!sparse038.insert(3));
assert!(sparse038.remove(1));
assert!(!sparse038.is_empty());
assert_eq!(sparse038.iter().collect::<Vec<_>>(), [0, 3, 8]);
for i in 0..256 {
if i == 0 || i == 3 || i == 8 {
assert!(sparse038.contains(i));
} else {
assert!(!sparse038.contains(i));
}
}
let mut sparse01358 = sparse038.clone();
assert!(sparse01358.insert(1));
assert!(sparse01358.insert(5));
assert_eq!(sparse01358.iter().collect::<Vec<_>>(), [0, 1, 3, 5, 8]);
let mut dense10 = HybridBitSet::new_empty(256);
for i in 0..10 {
assert!(dense10.insert(i));
}
assert!(!dense10.is_empty());
assert_eq!(dense10.iter().collect::<Vec<_>>(), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
let mut dense256 = HybridBitSet::new_empty(256);
assert!(dense256.is_empty());
dense256.insert_all();
assert!(!dense256.is_empty());
for i in 0..256 {
assert!(dense256.contains(i));
}
assert!(sparse038.superset(&sparse038)); // sparse + sparse (self)
assert!(sparse01358.superset(&sparse038)); // sparse + sparse
assert!(dense10.superset(&sparse038)); // dense + sparse
assert!(dense10.superset(&dense10)); // dense + dense (self)
assert!(dense256.superset(&dense10)); // dense + dense
let mut hybrid = sparse038;
assert!(!sparse01358.union(&hybrid)); // no change
assert!(hybrid.union(&sparse01358));
assert!(hybrid.superset(&sparse01358) && sparse01358.superset(&hybrid));
assert!(!dense10.union(&sparse01358));
assert!(!dense256.union(&dense10));
let mut dense = dense10;
assert!(dense.union(&dense256));
assert!(dense.superset(&dense256) && dense256.superset(&dense));
assert!(hybrid.union(&dense256));
assert!(hybrid.superset(&dense256) && dense256.superset(&hybrid));
assert_eq!(dense256.iter().count(), 256);
let mut dense0 = dense256;
for i in 0..256 {
assert!(dense0.remove(i));
}
assert!(!dense0.remove(0));
assert!(dense0.is_empty());
}
#[test]
fn grow() {
let mut set: GrowableBitSet<usize> = GrowableBitSet::with_capacity(65);

View file

@ -10,7 +10,7 @@
use rustc::mir::{BasicBlock, Location, Mir};
use rustc::ty::{self, RegionVid};
use rustc_data_structures::bit_set::{BitSet, SparseBitMatrix};
use rustc_data_structures::bit_set::{HybridBitSet, SparseBitMatrix};
use rustc_data_structures::indexed_vec::Idx;
use rustc_data_structures::indexed_vec::IndexVec;
use std::fmt::Debug;
@ -184,7 +184,7 @@ impl<N: Idx> LivenessValues<N> {
/// Adds all the elements in the given bit array into the given
/// region. Returns true if any of them are newly added.
crate fn add_elements(&mut self, row: N, locations: &BitSet<PointIndex>) -> bool {
crate fn add_elements(&mut self, row: N, locations: &HybridBitSet<PointIndex>) -> bool {
debug!("LivenessValues::add_elements(row={:?}, locations={:?})", row, locations);
self.points.union_into_row(row, locations)
}

View file

@ -22,7 +22,7 @@ use rustc::traits::query::dropck_outlives::DropckOutlivesResult;
use rustc::traits::query::type_op::outlives::DropckOutlives;
use rustc::traits::query::type_op::TypeOp;
use rustc::ty::{Ty, TypeFoldable};
use rustc_data_structures::bit_set::BitSet;
use rustc_data_structures::bit_set::HybridBitSet;
use rustc_data_structures::fx::FxHashMap;
use std::rc::Rc;
use util::liveness::LiveVariableMap;
@ -121,16 +121,16 @@ where
cx: LivenessContext<'me, 'typeck, 'flow, 'gcx, 'tcx>,
/// Set of points that define the current local.
defs: BitSet<PointIndex>,
defs: HybridBitSet<PointIndex>,
/// Points where the current variable is "use live" -- meaning
/// that there is a future "full use" that may use its value.
use_live_at: BitSet<PointIndex>,
use_live_at: HybridBitSet<PointIndex>,
/// Points where the current variable is "drop live" -- meaning
/// that there is no future "full use" that may use its value, but
/// there is a future drop.
drop_live_at: BitSet<PointIndex>,
drop_live_at: HybridBitSet<PointIndex>,
/// Locations where drops may occur.
drop_locations: Vec<Location>,
@ -144,9 +144,9 @@ impl LivenessResults<'me, 'typeck, 'flow, 'gcx, 'tcx> {
let num_points = cx.elements.num_points();
LivenessResults {
cx,
defs: BitSet::new_empty(num_points),
use_live_at: BitSet::new_empty(num_points),
drop_live_at: BitSet::new_empty(num_points),
defs: HybridBitSet::new_empty(num_points),
use_live_at: HybridBitSet::new_empty(num_points),
drop_live_at: HybridBitSet::new_empty(num_points),
drop_locations: vec![],
stack: vec![],
}
@ -448,7 +448,7 @@ impl LivenessContext<'_, '_, '_, '_, 'tcx> {
fn add_use_live_facts_for(
&mut self,
value: impl TypeFoldable<'tcx>,
live_at: &BitSet<PointIndex>,
live_at: &HybridBitSet<PointIndex>,
) {
debug!("add_use_live_facts_for(value={:?})", value);
@ -465,7 +465,7 @@ impl LivenessContext<'_, '_, '_, '_, 'tcx> {
dropped_local: Local,
dropped_ty: Ty<'tcx>,
drop_locations: &[Location],
live_at: &BitSet<PointIndex>,
live_at: &HybridBitSet<PointIndex>,
) {
debug!(
"add_drop_live_constraint(\
@ -508,7 +508,7 @@ impl LivenessContext<'_, '_, '_, '_, 'tcx> {
elements: &RegionValueElements,
typeck: &mut TypeChecker<'_, '_, 'tcx>,
value: impl TypeFoldable<'tcx>,
live_at: &BitSet<PointIndex>,
live_at: &HybridBitSet<PointIndex>,
) {
debug!("make_all_regions_live(value={:?})", value);
debug!(