Adapt table sizes to the contents

This commit is contained in:
Ben Kimock 2023-07-10 10:45:24 -04:00
parent a161ab00db
commit 2ae197d5fc
4 changed files with 102 additions and 45 deletions

View file

@ -360,8 +360,8 @@ impl<'a, 'tcx> DecodeContext<'a, 'tcx> {
self.read_lazy_offset_then(|pos| LazyArray::from_position_and_num_elems(pos, len))
}
fn read_lazy_table<I, T>(&mut self, len: usize) -> LazyTable<I, T> {
self.read_lazy_offset_then(|pos| LazyTable::from_position_and_encoded_size(pos, len))
fn read_lazy_table<I, T>(&mut self, width: usize, len: usize) -> LazyTable<I, T> {
self.read_lazy_offset_then(|pos| LazyTable::from_position_and_encoded_size(pos, width, len))
}
#[inline]
@ -665,8 +665,9 @@ impl<'a, 'tcx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyArray<T> {
impl<'a, 'tcx, I: Idx, T> Decodable<DecodeContext<'a, 'tcx>> for LazyTable<I, T> {
fn decode(decoder: &mut DecodeContext<'a, 'tcx>) -> Self {
let width = decoder.read_usize();
let len = decoder.read_usize();
decoder.read_lazy_table(len)
decoder.read_lazy_table(width, len)
}
}

View file

@ -129,7 +129,8 @@ impl<'a, 'tcx, T> Encodable<EncodeContext<'a, 'tcx>> for LazyArray<T> {
impl<'a, 'tcx, I, T> Encodable<EncodeContext<'a, 'tcx>> for LazyTable<I, T> {
fn encode(&self, e: &mut EncodeContext<'a, 'tcx>) {
e.emit_usize(self.encoded_size);
e.emit_usize(self.width);
e.emit_usize(self.len);
e.emit_lazy_distance(self.position);
}
}

View file

@ -136,7 +136,8 @@ impl<T> LazyArray<T> {
/// eagerly and in-order.
struct LazyTable<I, T> {
position: NonZeroUsize,
encoded_size: usize,
width: usize,
len: usize,
_marker: PhantomData<fn(I) -> T>,
}
@ -147,9 +148,10 @@ impl<I: 'static, T: ParameterizedOverTcx> ParameterizedOverTcx for LazyTable<I,
impl<I, T> LazyTable<I, T> {
fn from_position_and_encoded_size(
position: NonZeroUsize,
encoded_size: usize,
width: usize,
len: usize,
) -> LazyTable<I, T> {
LazyTable { position, encoded_size, _marker: PhantomData }
LazyTable { position, width, len, _marker: PhantomData }
}
}

View file

@ -38,6 +38,12 @@ impl IsDefault for u32 {
}
}
/// A `u64` counts as default exactly when it is zero.
impl IsDefault for u64 {
    /// Returns `true` if and only if the value is `0`.
    fn is_default(&self) -> bool {
        matches!(*self, 0)
    }
}
impl<T> IsDefault for LazyArray<T> {
fn is_default(&self) -> bool {
self.num_elems == 0
@ -89,6 +95,20 @@ impl FixedSizeEncoding for u32 {
}
}
/// Fixed-width encoding of a `u64` as its eight little-endian bytes.
impl FixedSizeEncoding for u64 {
    type ByteArray = [u8; 8];

    /// Decodes the value from its little-endian byte representation.
    #[inline]
    fn from_bytes(b: &[u8; 8]) -> Self {
        u64::from_le_bytes(*b)
    }

    /// Stores the little-endian byte representation of `self` into `b`,
    /// overwriting all eight bytes.
    #[inline]
    fn write_to_bytes(self, b: &mut [u8; 8]) {
        b.copy_from_slice(&u64::to_le_bytes(self));
    }
}
macro_rules! fixed_size_enum {
($ty:ty { $(($($pat:tt)*))* }) => {
impl FixedSizeEncoding for Option<$ty> {
@ -299,21 +319,21 @@ impl FixedSizeEncoding for UnusedGenericParams {
// generic `LazyValue<T>` impl, but in the general case we might not need / want
// to fit every `usize` in `u32`.
impl<T> FixedSizeEncoding for Option<LazyValue<T>> {
type ByteArray = [u8; 4];
type ByteArray = [u8; 8];
#[inline]
fn from_bytes(b: &[u8; 4]) -> Self {
let position = NonZeroUsize::new(u32::from_bytes(b) as usize)?;
fn from_bytes(b: &[u8; 8]) -> Self {
let position = NonZeroUsize::new(u64::from_bytes(b) as usize)?;
Some(LazyValue::from_position(position))
}
#[inline]
fn write_to_bytes(self, b: &mut [u8; 4]) {
fn write_to_bytes(self, b: &mut [u8; 8]) {
match self {
None => unreachable!(),
Some(lazy) => {
let position = lazy.position.get();
let position: u32 = position.try_into().unwrap();
let position: u64 = position.try_into().unwrap();
position.write_to_bytes(b)
}
}
@ -322,55 +342,67 @@ impl<T> FixedSizeEncoding for Option<LazyValue<T>> {
impl<T> LazyArray<T> {
#[inline]
fn write_to_bytes_impl(self, b: &mut [u8; 8]) {
let ([position_bytes, meta_bytes], []) = b.as_chunks_mut::<4>() else { panic!() };
fn write_to_bytes_impl(self, b: &mut [u8; 16]) {
let position = (self.position.get() as u64).to_le_bytes();
let len = (self.num_elems as u64).to_le_bytes();
let position = self.position.get();
let position: u32 = position.try_into().unwrap();
position.write_to_bytes(position_bytes);
let len = self.num_elems;
let len: u32 = len.try_into().unwrap();
len.write_to_bytes(meta_bytes);
for i in 0..8 {
b[2 * i] = position[i];
b[2 * i + 1] = len[i];
}
}
fn from_bytes_impl(position_bytes: &[u8; 4], meta_bytes: &[u8; 4]) -> Option<LazyArray<T>> {
let position = NonZeroUsize::new(u32::from_bytes(position_bytes) as usize)?;
let len = u32::from_bytes(meta_bytes) as usize;
fn from_bytes_impl(position: &[u8; 8], meta: &[u8; 8]) -> Option<LazyArray<T>> {
let position = NonZeroUsize::new(u64::from_bytes(&position) as usize)?;
let len = u64::from_bytes(&meta) as usize;
Some(LazyArray::from_position_and_num_elems(position, len))
}
}
impl<T> FixedSizeEncoding for LazyArray<T> {
type ByteArray = [u8; 8];
type ByteArray = [u8; 16];
#[inline]
fn from_bytes(b: &[u8; 8]) -> Self {
let ([position_bytes, meta_bytes], []) = b.as_chunks::<4>() else { panic!() };
if *meta_bytes == [0; 4] {
fn from_bytes(b: &[u8; 16]) -> Self {
let mut position = [0u8; 8];
let mut meta = [0u8; 8];
for i in 0..8 {
position[i] = b[2 * i];
meta[i] = b[2 * i + 1];
}
if meta == [0; 8] {
return Default::default();
}
LazyArray::from_bytes_impl(position_bytes, meta_bytes).unwrap()
LazyArray::from_bytes_impl(&position, &meta).unwrap()
}
#[inline]
fn write_to_bytes(self, b: &mut [u8; 8]) {
fn write_to_bytes(self, b: &mut [u8; 16]) {
assert!(!self.is_default());
self.write_to_bytes_impl(b)
}
}
impl<T> FixedSizeEncoding for Option<LazyArray<T>> {
type ByteArray = [u8; 8];
type ByteArray = [u8; 16];
#[inline]
fn from_bytes(b: &[u8; 8]) -> Self {
let ([position_bytes, meta_bytes], []) = b.as_chunks::<4>() else { panic!() };
LazyArray::from_bytes_impl(position_bytes, meta_bytes)
fn from_bytes(b: &[u8; 16]) -> Self {
let mut position = [0u8; 8];
let mut meta = [0u8; 8];
for i in 0..8 {
position[i] = b[2 * i];
meta[i] = b[2 * i + 1];
}
LazyArray::from_bytes_impl(&position, &meta)
}
#[inline]
fn write_to_bytes(self, b: &mut [u8; 8]) {
fn write_to_bytes(self, b: &mut [u8; 16]) {
match self {
None => unreachable!(),
Some(lazy) => lazy.write_to_bytes_impl(b),
@ -380,13 +412,14 @@ impl<T> FixedSizeEncoding for Option<LazyArray<T>> {
/// Helper for constructing a table's serialization (also see `Table`).
///
/// Entries are buffered as full-size `T::ByteArray` blocks; `width` records how
/// many leading bytes of each block `encode` actually writes out.
pub(super) struct TableBuilder<I: Idx, T: FixedSizeEncoding> {
/// Largest `N - trailing_zeros(block)` seen over the blocks written so far.
/// Starts at 0; `encode` emits only `block[..width]` for every block.
width: usize,
/// One fixed-size byte block per index `I`; missing entries are all-zero.
blocks: IndexVec<I, T::ByteArray>,
_marker: PhantomData<T>,
}
impl<I: Idx, T: FixedSizeEncoding> Default for TableBuilder<I, T> {
fn default() -> Self {
TableBuilder { blocks: Default::default(), _marker: PhantomData }
TableBuilder { width: 0, blocks: Default::default(), _marker: PhantomData }
}
}
@ -414,22 +447,33 @@ impl<I: Idx, const N: usize, T: FixedSizeEncoding<ByteArray = [u8; N]>> TableBui
// > store bit-masks of which item in each bucket is actually serialized).
let block = self.blocks.ensure_contains_elem(i, || [0; N]);
value.write_to_bytes(block);
if self.width != N {
let width = N - trailing_zeros(block);
self.width = self.width.max(width);
}
}
}
pub(crate) fn encode(&self, buf: &mut FileEncoder) -> LazyTable<I, T> {
let pos = buf.position();
let width = self.width;
for block in &self.blocks {
buf.emit_raw_bytes(block);
buf.emit_raw_bytes(&block[..width]);
}
let num_bytes = self.blocks.len() * N;
LazyTable::from_position_and_encoded_size(
NonZeroUsize::new(pos as usize).unwrap(),
num_bytes,
width,
self.blocks.len(),
)
}
}
/// Counts the zero bytes at the end of `x`.
///
/// Returns `x.len()` when every byte is zero, which includes the empty slice.
fn trailing_zeros(x: &[u8]) -> usize {
    match x.iter().rposition(|&byte| byte != 0) {
        // Everything after the last non-zero byte is a trailing zero.
        Some(last_nonzero) => x.len() - (last_nonzero + 1),
        None => x.len(),
    }
}
impl<I: Idx, const N: usize, T: FixedSizeEncoding<ByteArray = [u8; N]> + ParameterizedOverTcx>
LazyTable<I, T>
where
@ -438,16 +482,25 @@ where
/// Given the metadata, extract out the value at a particular index (if any).
#[inline(never)]
pub(super) fn get<'a, 'tcx, M: Metadata<'a, 'tcx>>(&self, metadata: M, i: I) -> T::Value<'tcx> {
trace!("LazyTable::lookup: index={:?} len={:?}", i, self.encoded_size);
trace!("LazyTable::lookup: index={:?} len={:?}", i, self.len);
let start = self.position.get();
let bytes = &metadata.blob()[start..start + self.encoded_size];
let (bytes, []) = bytes.as_chunks::<N>() else { panic!() };
bytes.get(i.index()).map_or_else(Default::default, FixedSizeEncoding::from_bytes)
// Access past the end of the table returns a Default
if i.index() >= self.len {
return Default::default();
}
let width = self.width;
let start = self.position.get() + (width * i.index());
let end = start + width;
let bytes = &metadata.blob()[start..end];
let mut fixed = [0u8; N];
fixed[..width].copy_from_slice(bytes);
FixedSizeEncoding::from_bytes(&fixed)
}
/// Size of the table in entries, including possible gaps.
pub(super) fn size(&self) -> usize {
self.encoded_size / N
self.len
}
}