Add index to the dep graph format and encode via MemEncoder
This commit is contained in:
parent
6bc57c6bf7
commit
c6735f92c1
3 changed files with 206 additions and 39 deletions
|
|
@ -46,6 +46,7 @@ use rustc_data_structures::profiling::SelfProfilerRef;
|
|||
use rustc_data_structures::sync::Lock;
|
||||
use rustc_data_structures::unhash::UnhashMap;
|
||||
use rustc_index::{Idx, IndexVec};
|
||||
use rustc_serialize::opaque::mem_encoder::MemEncoder;
|
||||
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder, IntEncodedWithFixedSize, MemDecoder};
|
||||
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
|
||||
use tracing::{debug, instrument};
|
||||
|
|
@ -105,18 +106,11 @@ impl SerializedDepGraph {
|
|||
) -> impl Iterator<Item = SerializedDepNodeIndex> + Clone {
|
||||
let header = self.edge_list_indices[source];
|
||||
let mut raw = &self.edge_list_data[header.start()..];
|
||||
// Figure out where the edge list for `source` ends by getting the start index of the next
|
||||
// edge list, or the end of the array if this is the last edge.
|
||||
let end = self
|
||||
.edge_list_indices
|
||||
.get(source + 1)
|
||||
.map(|h| h.start())
|
||||
.unwrap_or_else(|| self.edge_list_data.len() - DEP_NODE_PAD);
|
||||
|
||||
// The number of edges for this node is implicitly stored in the combination of the byte
|
||||
// width and the length.
|
||||
let bytes_per_index = header.bytes_per_index();
|
||||
let len = (end - header.start()) / bytes_per_index;
|
||||
let len = header.edges;
|
||||
|
||||
// LLVM doesn't hoist EdgeHeader::mask so we do it ourselves.
|
||||
let mask = header.mask();
|
||||
|
|
@ -163,6 +157,7 @@ impl SerializedDepGraph {
|
|||
#[derive(Debug, Clone, Copy)]
|
||||
struct EdgeHeader {
|
||||
repr: usize,
|
||||
edges: u32,
|
||||
}
|
||||
|
||||
impl EdgeHeader {
|
||||
|
|
@ -205,9 +200,17 @@ impl SerializedDepGraph {
|
|||
|
||||
let graph_bytes = d.len() - (2 * IntEncodedWithFixedSize::ENCODED_SIZE) - d.position();
|
||||
|
||||
let mut nodes = IndexVec::with_capacity(node_count);
|
||||
let mut fingerprints = IndexVec::with_capacity(node_count);
|
||||
let mut edge_list_indices = IndexVec::with_capacity(node_count);
|
||||
let mut nodes: IndexVec<SerializedDepNodeIndex, _> = (0..node_count)
|
||||
.map(|_| DepNode {
|
||||
kind: D::DEP_KIND_NULL,
|
||||
hash: PackedFingerprint::from(Fingerprint::ZERO),
|
||||
})
|
||||
.collect();
|
||||
let mut fingerprints: IndexVec<SerializedDepNodeIndex, _> =
|
||||
(0..node_count).map(|_| Fingerprint::ZERO).collect();
|
||||
let mut edge_list_indices: IndexVec<SerializedDepNodeIndex, _> =
|
||||
(0..node_count).map(|_| EdgeHeader { repr: 0, edges: 0 }).collect();
|
||||
|
||||
// This estimation assumes that all of the encoded bytes are for the edge lists or for the
|
||||
// fixed-size node headers. But that's not necessarily true; if any edge list has a length
|
||||
// that spills out of the size we can bit-pack into SerializedNodeHeader then some of the
|
||||
|
|
@ -226,11 +229,10 @@ impl SerializedDepGraph {
|
|||
let node_header =
|
||||
SerializedNodeHeader::<D> { bytes: d.read_array(), _marker: PhantomData };
|
||||
|
||||
let _i: SerializedDepNodeIndex = nodes.push(node_header.node());
|
||||
debug_assert_eq!(_i.index(), _index);
|
||||
let index = node_header.index();
|
||||
|
||||
let _i: SerializedDepNodeIndex = fingerprints.push(node_header.fingerprint());
|
||||
debug_assert_eq!(_i.index(), _index);
|
||||
nodes[index] = node_header.node();
|
||||
fingerprints[index] = node_header.fingerprint();
|
||||
|
||||
// If the length of this node's edge list is small, the length is stored in the header.
|
||||
// If it is not, we fall back to another decoder call.
|
||||
|
|
@ -242,12 +244,11 @@ impl SerializedDepGraph {
|
|||
let edges_len_bytes = node_header.bytes_per_index() * (num_edges as usize);
|
||||
// The in-memory structure for the edges list stores the byte width of the edges on
|
||||
// this node with the offset into the global edge data array.
|
||||
let edges_header = node_header.edges_header(&edge_list_data);
|
||||
let edges_header = node_header.edges_header(&edge_list_data, num_edges);
|
||||
|
||||
edge_list_data.extend(d.read_raw_bytes(edges_len_bytes));
|
||||
|
||||
let _i: SerializedDepNodeIndex = edge_list_indices.push(edges_header);
|
||||
debug_assert_eq!(_i.index(), _index);
|
||||
edge_list_indices[index] = edges_header;
|
||||
}
|
||||
|
||||
// When we access the edge list data, we do a fixed-size read from the edge list data then
|
||||
|
|
@ -298,9 +299,10 @@ impl SerializedDepGraph {
|
|||
/// * In whatever bits remain, the length of the edge list for this node, if it fits
|
||||
struct SerializedNodeHeader<D> {
|
||||
// 2 bytes for the DepNode
|
||||
// 4 bytes for the index
|
||||
// 16 for Fingerprint in DepNode
|
||||
// 16 for Fingerprint in NodeInfo
|
||||
bytes: [u8; 34],
|
||||
bytes: [u8; 38],
|
||||
_marker: PhantomData<D>,
|
||||
}
|
||||
|
||||
|
|
@ -310,6 +312,7 @@ struct Unpacked {
|
|||
len: Option<u32>,
|
||||
bytes_per_index: usize,
|
||||
kind: DepKind,
|
||||
index: SerializedDepNodeIndex,
|
||||
hash: PackedFingerprint,
|
||||
fingerprint: Fingerprint,
|
||||
}
|
||||
|
|
@ -331,6 +334,7 @@ impl<D: Deps> SerializedNodeHeader<D> {
|
|||
#[inline]
|
||||
fn new(
|
||||
node: DepNode,
|
||||
index: DepNodeIndex,
|
||||
fingerprint: Fingerprint,
|
||||
edge_max_index: u32,
|
||||
edge_count: usize,
|
||||
|
|
@ -352,10 +356,11 @@ impl<D: Deps> SerializedNodeHeader<D> {
|
|||
let hash: Fingerprint = node.hash.into();
|
||||
|
||||
// Using half-open ranges ensures an unconditional panic if we get the magic numbers wrong.
|
||||
let mut bytes = [0u8; 34];
|
||||
let mut bytes = [0u8; 38];
|
||||
bytes[..2].copy_from_slice(&head.to_le_bytes());
|
||||
bytes[2..18].copy_from_slice(&hash.to_le_bytes());
|
||||
bytes[18..].copy_from_slice(&fingerprint.to_le_bytes());
|
||||
bytes[2..6].copy_from_slice(&index.as_u32().to_le_bytes());
|
||||
bytes[6..22].copy_from_slice(&hash.to_le_bytes());
|
||||
bytes[22..].copy_from_slice(&fingerprint.to_le_bytes());
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
|
|
@ -372,8 +377,9 @@ impl<D: Deps> SerializedNodeHeader<D> {
|
|||
#[inline]
|
||||
fn unpack(&self) -> Unpacked {
|
||||
let head = u16::from_le_bytes(self.bytes[..2].try_into().unwrap());
|
||||
let hash = self.bytes[2..18].try_into().unwrap();
|
||||
let fingerprint = self.bytes[18..].try_into().unwrap();
|
||||
let index = u32::from_le_bytes(self.bytes[2..6].try_into().unwrap());
|
||||
let hash = self.bytes[6..22].try_into().unwrap();
|
||||
let fingerprint = self.bytes[22..].try_into().unwrap();
|
||||
|
||||
let kind = head & mask(Self::KIND_BITS) as u16;
|
||||
let bytes_per_index = (head >> Self::KIND_BITS) & mask(Self::WIDTH_BITS) as u16;
|
||||
|
|
@ -383,6 +389,7 @@ impl<D: Deps> SerializedNodeHeader<D> {
|
|||
len: len.checked_sub(1),
|
||||
bytes_per_index: bytes_per_index as usize + 1,
|
||||
kind: DepKind::new(kind),
|
||||
index: SerializedDepNodeIndex::from_u32(index),
|
||||
hash: Fingerprint::from_le_bytes(hash).into(),
|
||||
fingerprint: Fingerprint::from_le_bytes(fingerprint),
|
||||
}
|
||||
|
|
@ -398,6 +405,11 @@ impl<D: Deps> SerializedNodeHeader<D> {
|
|||
self.unpack().bytes_per_index
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn index(&self) -> SerializedDepNodeIndex {
|
||||
self.unpack().index
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn fingerprint(&self) -> Fingerprint {
|
||||
self.unpack().fingerprint
|
||||
|
|
@ -410,9 +422,10 @@ impl<D: Deps> SerializedNodeHeader<D> {
|
|||
}
|
||||
|
||||
#[inline]
|
||||
fn edges_header(&self, edge_list_data: &[u8]) -> EdgeHeader {
|
||||
fn edges_header(&self, edge_list_data: &[u8], edges: u32) -> EdgeHeader {
|
||||
EdgeHeader {
|
||||
repr: (edge_list_data.len() << DEP_NODE_WIDTH_BITS) | (self.bytes_per_index() - 1),
|
||||
edges,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -425,10 +438,15 @@ struct NodeInfo {
|
|||
}
|
||||
|
||||
impl NodeInfo {
|
||||
fn encode<D: Deps>(&self, e: &mut FileEncoder) {
|
||||
fn encode<D: Deps>(&self, e: &mut MemEncoder, index: DepNodeIndex) {
|
||||
let NodeInfo { node, fingerprint, ref edges } = *self;
|
||||
let header =
|
||||
SerializedNodeHeader::<D>::new(node, fingerprint, edges.max_index(), edges.len());
|
||||
let header = SerializedNodeHeader::<D>::new(
|
||||
node,
|
||||
index,
|
||||
fingerprint,
|
||||
edges.max_index(),
|
||||
edges.len(),
|
||||
);
|
||||
e.write_array(header.bytes);
|
||||
|
||||
if header.len().is_none() {
|
||||
|
|
@ -450,8 +468,9 @@ impl NodeInfo {
|
|||
/// This avoids the overhead of constructing `EdgesVec`, which would be needed to call `encode`.
|
||||
#[inline]
|
||||
fn encode_promoted<D: Deps>(
|
||||
e: &mut FileEncoder,
|
||||
e: &mut MemEncoder,
|
||||
node: DepNode,
|
||||
index: DepNodeIndex,
|
||||
fingerprint: Fingerprint,
|
||||
prev_index: SerializedDepNodeIndex,
|
||||
colors: &DepNodeColorMap,
|
||||
|
|
@ -464,7 +483,7 @@ impl NodeInfo {
|
|||
let edge_max =
|
||||
edges.clone().map(|i| colors.current(i).unwrap().as_u32()).max().unwrap_or(0);
|
||||
|
||||
let header = SerializedNodeHeader::<D>::new(node, fingerprint, edge_max, edge_count);
|
||||
let header = SerializedNodeHeader::<D>::new(node, index, fingerprint, edge_max, edge_count);
|
||||
e.write_array(header.bytes);
|
||||
|
||||
if header.len().is_none() {
|
||||
|
|
@ -498,6 +517,8 @@ struct EncoderState<D: Deps> {
|
|||
total_edge_count: usize,
|
||||
stats: Option<FxHashMap<DepKind, Stat>>,
|
||||
|
||||
mem_encoder: MemEncoder,
|
||||
|
||||
/// Stores the number of times we've encoded each dep kind.
|
||||
kind_stats: Vec<u32>,
|
||||
marker: PhantomData<D>,
|
||||
|
|
@ -511,22 +532,28 @@ impl<D: Deps> EncoderState<D> {
|
|||
total_edge_count: 0,
|
||||
total_node_count: 0,
|
||||
stats: record_stats.then(FxHashMap::default),
|
||||
mem_encoder: MemEncoder::new(),
|
||||
kind_stats: iter::repeat(0).take(D::DEP_KIND_MAX as usize + 1).collect(),
|
||||
marker: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn alloc_index(&mut self) -> DepNodeIndex {
|
||||
let index = DepNodeIndex::new(self.total_node_count);
|
||||
self.total_node_count += 1;
|
||||
index
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn record(
|
||||
&mut self,
|
||||
node: DepNode,
|
||||
index: DepNodeIndex,
|
||||
edge_count: usize,
|
||||
edges: impl FnOnce(&mut Self) -> Vec<DepNodeIndex>,
|
||||
record_graph: &Option<Lock<DepGraphQuery>>,
|
||||
) -> DepNodeIndex {
|
||||
let index = DepNodeIndex::new(self.total_node_count);
|
||||
|
||||
self.total_node_count += 1;
|
||||
self.kind_stats[node.kind.as_usize()] += 1;
|
||||
self.total_edge_count += edge_count;
|
||||
|
||||
|
|
@ -558,14 +585,25 @@ impl<D: Deps> EncoderState<D> {
|
|||
index
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn flush_mem_encoder(&mut self) {
|
||||
let data = &mut self.mem_encoder.data;
|
||||
if data.len() > 64 * 1024 {
|
||||
self.encoder.emit_raw_bytes(&data[..]);
|
||||
data.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/// Encodes a node to the current graph.
|
||||
fn encode_node(
|
||||
&mut self,
|
||||
node: &NodeInfo,
|
||||
record_graph: &Option<Lock<DepGraphQuery>>,
|
||||
) -> DepNodeIndex {
|
||||
node.encode::<D>(&mut self.encoder);
|
||||
self.record(node.node, node.edges.len(), |_| node.edges[..].to_vec(), record_graph)
|
||||
let index = self.alloc_index();
|
||||
node.encode::<D>(&mut self.mem_encoder, index);
|
||||
self.flush_mem_encoder();
|
||||
self.record(node.node, index, node.edges.len(), |_| node.edges[..].to_vec(), record_graph)
|
||||
}
|
||||
|
||||
/// Encodes a node that was promoted from the previous graph. It reads the information directly from
|
||||
|
|
@ -581,20 +619,22 @@ impl<D: Deps> EncoderState<D> {
|
|||
record_graph: &Option<Lock<DepGraphQuery>>,
|
||||
colors: &DepNodeColorMap,
|
||||
) -> DepNodeIndex {
|
||||
let index = self.alloc_index();
|
||||
let node = self.previous.index_to_node(prev_index);
|
||||
|
||||
let fingerprint = self.previous.fingerprint_by_index(prev_index);
|
||||
let edge_count = NodeInfo::encode_promoted::<D>(
|
||||
&mut self.encoder,
|
||||
&mut self.mem_encoder,
|
||||
node,
|
||||
index,
|
||||
fingerprint,
|
||||
prev_index,
|
||||
colors,
|
||||
&self.previous,
|
||||
);
|
||||
|
||||
self.flush_mem_encoder();
|
||||
self.record(
|
||||
node,
|
||||
index,
|
||||
edge_count,
|
||||
|this| {
|
||||
this.previous
|
||||
|
|
@ -603,12 +643,14 @@ impl<D: Deps> EncoderState<D> {
|
|||
.collect()
|
||||
},
|
||||
record_graph,
|
||||
)
|
||||
);
|
||||
index
|
||||
}
|
||||
|
||||
fn finish(self, profiler: &SelfProfilerRef) -> FileEncodeResult {
|
||||
let Self {
|
||||
mut encoder,
|
||||
mem_encoder,
|
||||
total_node_count,
|
||||
total_edge_count,
|
||||
stats: _,
|
||||
|
|
@ -617,6 +659,8 @@ impl<D: Deps> EncoderState<D> {
|
|||
previous,
|
||||
} = self;
|
||||
|
||||
encoder.emit_raw_bytes(&mem_encoder.data);
|
||||
|
||||
let node_count = total_node_count.try_into().unwrap();
|
||||
let edge_count = total_edge_count.try_into().unwrap();
|
||||
|
||||
|
|
|
|||
|
|
@ -10,6 +10,8 @@ use crate::int_overflow::DebugStrictAdd;
|
|||
use crate::leb128;
|
||||
use crate::serialize::{Decodable, Decoder, Encodable, Encoder};
|
||||
|
||||
pub mod mem_encoder;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
// Encoder
|
||||
// -----------------------------------------------------------------------------
|
||||
|
|
|
|||
121
compiler/rustc_serialize/src/opaque/mem_encoder.rs
Normal file
121
compiler/rustc_serialize/src/opaque/mem_encoder.rs
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
use super::IntEncodedWithFixedSize;
|
||||
use crate::{Encodable, Encoder, leb128};
|
||||
|
||||
/// An append-only, infallible, in-memory byte sink for opaque encoding.
///
/// Unlike `FileEncoder` this never performs I/O: encoded bytes simply
/// accumulate in `data` until the caller drains them (the dep-graph encoder
/// flushes this buffer into its `FileEncoder` in batches).
pub struct MemEncoder {
    // Raw encoded bytes. Public so callers can inspect/drain the buffer
    // directly (e.g. `flush_mem_encoder` reads and clears it in place).
    pub data: Vec<u8>,
}
|
||||
|
||||
impl MemEncoder {
    /// Creates an encoder with an empty buffer.
    pub fn new() -> MemEncoder {
        MemEncoder { data: vec![] }
    }

    /// Current write position; identical to the number of bytes encoded so far.
    #[inline]
    pub fn position(&self) -> usize {
        self.data.len()
    }

    /// Consumes the encoder and returns the encoded bytes.
    pub fn finish(self) -> Vec<u8> {
        self.data
    }

    /// Write up to `N` bytes to this encoder.
    ///
    /// This function can be used to avoid the overhead of calling memcpy for writes that
    /// have runtime-variable length, but are small and have a small fixed upper bound.
    ///
    /// This can be used to do in-place encoding as is done for leb128 (without this function
    /// we would need to write to a temporary buffer then memcpy into the encoder), and it can
    /// also be used to implement the varint scheme we use for rmeta and dep graph encoding,
    /// where we only want to encode the first few bytes of an integer. Note that common
    /// architectures support fixed-size writes up to 8 bytes with one instruction, so while this
    /// does in some sense do wasted work, we come out ahead.
    #[inline]
    pub fn write_with<const N: usize>(&mut self, visitor: impl FnOnce(&mut [u8; N]) -> usize) {
        self.data.reserve(N);

        let old_len = self.data.len();
        // SAFETY: `reserve(N)` guarantees at least `N` bytes of spare capacity
        // past `old_len`, so the pointer is valid for reads/writes of `[u8; N]`
        // within the same allocation.
        // NOTE(review): this hands the visitor a `&mut [u8; N]` over bytes the
        // buffer has not initialized; this relies on `u8` having no invalid bit
        // patterns — confirm against current `MaybeUninit` guidance.
        let buf = unsafe { &mut *(self.data.as_mut_ptr().add(old_len) as *mut [u8; N]) };
        let written = visitor(buf);
        // The visitor reports how many of the `N` bytes it actually filled;
        // anything larger would have written past the reserved region.
        if written > N {
            Self::panic_invalid_write::<N>(written);
        }
        // SAFETY: `written <= N` was just checked, so `old_len + written` is
        // within the capacity reserved above, and those bytes were written by
        // the visitor through `buf`.
        unsafe { self.data.set_len(old_len + written) };
    }

    // Out-of-line panic path so the hot `write_with` body stays small.
    #[cold]
    #[inline(never)]
    fn panic_invalid_write<const N: usize>(written: usize) {
        panic!("MemEncoder::write_with::<{N}> cannot be used to write {written} bytes");
    }

    /// Helper for calls where [`MemEncoder::write_with`] always writes the whole array.
    #[inline]
    pub fn write_array<const N: usize>(&mut self, buf: [u8; N]) {
        self.write_with(|dest| {
            *dest = buf;
            N
        })
    }
}
|
||||
|
||||
// Generates an `Encoder` method that LEB128-encodes an integer directly into
// the buffer: `write_with` hands `leb128::$write_leb_fn` an in-place scratch
// array, and the leb128 writer returns the number of bytes it actually used.
macro_rules! write_leb128 {
    ($this_fn:ident, $int_ty:ty, $write_leb_fn:ident) => {
        #[inline]
        fn $this_fn(&mut self, v: $int_ty) {
            self.write_with(|buf| leb128::$write_leb_fn(buf, v))
        }
    };
}
|
||||
|
||||
impl Encoder for MemEncoder {
|
||||
write_leb128!(emit_usize, usize, write_usize_leb128);
|
||||
write_leb128!(emit_u128, u128, write_u128_leb128);
|
||||
write_leb128!(emit_u64, u64, write_u64_leb128);
|
||||
write_leb128!(emit_u32, u32, write_u32_leb128);
|
||||
|
||||
#[inline]
|
||||
fn emit_u16(&mut self, v: u16) {
|
||||
self.write_array(v.to_le_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn emit_u8(&mut self, v: u8) {
|
||||
self.write_array([v]);
|
||||
}
|
||||
|
||||
write_leb128!(emit_isize, isize, write_isize_leb128);
|
||||
write_leb128!(emit_i128, i128, write_i128_leb128);
|
||||
write_leb128!(emit_i64, i64, write_i64_leb128);
|
||||
write_leb128!(emit_i32, i32, write_i32_leb128);
|
||||
|
||||
#[inline]
|
||||
fn emit_i16(&mut self, v: i16) {
|
||||
self.write_array(v.to_le_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn emit_raw_bytes(&mut self, s: &[u8]) {
|
||||
self.data.extend_from_slice(s);
|
||||
}
|
||||
}
|
||||
|
||||
// Specialize encoding byte slices. This specialization also applies to encoding `Vec<u8>`s, etc.,
|
||||
// since the default implementations call `encode` on their slices internally.
|
||||
impl Encodable<MemEncoder> for [u8] {
|
||||
fn encode(&self, e: &mut MemEncoder) {
|
||||
Encoder::emit_usize(e, self.len());
|
||||
e.emit_raw_bytes(self);
|
||||
}
|
||||
}
|
||||
|
||||
impl Encodable<MemEncoder> for IntEncodedWithFixedSize {
|
||||
#[inline]
|
||||
fn encode(&self, e: &mut MemEncoder) {
|
||||
let start_pos = e.position();
|
||||
e.write_array(self.0.to_le_bytes());
|
||||
let end_pos = e.position();
|
||||
debug_assert_eq!((end_pos - start_pos), IntEncodedWithFixedSize::ENCODED_SIZE);
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue