From f79439caedee7ed00252aff8d4d4fae8fac0c597 Mon Sep 17 00:00:00 2001 From: Lukas Wirth Date: Thu, 29 Jun 2023 12:23:45 +0200 Subject: [PATCH] Infect proc-macro-api crate with generic span type parameter --- crates/proc-macro-api/src/lib.rs | 12 +- crates/proc-macro-api/src/msg.rs | 25 ++- crates/proc-macro-api/src/msg/flat.rs | 230 ++++++++++++++++++-------- 3 files changed, 181 insertions(+), 86 deletions(-) diff --git a/crates/proc-macro-api/src/lib.rs b/crates/proc-macro-api/src/lib.rs index 1603458f756e..7a3580f814e2 100644 --- a/crates/proc-macro-api/src/lib.rs +++ b/crates/proc-macro-api/src/lib.rs @@ -17,10 +17,8 @@ use triomphe::Arc; use serde::{Deserialize, Serialize}; -use ::tt::token_id as tt; - use crate::{ - msg::{ExpandMacro, FlatTree, PanicMessage}, + msg::{flat::SerializableSpan, ExpandMacro, FlatTree, PanicMessage}, process::ProcMacroProcessSrv, }; @@ -134,12 +132,12 @@ impl ProcMacro { self.kind } - pub fn expand( + pub fn expand>( &self, - subtree: &tt::Subtree, - attr: Option<&tt::Subtree>, + subtree: &tt::Subtree, + attr: Option<&tt::Subtree>, env: Vec<(String, String)>, - ) -> Result, ServerError> { + ) -> Result, PanicMessage>, ServerError> { let version = self.process.lock().unwrap_or_else(|e| e.into_inner()).version(); let current_dir = env .iter() diff --git a/crates/proc-macro-api/src/msg.rs b/crates/proc-macro-api/src/msg.rs index 4b01643c2a29..f0719777ab4d 100644 --- a/crates/proc-macro-api/src/msg.rs +++ b/crates/proc-macro-api/src/msg.rs @@ -16,8 +16,15 @@ pub use crate::msg::flat::FlatTree; pub const NO_VERSION_CHECK_VERSION: u32 = 0; pub const VERSION_CHECK_VERSION: u32 = 1; pub const ENCODE_CLOSE_SPAN_VERSION: u32 = 2; +/// This version changes how spans are encoded, kind of. Prior to this version, +/// spans were represented as a single u32 which effectively forced spans to be +/// token ids. Starting with this version, the span fields are still u32, +/// but if the size of the span is greater than 1 then the span data is encoded in +/// an additional vector where the span represents the offset into that vector. +/// This allows encoding bigger spans while supporting the previous versions. +pub const VARIABLE_SIZED_SPANS: u32 = 2; -pub const CURRENT_API_VERSION: u32 = ENCODE_CLOSE_SPAN_VERSION; +pub const CURRENT_API_VERSION: u32 = VARIABLE_SIZED_SPANS; #[derive(Debug, Serialize, Deserialize)] pub enum Request { @@ -115,10 +122,14 @@ fn write_json(out: &mut impl Write, msg: &str) -> io::Result<()> { #[cfg(test)] mod tests { - use super::*; - use crate::tt::*; + use tt::{ + Delimiter, DelimiterKind, Ident, Leaf, Literal, Punct, Spacing, Span, Subtree, TokenId, + TokenTree, + }; - fn fixture_token_tree() -> Subtree { + use super::*; + + fn fixture_token_tree() -> Subtree { let mut subtree = Subtree { delimiter: Delimiter::unspecified(), token_trees: Vec::new() }; subtree .token_trees @@ -128,17 +139,17 @@ mod tests { .push(TokenTree::Leaf(Ident { text: "Foo".into(), span: TokenId(1) }.into())); subtree.token_trees.push(TokenTree::Leaf(Leaf::Literal(Literal { text: "Foo".into(), - span: TokenId::unspecified(), + span: TokenId::DUMMY, }))); subtree.token_trees.push(TokenTree::Leaf(Leaf::Punct(Punct { char: '@', - span: TokenId::unspecified(), + span: TokenId::DUMMY, spacing: Spacing::Joint, }))); subtree.token_trees.push(TokenTree::Subtree(Subtree { delimiter: Delimiter { open: TokenId(2), - close: TokenId::UNSPECIFIED, + close: TokenId::DUMMY, kind: DelimiterKind::Brace, }, token_trees: vec![], diff --git a/crates/proc-macro-api/src/msg/flat.rs b/crates/proc-macro-api/src/msg/flat.rs index 44245336f016..fe82b8d04542 100644 --- a/crates/proc-macro-api/src/msg/flat.rs +++ b/crates/proc-macro-api/src/msg/flat.rs @@ -38,11 +38,22 @@ use std::collections::{HashMap, VecDeque}; use serde::{Deserialize, Serialize}; +use tt::Span; -use crate::{ - msg::ENCODE_CLOSE_SPAN_VERSION, - tt::{self, TokenId}, -}; +use crate::msg::{ENCODE_CLOSE_SPAN_VERSION, VARIABLE_SIZED_SPANS}; + +pub trait SerializableSpan: Span { + fn into_u32(self) -> [u32; L]; + fn from_u32(input: [u32; L]) -> Self; +} +impl SerializableSpan<1> for tt::TokenId { + fn into_u32(self) -> [u32; 1] { + [self.0] + } + fn from_u32([input]: [u32; 1]) -> Self { + tt::TokenId(input) + } +} #[derive(Serialize, Deserialize, Debug)] pub struct FlatTree { @@ -52,33 +63,79 @@ pub struct FlatTree { ident: Vec, token_tree: Vec, text: Vec, + #[serde(skip_serializing_if = "SpanMap::do_serialize")] + #[serde(default)] + span_map: SpanMap, } -struct SubtreeRepr { - open: tt::TokenId, - close: tt::TokenId, +#[derive(Serialize, Deserialize, Debug)] +pub struct SpanMap { + #[serde(skip_serializing)] + serialize: bool, + span_size: u32, + spans: Vec, +} + +impl Default for SpanMap { + fn default() -> Self { + Self { serialize: false, span_size: 1, spans: Default::default() } + } +} + +impl SpanMap { + fn serialize_span>(&mut self, span: S) -> u32 { + let u32s = span.into_u32(); + if L == 1 { + u32s[0] + } else { + let offset = self.spans.len() as u32; + self.spans.extend(u32s); + offset + } + } + fn deserialize_span>(&self, offset: u32) -> S { + S::from_u32(if L == 1 { + [offset].as_ref().try_into().unwrap() + } else { + self.spans[offset as usize..][..L].try_into().unwrap() + }) + } +} + +impl SpanMap { + pub fn do_serialize(&self) -> bool { + self.serialize + } +} + +struct SubtreeRepr { + open: S, + close: S, kind: tt::DelimiterKind, tt: [u32; 2], } -struct LiteralRepr { - id: tt::TokenId, +struct LiteralRepr { + id: S, text: u32, } -struct PunctRepr { - id: tt::TokenId, +struct PunctRepr { + id: S, char: char, spacing: tt::Spacing, } -struct IdentRepr { - id: tt::TokenId, +struct IdentRepr { + id: S, text: u32, } impl FlatTree { - pub fn new(subtree: &tt::Subtree, version: u32) -> FlatTree { + pub fn new>( + subtree: &tt::Subtree, + version: u32, + ) -> FlatTree { let mut w = Writer { string_table: HashMap::new(), work: VecDeque::new(), @@ -91,60 +148,78 @@ impl FlatTree { text: Vec::new(), }; w.write(subtree); - + assert!(L == 1 || version >= VARIABLE_SIZED_SPANS); + let mut span_map = SpanMap { + serialize: version >= VARIABLE_SIZED_SPANS && L != 1, + span_size: L as u32, + spans: Vec::new(), + }; return FlatTree { subtree: if version >= ENCODE_CLOSE_SPAN_VERSION { - write_vec(w.subtree, SubtreeRepr::write_with_close_span) + write_vec(&mut span_map, w.subtree, SubtreeRepr::write_with_close_span) } else { - write_vec(w.subtree, SubtreeRepr::write) + write_vec(&mut span_map, w.subtree, SubtreeRepr::write) }, - literal: write_vec(w.literal, LiteralRepr::write), - punct: write_vec(w.punct, PunctRepr::write), - ident: write_vec(w.ident, IdentRepr::write), + literal: write_vec(&mut span_map, w.literal, LiteralRepr::write), + punct: write_vec(&mut span_map, w.punct, PunctRepr::write), + ident: write_vec(&mut span_map, w.ident, IdentRepr::write), token_tree: w.token_tree, text: w.text, + span_map, }; - fn write_vec [u32; N], const N: usize>(xs: Vec, f: F) -> Vec { - xs.into_iter().flat_map(f).collect() + fn write_vec [u32; N], const N: usize>( + map: &mut SpanMap, + xs: Vec, + f: F, + ) -> Vec { + xs.into_iter().flat_map(|it| f(it, map)).collect() } } - pub fn to_subtree(self, version: u32) -> tt::Subtree { + pub fn to_subtree>( + self, + version: u32, + ) -> tt::Subtree { + assert!((version >= VARIABLE_SIZED_SPANS || L == 1) && L as u32 == self.span_map.span_size); return Reader { subtree: if version >= ENCODE_CLOSE_SPAN_VERSION { - read_vec(self.subtree, SubtreeRepr::read_with_close_span) + read_vec(&self.span_map, self.subtree, SubtreeRepr::read_with_close_span) } else { - read_vec(self.subtree, SubtreeRepr::read) + read_vec(&self.span_map, self.subtree, SubtreeRepr::read) }, - literal: read_vec(self.literal, LiteralRepr::read), - punct: read_vec(self.punct, PunctRepr::read), - ident: read_vec(self.ident, IdentRepr::read), + literal: read_vec(&self.span_map, self.literal, LiteralRepr::read), + punct: read_vec(&self.span_map, self.punct, PunctRepr::read), + ident: read_vec(&self.span_map, self.ident, IdentRepr::read), token_tree: self.token_tree, text: self.text, } .read(); - fn read_vec T, const N: usize>(xs: Vec, f: F) -> Vec { + fn read_vec T, const N: usize>( + map: &SpanMap, + xs: Vec, + f: F, + ) -> Vec { let mut chunks = xs.chunks_exact(N); - let res = chunks.by_ref().map(|chunk| f(chunk.try_into().unwrap())).collect(); + let res = chunks.by_ref().map(|chunk| f(chunk.try_into().unwrap(), map)).collect(); assert!(chunks.remainder().is_empty()); res } } } -impl SubtreeRepr { - fn write(self) -> [u32; 4] { +impl> SubtreeRepr { + fn write(self, map: &mut SpanMap) -> [u32; 4] { let kind = match self.kind { tt::DelimiterKind::Invisible => 0, tt::DelimiterKind::Parenthesis => 1, tt::DelimiterKind::Brace => 2, tt::DelimiterKind::Bracket => 3, }; - [self.open.0, kind, self.tt[0], self.tt[1]] + [map.serialize_span(self.open), kind, self.tt[0], self.tt[1]] } - fn read([open, kind, lo, len]: [u32; 4]) -> SubtreeRepr { + fn read([open, kind, lo, len]: [u32; 4], map: &SpanMap) -> Self { let kind = match kind { 0 => tt::DelimiterKind::Invisible, 1 => tt::DelimiterKind::Parenthesis, @@ -152,18 +227,24 @@ impl SubtreeRepr { 3 => tt::DelimiterKind::Bracket, other => panic!("bad kind {other}"), }; - SubtreeRepr { open: TokenId(open), close: TokenId::UNSPECIFIED, kind, tt: [lo, len] } + SubtreeRepr { open: map.deserialize_span(open), close: S::DUMMY, kind, tt: [lo, len] } } - fn write_with_close_span(self) -> [u32; 5] { + fn write_with_close_span(self, map: &mut SpanMap) -> [u32; 5] { let kind = match self.kind { tt::DelimiterKind::Invisible => 0, tt::DelimiterKind::Parenthesis => 1, tt::DelimiterKind::Brace => 2, tt::DelimiterKind::Bracket => 3, }; - [self.open.0, self.close.0, kind, self.tt[0], self.tt[1]] + [ + map.serialize_span(self.open), + map.serialize_span(self.close), + kind, + self.tt[0], + self.tt[1], + ] } - fn read_with_close_span([open, close, kind, lo, len]: [u32; 5]) -> SubtreeRepr { + fn read_with_close_span([open, close, kind, lo, len]: [u32; 5], map: &SpanMap) -> Self { let kind = match kind { 0 => tt::DelimiterKind::Invisible, 1 => tt::DelimiterKind::Parenthesis, @@ -171,67 +252,72 @@ impl SubtreeRepr { 3 => tt::DelimiterKind::Bracket, other => panic!("bad kind {other}"), }; - SubtreeRepr { open: TokenId(open), close: TokenId(close), kind, tt: [lo, len] } + SubtreeRepr { + open: map.deserialize_span(open), + close: map.deserialize_span(close), + kind, + tt: [lo, len], + } } } -impl LiteralRepr { - fn write(self) -> [u32; 2] { - [self.id.0, self.text] +impl> LiteralRepr { + fn write(self, map: &mut SpanMap) -> [u32; 2] { + [map.serialize_span(self.id), self.text] } - fn read([id, text]: [u32; 2]) -> LiteralRepr { - LiteralRepr { id: TokenId(id), text } + fn read([id, text]: [u32; 2], map: &SpanMap) -> Self { + LiteralRepr { id: map.deserialize_span(id), text } } } -impl PunctRepr { - fn write(self) -> [u32; 3] { +impl> PunctRepr { + fn write(self, map: &mut SpanMap) -> [u32; 3] { let spacing = match self.spacing { tt::Spacing::Alone => 0, tt::Spacing::Joint => 1, }; - [self.id.0, self.char as u32, spacing] + [map.serialize_span(self.id), self.char as u32, spacing] } - fn read([id, char, spacing]: [u32; 3]) -> PunctRepr { + fn read([id, char, spacing]: [u32; 3], map: &SpanMap) -> Self { let spacing = match spacing { 0 => tt::Spacing::Alone, 1 => tt::Spacing::Joint, other => panic!("bad spacing {other}"), }; - PunctRepr { id: TokenId(id), char: char.try_into().unwrap(), spacing } + PunctRepr { id: map.deserialize_span(id), char: char.try_into().unwrap(), spacing } } } -impl IdentRepr { - fn write(self) -> [u32; 2] { - [self.id.0, self.text] +impl> IdentRepr { + fn write(self, map: &mut SpanMap) -> [u32; 2] { + [map.serialize_span(self.id), self.text] } - fn read(data: [u32; 2]) -> IdentRepr { - IdentRepr { id: TokenId(data[0]), text: data[1] } + fn read(data: [u32; 2], map: &SpanMap) -> Self { + IdentRepr { id: map.deserialize_span(data[0]), text: data[1] } } } -struct Writer<'a> { - work: VecDeque<(usize, &'a tt::Subtree)>, +struct Writer<'a, const L: usize, S> { + work: VecDeque<(usize, &'a tt::Subtree)>, string_table: HashMap<&'a str, u32>, - subtree: Vec, - literal: Vec, - punct: Vec, - ident: Vec, + subtree: Vec>, + literal: Vec>, + punct: Vec>, + ident: Vec>, token_tree: Vec, text: Vec, } -impl<'a> Writer<'a> { - fn write(&mut self, root: &'a tt::Subtree) { +impl<'a, const L: usize, S: Copy> Writer<'a, L, S> { + fn write(&mut self, root: &'a tt::Subtree) { self.enqueue(root); while let Some((idx, subtree)) = self.work.pop_front() { self.subtree(idx, subtree); } } - fn subtree(&mut self, idx: usize, subtree: &'a tt::Subtree) { + fn subtree(&mut self, idx: usize, subtree: &'a tt::Subtree) { let mut first_tt = self.token_tree.len(); let n_tt = subtree.token_trees.len(); self.token_tree.resize(first_tt + n_tt, !0); @@ -273,7 +359,7 @@ impl<'a> Writer<'a> { } } - fn enqueue(&mut self, subtree: &'a tt::Subtree) -> u32 { + fn enqueue(&mut self, subtree: &'a tt::Subtree) -> u32 { let idx = self.subtree.len(); let open = subtree.delimiter.open; let close = subtree.delimiter.close; @@ -293,18 +379,18 @@ impl<'a> Writer<'a> { } } -struct Reader { - subtree: Vec, - literal: Vec, - punct: Vec, - ident: Vec, +struct Reader { + subtree: Vec>, + literal: Vec>, + punct: Vec>, + ident: Vec>, token_tree: Vec, text: Vec, } -impl Reader { - pub(crate) fn read(self) -> tt::Subtree { - let mut res: Vec> = vec![None; self.subtree.len()]; +impl> Reader { + pub(crate) fn read(self) -> tt::Subtree { + let mut res: Vec>> = vec![None; self.subtree.len()]; for i in (0..self.subtree.len()).rev() { let repr = &self.subtree[i]; let token_trees = &self.token_tree[repr.tt[0] as usize..repr.tt[1] as usize];