Simplify LazyAttrTokenStream.

This commit does the following.
- Changes it from `Lrc<Box<dyn ToAttrTokenStream>>` to
  `Lrc<LazyAttrTokenStreamInner>`.
- Reworks `LazyAttrTokenStreamImpl` as `LazyAttrTokenStreamInner`, which
  is a two-variant enum.
- Removes the `ToAttrTokenStream` trait and the two impls of it.

The recursion limit must be increased in some crates, otherwise rustdoc aborts.
Nicholas Nethercote 2025-04-29 11:57:27 +10:00
parent 28236ab703
commit 298c56f4ba
12 changed files with 138 additions and 118 deletions
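
Before the per-file diffs, here is a minimal runnable sketch of the shape change (toy types and names such as `Lazy`/`LazyInner` and the `u32` payload — not the real rustc API): the boxed trait object behind the shared pointer becomes a concrete two-variant enum, and the single `new` constructor splits into `new_direct` and `new_pending`.

use std::sync::Arc;

// Toy stand-in for the real AttrTokenStream.
#[derive(Clone, Debug)]
struct AttrTokenStream(Vec<u32>);

// After the change: one concrete enum behind the Arc. `Direct` holds an
// already-produced stream; `Pending` holds just enough state to rebuild
// one on demand (the real variant carries a cursor snapshot etc.).
enum LazyInner {
    Direct(AttrTokenStream),
    Pending { num_calls: u32 },
}

#[derive(Clone)]
struct Lazy(Arc<LazyInner>);

impl Lazy {
    fn new_direct(stream: AttrTokenStream) -> Lazy {
        Lazy(Arc::new(LazyInner::Direct(stream)))
    }
    fn new_pending(num_calls: u32) -> Lazy {
        Lazy(Arc::new(LazyInner::Pending { num_calls }))
    }
    fn to_attr_token_stream(&self) -> AttrTokenStream {
        match &*self.0 {
            LazyInner::Direct(stream) => stream.clone(),
            // Stand-in for replaying the parser cursor `num_calls` times.
            LazyInner::Pending { num_calls } => AttrTokenStream((0..*num_calls).collect()),
        }
    }
}

fn main() {
    let direct = Lazy::new_direct(AttrTokenStream(vec![7, 8]));
    let pending = Lazy::new_pending(3);
    println!("{:?} {:?}", direct.to_attr_token_stream(), pending.to_attr_token_stream());
}

Dropping the `dyn` trait removes the inner `Box` allocation and the dynamic dispatch on every `to_attr_token_stream` call; the trade-off is that `tokenstream.rs` now has to know about the parser's replay state, which is why `new_pending` takes those fields directly.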

View file

@@ -20,6 +20,7 @@
 #![feature(never_type)]
 #![feature(rustdoc_internals)]
 #![feature(stmt_expr_attributes)]
+#![recursion_limit = "256"]
 // tidy-alphabetical-end
 
 pub mod util {

View file

@@ -835,7 +835,7 @@ fn visit_lazy_tts_opt_mut<T: MutVisitor>(vis: &mut T, lazy_tts: Option<&mut Lazy
         if let Some(lazy_tts) = lazy_tts {
             let mut tts = lazy_tts.to_attr_token_stream();
             visit_attr_tts(vis, &mut tts);
-            *lazy_tts = LazyAttrTokenStream::new(tts);
+            *lazy_tts = LazyAttrTokenStream::new_direct(tts);
         }
     }
 }

View file

@@ -107,25 +107,30 @@ where
     }
 }
 
-pub trait ToAttrTokenStream: sync::DynSend + sync::DynSync {
-    fn to_attr_token_stream(&self) -> AttrTokenStream;
-}
-
-impl ToAttrTokenStream for AttrTokenStream {
-    fn to_attr_token_stream(&self) -> AttrTokenStream {
-        self.clone()
-    }
-}
-
-/// A lazy version of [`TokenStream`], which defers creation
-/// of an actual `TokenStream` until it is needed.
-/// `Box` is here only to reduce the structure size.
+/// A lazy version of [`AttrTokenStream`], which defers creation of an actual
+/// `AttrTokenStream` until it is needed.
 #[derive(Clone)]
-pub struct LazyAttrTokenStream(Arc<Box<dyn ToAttrTokenStream>>);
+pub struct LazyAttrTokenStream(Arc<LazyAttrTokenStreamInner>);
 
 impl LazyAttrTokenStream {
-    pub fn new(inner: impl ToAttrTokenStream + 'static) -> LazyAttrTokenStream {
-        LazyAttrTokenStream(Arc::new(Box::new(inner)))
+    pub fn new_direct(stream: AttrTokenStream) -> LazyAttrTokenStream {
+        LazyAttrTokenStream(Arc::new(LazyAttrTokenStreamInner::Direct(stream)))
+    }
+
+    pub fn new_pending(
+        start_token: (Token, Spacing),
+        cursor_snapshot: TokenCursor,
+        num_calls: u32,
+        break_last_token: u32,
+        node_replacements: Box<[NodeReplacement]>,
+    ) -> LazyAttrTokenStream {
+        LazyAttrTokenStream(Arc::new(LazyAttrTokenStreamInner::Pending {
+            start_token,
+            cursor_snapshot,
+            num_calls,
+            break_last_token,
+            node_replacements,
+        }))
     }
 
     pub fn to_attr_token_stream(&self) -> AttrTokenStream {
@@ -208,91 +213,109 @@ impl NodeRange {
     }
 }
 
-// From a value of this type we can reconstruct the `TokenStream` seen by the
-// `f` callback passed to a call to `Parser::collect_tokens`, by
-// replaying the getting of the tokens. This saves us producing a `TokenStream`
-// if it is never needed, e.g. a captured `macro_rules!` argument that is never
-// passed to a proc macro. In practice, token stream creation happens rarely
-// compared to calls to `collect_tokens` (see some statistics in #78736) so we
-// are doing as little up-front work as possible.
-//
-// This also makes `Parser` very cheap to clone, since
-// there is no intermediate collection buffer to clone.
-pub struct LazyAttrTokenStreamImpl {
-    pub start_token: (Token, Spacing),
-    pub cursor_snapshot: TokenCursor,
-    pub num_calls: u32,
-    pub break_last_token: u32,
-    pub node_replacements: Box<[NodeReplacement]>,
+enum LazyAttrTokenStreamInner {
+    // The token stream has already been produced.
+    Direct(AttrTokenStream),
+
+    // From a value of this type we can reconstruct the `TokenStream` seen by
+    // the `f` callback passed to a call to `Parser::collect_tokens`, by
+    // replaying the getting of the tokens. This saves us producing a
+    // `TokenStream` if it is never needed, e.g. a captured `macro_rules!`
+    // argument that is never passed to a proc macro. In practice, token stream
+    // creation happens rarely compared to calls to `collect_tokens` (see some
+    // statistics in #78736) so we are doing as little up-front work as
+    // possible.
+    //
+    // This also makes `Parser` very cheap to clone, since there is no
+    // intermediate collection buffer to clone.
+    Pending {
+        start_token: (Token, Spacing),
+        cursor_snapshot: TokenCursor,
+        num_calls: u32,
+        break_last_token: u32,
+        node_replacements: Box<[NodeReplacement]>,
+    },
 }
 
-impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
+impl LazyAttrTokenStreamInner {
     fn to_attr_token_stream(&self) -> AttrTokenStream {
-        // The token produced by the final call to `{,inlined_}next` was not
-        // actually consumed by the callback. The combination of chaining the
-        // initial token and using `take` produces the desired result - we
-        // produce an empty `TokenStream` if no calls were made, and omit the
-        // final token otherwise.
-        let mut cursor_snapshot = self.cursor_snapshot.clone();
-        let tokens = iter::once(FlatToken::Token(self.start_token))
-            .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next())))
-            .take(self.num_calls as usize);
+        match self {
+            LazyAttrTokenStreamInner::Direct(stream) => stream.clone(),
+            LazyAttrTokenStreamInner::Pending {
+                start_token,
+                cursor_snapshot,
+                num_calls,
+                break_last_token,
+                node_replacements,
+            } => {
+                // The token produced by the final call to `{,inlined_}next` was not
+                // actually consumed by the callback. The combination of chaining the
+                // initial token and using `take` produces the desired result - we
+                // produce an empty `TokenStream` if no calls were made, and omit the
+                // final token otherwise.
+                let mut cursor_snapshot = cursor_snapshot.clone();
+                let tokens = iter::once(FlatToken::Token(*start_token))
+                    .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next())))
+                    .take(*num_calls as usize);
 
-        if self.node_replacements.is_empty() {
-            make_attr_token_stream(tokens, self.break_last_token)
-        } else {
-            let mut tokens: Vec<_> = tokens.collect();
-            let mut node_replacements = self.node_replacements.to_vec();
-            node_replacements.sort_by_key(|(range, _)| range.0.start);
+                if node_replacements.is_empty() {
+                    make_attr_token_stream(tokens, *break_last_token)
+                } else {
+                    let mut tokens: Vec<_> = tokens.collect();
+                    let mut node_replacements = node_replacements.to_vec();
+                    node_replacements.sort_by_key(|(range, _)| range.0.start);
 
-            #[cfg(debug_assertions)]
-            for [(node_range, tokens), (next_node_range, next_tokens)] in
-                node_replacements.array_windows()
-            {
-                assert!(
-                    node_range.0.end <= next_node_range.0.start
-                        || node_range.0.end >= next_node_range.0.end,
-                    "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
-                    node_range,
-                    tokens,
-                    next_node_range,
-                    next_tokens,
-                );
-            }
+                    #[cfg(debug_assertions)]
+                    for [(node_range, tokens), (next_node_range, next_tokens)] in
+                        node_replacements.array_windows()
+                    {
+                        assert!(
+                            node_range.0.end <= next_node_range.0.start
+                                || node_range.0.end >= next_node_range.0.end,
+                            "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
+                            node_range,
+                            tokens,
+                            next_node_range,
+                            next_tokens,
+                        );
+                    }
+
+                    // Process the replace ranges, starting from the highest start
+                    // position and working our way back. If we have tokens like:
+                    //
+                    // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
+                    //
+                    // Then we will generate replace ranges for both
+                    // the `#[cfg(FALSE)] field: bool` and the entire
+                    // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
+                    //
+                    // By starting processing from the replace range with the greatest
+                    // start position, we ensure that any (outer) replace range which
+                    // encloses another (inner) replace range will fully overwrite the
+                    // inner range's replacement.
+                    for (node_range, target) in node_replacements.into_iter().rev() {
+                        assert!(
+                            !node_range.0.is_empty(),
+                            "Cannot replace an empty node range: {:?}",
+                            node_range.0
+                        );
+
+                        // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s,
+                        // plus enough `FlatToken::Empty`s to fill up the rest of the range. This
+                        // keeps the total length of `tokens` constant throughout the replacement
+                        // process, allowing us to do all replacements without adjusting indices.
+                        let target_len = target.is_some() as usize;
+                        tokens.splice(
+                            (node_range.0.start as usize)..(node_range.0.end as usize),
+                            target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain(
+                                iter::repeat(FlatToken::Empty)
+                                    .take(node_range.0.len() - target_len),
+                            ),
+                        );
+                    }
+                    make_attr_token_stream(tokens.into_iter(), *break_last_token)
+                }
+            }
 
-            // Process the replace ranges, starting from the highest start
-            // position and working our way back. If we have tokens like:
-            //
-            // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
-            //
-            // Then we will generate replace ranges for both
-            // the `#[cfg(FALSE)] field: bool` and the entire
-            // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
-            //
-            // By starting processing from the replace range with the greatest
-            // start position, we ensure that any (outer) replace range which
-            // encloses another (inner) replace range will fully overwrite the
-            // inner range's replacement.
-            for (node_range, target) in node_replacements.into_iter().rev() {
-                assert!(
-                    !node_range.0.is_empty(),
-                    "Cannot replace an empty node range: {:?}",
-                    node_range.0
-                );
-
-                // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s, plus
-                // enough `FlatToken::Empty`s to fill up the rest of the range. This keeps the
-                // total length of `tokens` constant throughout the replacement process, allowing
-                // us to do all replacements without adjusting indices.
-                let target_len = target.is_some() as usize;
-                tokens.splice(
-                    (node_range.0.start as usize)..(node_range.0.end as usize),
-                    target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain(
-                        iter::repeat(FlatToken::Empty).take(node_range.0.len() - target_len),
-                    ),
-                );
-            }
-            make_attr_token_stream(tokens.into_iter(), self.break_last_token)
         }
     }
 }
@@ -1011,6 +1034,7 @@ mod size_asserts {
     static_assert_size!(AttrTokenStream, 8);
     static_assert_size!(AttrTokenTree, 32);
     static_assert_size!(LazyAttrTokenStream, 8);
+    static_assert_size!(LazyAttrTokenStreamInner, 96);
     static_assert_size!(Option<LazyAttrTokenStream>, 8); // must be small, used in many AST nodes
     static_assert_size!(TokenStream, 8);
     static_assert_size!(TokenTree, 32);
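
The replacement loop in the `Pending` arm above hinges on a length-preserving splice: each range is overwritten with at most one `AttrsTarget` plus `Empty` padding, and ranges are processed from the highest start position backwards. A small runnable sketch of just that technique, using toy stand-ins for `FlatToken` and the node-range pairs (assumptions for illustration, not rustc's API):

use std::iter;
use std::ops::Range;

#[derive(Clone, Debug, PartialEq)]
enum FlatToken {
    Token(char),
    AttrsTarget(&'static str),
    Empty,
}

// Overwrite each range with at most one target token plus `Empty` padding,
// so `tokens.len()` never changes and unprocessed indices stay valid.
// Processing from the highest start position backwards lets an outer range
// fully overwrite an inner range it encloses.
fn apply_replacements(
    tokens: &mut Vec<FlatToken>,
    mut replacements: Vec<(Range<usize>, Option<&'static str>)>,
) {
    replacements.sort_by_key(|(range, _)| range.start);
    for (range, target) in replacements.into_iter().rev() {
        let (len, target_len) = (range.len(), target.is_some() as usize);
        tokens.splice(
            range,
            target
                .into_iter()
                .map(FlatToken::AttrsTarget)
                .chain(iter::repeat(FlatToken::Empty).take(len - target_len)),
        );
    }
}

fn main() {
    let mut tokens: Vec<FlatToken> = "abcdef".chars().map(FlatToken::Token).collect();
    // Nested ranges: 1..5 encloses 2..4; reverse start order means the
    // outer replacement wins.
    apply_replacements(&mut tokens, vec![(2..4, Some("inner")), (1..5, Some("outer"))]);
    assert_eq!(tokens.len(), 6); // the length-preserving invariant
    println!("{tokens:?}");
}

Running this leaves `[Token('a'), AttrsTarget("outer"), Empty, Empty, Empty, Token('f')]`: the outer range has fully overwritten the inner one, and the vector's length never changed, so no index adjustment was needed.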

View file

@@ -80,6 +80,7 @@
 #![cfg_attr(bootstrap, feature(let_chains))]
 #![doc(rust_logo)]
 #![feature(rustdoc_internals)]
+#![recursion_limit = "256"]
 // tidy-alphabetical-end
 
 #[macro_use]

View file

@@ -18,6 +18,7 @@
 #![feature(rustdoc_internals)]
 #![feature(string_from_utf8_lossy_owned)]
 #![feature(try_blocks)]
+#![recursion_limit = "256"]
 // tidy-alphabetical-end
 
 extern crate proc_macro;

View file

@@ -14,6 +14,7 @@
 #![feature(string_from_utf8_lossy_owned)]
 #![feature(trait_alias)]
 #![feature(try_blocks)]
+#![recursion_limit = "256"]
 // tidy-alphabetical-end
 
 //! This crate contains codegen code that is used by all codegen backends (LLVM and others).

View file

@@ -162,7 +162,7 @@ pub(crate) fn attr_into_trace(mut attr: Attribute, trace_name: Symbol) -> Attrib
             let NormalAttr { item, tokens } = &mut **normal;
             item.path.segments[0].ident.name = trace_name;
             // This makes the trace attributes unobservable to token-based proc macros.
-            *tokens = Some(LazyAttrTokenStream::new(AttrTokenStream::default()));
+            *tokens = Some(LazyAttrTokenStream::new_direct(AttrTokenStream::default()));
         }
         AttrKind::DocComment(..) => unreachable!(),
     }
@@ -192,7 +192,7 @@ impl<'a> StripUnconfigured<'a> {
         if self.config_tokens {
             if let Some(Some(tokens)) = node.tokens_mut() {
                 let attr_stream = tokens.to_attr_token_stream();
-                *tokens = LazyAttrTokenStream::new(self.configure_tokens(&attr_stream));
+                *tokens = LazyAttrTokenStream::new_direct(self.configure_tokens(&attr_stream));
             }
         }
     }
@@ -223,7 +223,7 @@ impl<'a> StripUnconfigured<'a> {
                 target.attrs.flat_map_in_place(|attr| self.process_cfg_attr(&attr));
                 if self.in_cfg(&target.attrs) {
-                    target.tokens = LazyAttrTokenStream::new(
+                    target.tokens = LazyAttrTokenStream::new_direct(
                         self.configure_tokens(&target.tokens.to_attr_token_stream()),
                     );
                     Some(AttrTokenTree::AttrsTarget(target))
@@ -361,7 +361,7 @@ impl<'a> StripUnconfigured<'a> {
                 .to_attr_token_stream(),
         ));
-        let tokens = Some(LazyAttrTokenStream::new(AttrTokenStream::new(trees)));
+        let tokens = Some(LazyAttrTokenStream::new_direct(AttrTokenStream::new(trees)));
         let attr = ast::attr::mk_attr_from_item(
             &self.sess.psess.attr_id_generator,
             item,

View file

@@ -14,6 +14,7 @@
 #![feature(never_type)]
 #![feature(rustc_attrs)]
 #![feature(variant_count)]
+#![recursion_limit = "256"]
 // tidy-alphabetical-end
 
 extern crate self as rustc_hir;

View file

@@ -61,6 +61,7 @@
 #![feature(try_trait_v2_yeet)]
 #![feature(type_alias_impl_trait)]
 #![feature(yeet_expr)]
+#![recursion_limit = "256"]
 // tidy-alphabetical-end
 
 #[cfg(test)]

View file

@@ -11,6 +11,7 @@
 #![feature(if_let_guard)]
 #![feature(iter_intersperse)]
 #![feature(string_from_utf8_lossy_owned)]
+#![recursion_limit = "256"]
 // tidy-alphabetical-end
 
 use std::path::{Path, PathBuf};

View file

@@ -3,8 +3,7 @@ use std::mem;
 
 use rustc_ast::token::Token;
 use rustc_ast::tokenstream::{
-    AttrsTarget, LazyAttrTokenStream, LazyAttrTokenStreamImpl, NodeRange, ParserRange, Spacing,
-    TokenCursor,
+    AttrsTarget, LazyAttrTokenStream, NodeRange, ParserRange, Spacing, TokenCursor,
 };
 use rustc_ast::{self as ast, AttrVec, Attribute, HasAttrs, HasTokens};
 use rustc_data_structures::fx::FxHashSet;
@@ -337,13 +336,13 @@ impl<'a> Parser<'a> {
         //  - `attrs`: includes the outer and the inner attr.
         //  - `tokens`: lazy tokens for `g` (with its inner attr deleted).
-        let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl {
-            start_token: collect_pos.start_token,
-            cursor_snapshot: collect_pos.cursor_snapshot,
+        let tokens = LazyAttrTokenStream::new_pending(
+            collect_pos.start_token,
+            collect_pos.cursor_snapshot,
             num_calls,
-            break_last_token: self.break_last_token,
+            self.break_last_token,
             node_replacements,
-        });
+        );
 
         let mut tokens_used = false;
 
         // If in "definite capture mode" we need to register a replace range
@@ -405,14 +404,3 @@ fn needs_tokens(attrs: &[ast::Attribute]) -> bool {
         }
     })
 }
-
-// Some types are used a lot. Make sure they don't unintentionally get bigger.
-#[cfg(target_pointer_width = "64")]
-mod size_asserts {
-    use rustc_data_structures::static_assert_size;
-
-    use super::*;
-    // tidy-alphabetical-start
-    static_assert_size!(LazyAttrTokenStreamImpl, 96);
-    // tidy-alphabetical-end
-}
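
The call site above shows what `new_pending` actually captures: the collection start state and a call count, with no intermediate token buffer. A toy sketch of that snapshot-and-replay idea (assumed names, not the rustc API) — cloning the cursor is cheap, and nothing is materialized until `replay` is called:

use std::iter;
use std::rc::Rc;

// Cloning the cursor copies an Rc pointer and an index, so snapshots are cheap.
#[derive(Clone)]
struct TokenCursor {
    tokens: Rc<Vec<char>>,
    index: usize,
}

impl TokenCursor {
    fn next(&mut self) -> char {
        let tok = self.tokens.get(self.index).copied().unwrap_or('\0');
        self.index += 1;
        tok
    }
}

// The "pending" state: where collection started plus how many tokens the
// callback consumed. The stream is only materialized by replay().
struct PendingStream {
    cursor_snapshot: TokenCursor,
    num_calls: u32,
}

impl PendingStream {
    fn replay(&self) -> Vec<char> {
        let mut cursor = self.cursor_snapshot.clone();
        iter::repeat_with(|| cursor.next()).take(self.num_calls as usize).collect()
    }
}

fn main() {
    let tokens = Rc::new("fn main() {}".chars().collect::<Vec<_>>());
    let pending = PendingStream {
        cursor_snapshot: TokenCursor { tokens, index: 3 },
        num_calls: 4,
    };
    println!("{:?}", pending.replay()); // ['m', 'a', 'i', 'n']
}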

View file

@@ -19,6 +19,7 @@
 #![feature(iter_intersperse)]
 #![feature(rustc_attrs)]
 #![feature(rustdoc_internals)]
+#![recursion_limit = "256"]
 // tidy-alphabetical-end
 
 use std::cell::{Cell, RefCell};