rust/src/libsyntax/parse/token.rs
kennytm 95b7e6fe92
Rollup merge of #49852 - alexcrichton:fix-more-proc-macros, r=nrc
proc_macro: Avoid cached TokenStream more often

This commit adds even more pessimization to use the cached `TokenStream` inside
of an AST node. As a reminder the `proc_macro` API requires taking an arbitrary
AST node and transforming it back into a `TokenStream` to hand off to a
procedural macro. Such functionality isn't actually implemented in rustc today,
so the way `proc_macro` works today is that it stringifies an AST node and then
reparses for a list of tokens.

This strategy unfortunately loses all span information, so we try to avoid it
whenever possible. Implemented in #43230 some AST nodes have a `TokenStream`
cache representing the tokens they were originally parsed from. This
`TokenStream` cache, however, has turned out to not always reflect the current
state of the item when it's being tokenized. For example `#[cfg]` processing or
macro expansion could modify the state of an item. Consequently we've seen a
number of bugs (#48644 and #49846) related to using this stale cache.

This commit tweaks the usage of the cached `TokenStream` to compare it to our
lossy stringification of the token stream. If the tokens that make up the cache
and the stringified token stream are the same then we return the cached version
(which has correct span information). If they differ, however, then we will
return the stringified version as the cache has been invalidated and we just
haven't figured that out.

Closes #48644
Closes #49846
2018-04-14 15:21:19 +08:00

726 lines
24 KiB
Rust

// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
pub use self::BinOpToken::*;
pub use self::Nonterminal::*;
pub use self::DelimToken::*;
pub use self::Lit::*;
pub use self::Token::*;
use ast::{self};
use parse::ParseSess;
use print::pprust;
use ptr::P;
use serialize::{Decodable, Decoder, Encodable, Encoder};
use symbol::keywords;
use syntax::parse::parse_stream_from_source_str;
use syntax_pos::{self, Span, FileName};
use tokenstream::{TokenStream, TokenTree};
use tokenstream;
use std::{cmp, fmt};
use rustc_data_structures::sync::{Lrc, Lock};
#[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug, Copy)]
pub enum BinOpToken {
Plus,
Minus,
Star,
Slash,
Percent,
Caret,
And,
Or,
Shl,
Shr,
}
/// A delimiter token
#[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug, Copy)]
pub enum DelimToken {
/// A round parenthesis: `(` or `)`
Paren,
/// A square bracket: `[` or `]`
Bracket,
/// A curly brace: `{` or `}`
Brace,
/// An empty delimiter
NoDelim,
}
impl DelimToken {
pub fn len(self) -> usize {
if self == NoDelim { 0 } else { 1 }
}
pub fn is_empty(self) -> bool {
self == NoDelim
}
}
#[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug, Copy)]
pub enum Lit {
Byte(ast::Name),
Char(ast::Name),
Integer(ast::Name),
Float(ast::Name),
Str_(ast::Name),
StrRaw(ast::Name, usize), /* raw str delimited by n hash symbols */
ByteStr(ast::Name),
ByteStrRaw(ast::Name, usize), /* raw byte str delimited by n hash symbols */
}
impl Lit {
pub fn short_name(&self) -> &'static str {
match *self {
Byte(_) => "byte",
Char(_) => "char",
Integer(_) => "integer",
Float(_) => "float",
Str_(_) | StrRaw(..) => "string",
ByteStr(_) | ByteStrRaw(..) => "byte string"
}
}
}
pub(crate) fn ident_can_begin_expr(ident: ast::Ident, is_raw: bool) -> bool {
let ident_token: Token = Ident(ident, is_raw);
!ident_token.is_reserved_ident() ||
ident_token.is_path_segment_keyword() ||
[
keywords::Do.name(),
keywords::Box.name(),
keywords::Break.name(),
keywords::Continue.name(),
keywords::False.name(),
keywords::For.name(),
keywords::If.name(),
keywords::Loop.name(),
keywords::Match.name(),
keywords::Move.name(),
keywords::Return.name(),
keywords::True.name(),
keywords::Unsafe.name(),
keywords::While.name(),
keywords::Yield.name(),
keywords::Static.name(),
].contains(&ident.name)
}
fn ident_can_begin_type(ident: ast::Ident, is_raw: bool) -> bool {
let ident_token: Token = Ident(ident, is_raw);
!ident_token.is_reserved_ident() ||
ident_token.is_path_segment_keyword() ||
[
keywords::Underscore.name(),
keywords::For.name(),
keywords::Impl.name(),
keywords::Fn.name(),
keywords::Unsafe.name(),
keywords::Extern.name(),
keywords::Typeof.name(),
].contains(&ident.name)
}
pub fn is_path_segment_keyword(id: ast::Ident) -> bool {
id.name == keywords::Super.name() ||
id.name == keywords::SelfValue.name() ||
id.name == keywords::SelfType.name() ||
id.name == keywords::Extern.name() ||
id.name == keywords::Crate.name() ||
id.name == keywords::CrateRoot.name() ||
id.name == keywords::DollarCrate.name()
}
// We see this identifier in a normal identifier position, like variable name or a type.
// How was it written originally? Did it use the raw form? Let's try to guess.
pub fn is_raw_guess(ident: ast::Ident) -> bool {
ident.name != keywords::Invalid.name() &&
is_reserved_ident(ident) && !is_path_segment_keyword(ident)
}
// Returns true for reserved identifiers used internally for elided lifetimes,
// unnamed method parameters, crate root module, error recovery etc.
pub fn is_special_ident(id: ast::Ident) -> bool {
id.name <= keywords::Underscore.name()
}
/// Returns `true` if the token is a keyword used in the language.
pub fn is_used_keyword(id: ast::Ident) -> bool {
id.name >= keywords::As.name() && id.name <= keywords::While.name()
}
/// Returns `true` if the token is a keyword reserved for possible future use.
pub fn is_unused_keyword(id: ast::Ident) -> bool {
id.name >= keywords::Abstract.name() && id.name <= keywords::Yield.name()
}
/// Returns `true` if the token is either a special identifier or a keyword.
pub fn is_reserved_ident(id: ast::Ident) -> bool {
is_special_ident(id) || is_used_keyword(id) || is_unused_keyword(id)
}
#[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash, Debug)]
pub enum Token {
/* Expression-operator symbols. */
Eq,
Lt,
Le,
EqEq,
Ne,
Ge,
Gt,
AndAnd,
OrOr,
Not,
Tilde,
BinOp(BinOpToken),
BinOpEq(BinOpToken),
/* Structural symbols */
At,
Dot,
DotDot,
DotDotDot,
DotDotEq,
DotEq, // HACK(durka42) never produced by the parser, only used for libproc_macro
Comma,
Semi,
Colon,
ModSep,
RArrow,
LArrow,
FatArrow,
Pound,
Dollar,
Question,
/// An opening delimiter, eg. `{`
OpenDelim(DelimToken),
/// A closing delimiter, eg. `}`
CloseDelim(DelimToken),
/* Literals */
Literal(Lit, Option<ast::Name>),
/* Name components */
Ident(ast::Ident, /* is_raw */ bool),
Lifetime(ast::Ident),
// The `LazyTokenStream` is a pure function of the `Nonterminal`,
// and so the `LazyTokenStream` can be ignored by Eq, Hash, etc.
Interpolated(Lrc<(Nonterminal, LazyTokenStream)>),
// Can be expanded into several tokens.
/// Doc comment
DocComment(ast::Name),
// Junk. These carry no data because we don't really care about the data
// they *would* carry, and don't really want to allocate a new ident for
// them. Instead, users could extract that from the associated span.
/// Whitespace
Whitespace,
/// Comment
Comment,
Shebang(ast::Name),
Eof,
}
impl Token {
pub fn interpolated(nt: Nonterminal) -> Token {
Token::Interpolated(Lrc::new((nt, LazyTokenStream::new())))
}
/// Recovers a `Token` from an `ast::Ident`. This creates a raw identifier if necessary.
pub fn from_ast_ident(ident: ast::Ident) -> Token {
Ident(ident, is_raw_guess(ident))
}
/// Returns `true` if the token starts with '>'.
pub fn is_like_gt(&self) -> bool {
match *self {
BinOp(Shr) | BinOpEq(Shr) | Gt | Ge => true,
_ => false,
}
}
/// Returns `true` if the token can appear at the start of an expression.
pub fn can_begin_expr(&self) -> bool {
match *self {
Ident(ident, is_raw) =>
ident_can_begin_expr(ident, is_raw), // value name or keyword
OpenDelim(..) | // tuple, array or block
Literal(..) | // literal
Not | // operator not
BinOp(Minus) | // unary minus
BinOp(Star) | // dereference
BinOp(Or) | OrOr | // closure
BinOp(And) | // reference
AndAnd | // double reference
// DotDotDot is no longer supported, but we need some way to display the error
DotDot | DotDotDot | DotDotEq | // range notation
Lt | BinOp(Shl) | // associated path
ModSep | // global path
Pound => true, // expression attributes
Interpolated(ref nt) => match nt.0 {
NtIdent(..) | NtExpr(..) | NtBlock(..) | NtPath(..) => true,
_ => false,
},
_ => false,
}
}
/// Returns `true` if the token can appear at the start of a type.
pub fn can_begin_type(&self) -> bool {
match *self {
Ident(ident, is_raw) =>
ident_can_begin_type(ident, is_raw), // type name or keyword
OpenDelim(Paren) | // tuple
OpenDelim(Bracket) | // array
Not | // never
BinOp(Star) | // raw pointer
BinOp(And) | // reference
AndAnd | // double reference
Question | // maybe bound in trait object
Lifetime(..) | // lifetime bound in trait object
Lt | BinOp(Shl) | // associated path
ModSep => true, // global path
Interpolated(ref nt) => match nt.0 {
NtIdent(..) | NtTy(..) | NtPath(..) | NtLifetime(..) => true,
_ => false,
},
_ => false,
}
}
/// Returns `true` if the token can appear at the start of a generic bound.
pub fn can_begin_bound(&self) -> bool {
self.is_path_start() || self.is_lifetime() || self.is_keyword(keywords::For) ||
self == &Question || self == &OpenDelim(Paren)
}
/// Returns `true` if the token is any literal
pub fn is_lit(&self) -> bool {
match *self {
Literal(..) => true,
_ => false,
}
}
/// Returns an identifier if this token is an identifier.
pub fn ident(&self) -> Option<(ast::Ident, /* is_raw */ bool)> {
match *self {
Ident(ident, is_raw) => Some((ident, is_raw)),
Interpolated(ref nt) => match nt.0 {
NtIdent(ident, is_raw) => Some((ident, is_raw)),
_ => None,
},
_ => None,
}
}
/// Returns a lifetime identifier if this token is a lifetime.
pub fn lifetime(&self) -> Option<ast::Ident> {
match *self {
Lifetime(ident) => Some(ident),
Interpolated(ref nt) => match nt.0 {
NtLifetime(ident) => Some(ident),
_ => None,
},
_ => None,
}
}
/// Returns `true` if the token is an identifier.
pub fn is_ident(&self) -> bool {
self.ident().is_some()
}
/// Returns `true` if the token is a lifetime.
pub fn is_lifetime(&self) -> bool {
self.lifetime().is_some()
}
/// Returns `true` if the token is a identifier whose name is the given
/// string slice.
pub fn is_ident_named(&self, name: &str) -> bool {
match self.ident() {
Some((ident, _)) => ident.name.as_str() == name,
None => false
}
}
/// Returns `true` if the token is a documentation comment.
pub fn is_doc_comment(&self) -> bool {
match *self {
DocComment(..) => true,
_ => false,
}
}
/// Returns `true` if the token is interpolated.
pub fn is_interpolated(&self) -> bool {
match *self {
Interpolated(..) => true,
_ => false,
}
}
/// Returns `true` if the token is an interpolated path.
pub fn is_path(&self) -> bool {
if let Interpolated(ref nt) = *self {
if let NtPath(..) = nt.0 {
return true;
}
}
false
}
/// Returns `true` if the token is either the `mut` or `const` keyword.
pub fn is_mutability(&self) -> bool {
self.is_keyword(keywords::Mut) ||
self.is_keyword(keywords::Const)
}
pub fn is_qpath_start(&self) -> bool {
self == &Lt || self == &BinOp(Shl)
}
pub fn is_path_start(&self) -> bool {
self == &ModSep || self.is_qpath_start() || self.is_path() ||
self.is_path_segment_keyword() || self.is_ident() && !self.is_reserved_ident()
}
/// Returns `true` if the token is a given keyword, `kw`.
pub fn is_keyword(&self, kw: keywords::Keyword) -> bool {
self.ident().map(|(ident, is_raw)| ident.name == kw.name() && !is_raw).unwrap_or(false)
}
pub fn is_path_segment_keyword(&self) -> bool {
match self.ident() {
Some((id, false)) => is_path_segment_keyword(id),
_ => false,
}
}
// Returns true for reserved identifiers used internally for elided lifetimes,
// unnamed method parameters, crate root module, error recovery etc.
pub fn is_special_ident(&self) -> bool {
match self.ident() {
Some((id, false)) => is_special_ident(id),
_ => false,
}
}
/// Returns `true` if the token is a keyword used in the language.
pub fn is_used_keyword(&self) -> bool {
match self.ident() {
Some((id, false)) => is_used_keyword(id),
_ => false,
}
}
/// Returns `true` if the token is a keyword reserved for possible future use.
pub fn is_unused_keyword(&self) -> bool {
match self.ident() {
Some((id, false)) => is_unused_keyword(id),
_ => false,
}
}
/// Returns `true` if the token is either a special identifier or a keyword.
pub fn is_reserved_ident(&self) -> bool {
match self.ident() {
Some((id, false)) => is_reserved_ident(id),
_ => false,
}
}
pub fn glue(self, joint: Token) -> Option<Token> {
Some(match self {
Eq => match joint {
Eq => EqEq,
Gt => FatArrow,
_ => return None,
},
Lt => match joint {
Eq => Le,
Lt => BinOp(Shl),
Le => BinOpEq(Shl),
BinOp(Minus) => LArrow,
_ => return None,
},
Gt => match joint {
Eq => Ge,
Gt => BinOp(Shr),
Ge => BinOpEq(Shr),
_ => return None,
},
Not => match joint {
Eq => Ne,
_ => return None,
},
BinOp(op) => match joint {
Eq => BinOpEq(op),
BinOp(And) if op == And => AndAnd,
BinOp(Or) if op == Or => OrOr,
Gt if op == Minus => RArrow,
_ => return None,
},
Dot => match joint {
Dot => DotDot,
DotDot => DotDotDot,
DotEq => DotDotEq,
_ => return None,
},
DotDot => match joint {
Dot => DotDotDot,
Eq => DotDotEq,
_ => return None,
},
Colon => match joint {
Colon => ModSep,
_ => return None,
},
Le | EqEq | Ne | Ge | AndAnd | OrOr | Tilde | BinOpEq(..) | At | DotDotDot | DotEq |
DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar |
Question | OpenDelim(..) | CloseDelim(..) => return None,
Literal(..) | Ident(..) | Lifetime(..) | Interpolated(..) | DocComment(..) |
Whitespace | Comment | Shebang(..) | Eof => return None,
})
}
/// Returns tokens that are likely to be typed accidentally instead of the current token.
/// Enables better error recovery when the wrong token is found.
pub fn similar_tokens(&self) -> Option<Vec<Token>> {
match *self {
Comma => Some(vec![Dot, Lt]),
Semi => Some(vec![Colon]),
_ => None
}
}
pub fn interpolated_to_tokenstream(&self, sess: &ParseSess, span: Span)
-> TokenStream
{
let nt = match *self {
Token::Interpolated(ref nt) => nt,
_ => panic!("only works on interpolated tokens"),
};
// An `Interpolated` token means that we have a `Nonterminal`
// which is often a parsed AST item. At this point we now need
// to convert the parsed AST to an actual token stream, e.g.
// un-parse it basically.
//
// Unfortunately there's not really a great way to do that in a
// guaranteed lossless fashion right now. The fallback here is
// to just stringify the AST node and reparse it, but this loses
// all span information.
//
// As a result, some AST nodes are annotated with the token
// stream they came from. Here we attempt to extract these
// lossless token streams before we fall back to the
// stringification.
//
// During early phases of the compiler, though, the AST could
// get modified directly (e.g. attributes added or removed) and
// the internal cache of tokens my not be invalidated or
// updated. Consequently if the "lossless" token stream
// disagrees with our actuall stringification (which has
// historically been much more battle-tested) then we go with
// the lossy stream anyway (losing span information).
let mut tokens = None;
match nt.0 {
Nonterminal::NtItem(ref item) => {
tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span);
}
Nonterminal::NtTraitItem(ref item) => {
tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span);
}
Nonterminal::NtImplItem(ref item) => {
tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span);
}
Nonterminal::NtIdent(ident, is_raw) => {
let token = Token::Ident(ident, is_raw);
tokens = Some(TokenTree::Token(ident.span, token).into());
}
Nonterminal::NtLifetime(ident) => {
let token = Token::Lifetime(ident);
tokens = Some(TokenTree::Token(ident.span, token).into());
}
Nonterminal::NtTT(ref tt) => {
tokens = Some(tt.clone().into());
}
_ => {}
}
let tokens_for_real = nt.1.force(|| {
// FIXME(#43081): Avoid this pretty-print + reparse hack
let source = pprust::token_to_string(self);
parse_stream_from_source_str(FileName::MacroExpansion, source, sess, Some(span))
});
if let Some(tokens) = tokens {
if tokens.eq_unspanned(&tokens_for_real) {
return tokens
}
}
return tokens_for_real
}
}
#[derive(Clone, RustcEncodable, RustcDecodable, Eq, Hash)]
/// For interpolation during macro expansion.
pub enum Nonterminal {
NtItem(P<ast::Item>),
NtBlock(P<ast::Block>),
NtStmt(ast::Stmt),
NtPat(P<ast::Pat>),
NtExpr(P<ast::Expr>),
NtTy(P<ast::Ty>),
NtIdent(ast::Ident, /* is_raw */ bool),
NtLifetime(ast::Ident),
/// Stuff inside brackets for attributes
NtMeta(ast::MetaItem),
NtPath(ast::Path),
NtVis(ast::Visibility),
NtTT(TokenTree),
// These are not exposed to macros, but are used by quasiquote.
NtArm(ast::Arm),
NtImplItem(ast::ImplItem),
NtTraitItem(ast::TraitItem),
NtForeignItem(ast::ForeignItem),
NtGenerics(ast::Generics),
NtWhereClause(ast::WhereClause),
NtArg(ast::Arg),
}
impl PartialEq for Nonterminal {
fn eq(&self, rhs: &Self) -> bool {
match (self, rhs) {
(NtIdent(ident_lhs, is_raw_lhs), NtIdent(ident_rhs, is_raw_rhs)) =>
ident_lhs == ident_rhs && is_raw_lhs == is_raw_rhs,
(NtLifetime(ident_lhs), NtLifetime(ident_rhs)) => ident_lhs == ident_rhs,
(NtTT(tt_lhs), NtTT(tt_rhs)) => tt_lhs == tt_rhs,
// FIXME: Assume that all "complex" nonterminal are not equal, we can't compare them
// correctly based on data from AST. This will prevent them from matching each other
// in macros. The comparison will become possible only when each nonterminal has an
// attached token stream from which it was parsed.
_ => false,
}
}
}
impl fmt::Debug for Nonterminal {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
NtItem(..) => f.pad("NtItem(..)"),
NtBlock(..) => f.pad("NtBlock(..)"),
NtStmt(..) => f.pad("NtStmt(..)"),
NtPat(..) => f.pad("NtPat(..)"),
NtExpr(..) => f.pad("NtExpr(..)"),
NtTy(..) => f.pad("NtTy(..)"),
NtIdent(..) => f.pad("NtIdent(..)"),
NtMeta(..) => f.pad("NtMeta(..)"),
NtPath(..) => f.pad("NtPath(..)"),
NtTT(..) => f.pad("NtTT(..)"),
NtArm(..) => f.pad("NtArm(..)"),
NtImplItem(..) => f.pad("NtImplItem(..)"),
NtTraitItem(..) => f.pad("NtTraitItem(..)"),
NtForeignItem(..) => f.pad("NtForeignItem(..)"),
NtGenerics(..) => f.pad("NtGenerics(..)"),
NtWhereClause(..) => f.pad("NtWhereClause(..)"),
NtArg(..) => f.pad("NtArg(..)"),
NtVis(..) => f.pad("NtVis(..)"),
NtLifetime(..) => f.pad("NtLifetime(..)"),
}
}
}
pub fn is_op(tok: &Token) -> bool {
match *tok {
OpenDelim(..) | CloseDelim(..) | Literal(..) | DocComment(..) |
Ident(..) | Lifetime(..) | Interpolated(..) |
Whitespace | Comment | Shebang(..) | Eof => false,
_ => true,
}
}
#[derive(Clone)]
pub struct LazyTokenStream(Lock<Option<TokenStream>>);
impl cmp::Eq for LazyTokenStream {}
impl PartialEq for LazyTokenStream {
fn eq(&self, _other: &LazyTokenStream) -> bool {
true
}
}
impl fmt::Debug for LazyTokenStream {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&self.clone().0.into_inner(), f)
}
}
impl LazyTokenStream {
pub fn new() -> Self {
LazyTokenStream(Lock::new(None))
}
pub fn force<F: FnOnce() -> TokenStream>(&self, f: F) -> TokenStream {
let mut opt_stream = self.0.lock();
if opt_stream.is_none() {
*opt_stream = Some(f());
}
opt_stream.clone().unwrap()
}
}
impl Encodable for LazyTokenStream {
fn encode<S: Encoder>(&self, _: &mut S) -> Result<(), S::Error> {
Ok(())
}
}
impl Decodable for LazyTokenStream {
fn decode<D: Decoder>(_: &mut D) -> Result<LazyTokenStream, D::Error> {
Ok(LazyTokenStream::new())
}
}
impl ::std::hash::Hash for LazyTokenStream {
fn hash<H: ::std::hash::Hasher>(&self, _hasher: &mut H) {}
}
fn prepend_attrs(sess: &ParseSess,
attrs: &[ast::Attribute],
tokens: Option<&tokenstream::TokenStream>,
span: syntax_pos::Span)
-> Option<tokenstream::TokenStream>
{
let tokens = tokens?;
if attrs.len() == 0 {
return Some(tokens.clone())
}
let mut builder = tokenstream::TokenStreamBuilder::new();
for attr in attrs {
assert_eq!(attr.style, ast::AttrStyle::Outer,
"inner attributes should prevent cached tokens from existing");
// FIXME: Avoid this pretty-print + reparse hack as bove
let name = FileName::MacroExpansion;
let source = pprust::attr_to_string(attr);
let stream = parse_stream_from_source_str(name, source, sess, Some(span));
builder.push(stream);
}
builder.push(tokens.clone());
Some(builder.build())
}