From e11e90f31cedabec1e84b505bbf64103c3421574 Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Fri, 27 Jul 2012 19:14:46 -0700 Subject: [PATCH] Make macro-system type and constructor names more uniform; more comments. --- src/libsyntax/ast.rs | 86 ++++++++++------ src/libsyntax/ext/base.rs | 25 ++--- src/libsyntax/ext/expand.rs | 2 +- src/libsyntax/ext/tt/earley_parser.rs | 135 ++++++++++++++------------ src/libsyntax/ext/tt/macro_rules.rs | 26 ++--- src/libsyntax/ext/tt/transcribe.rs | 64 ++++++------ src/libsyntax/parse/common.rs | 2 +- src/libsyntax/parse/parser.rs | 40 ++++---- src/libsyntax/parse/token.rs | 44 +++++---- 9 files changed, 230 insertions(+), 194 deletions(-) diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 35af5e7a9f54..a3480c633d62 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -362,33 +362,53 @@ type capture_item = @{ #[auto_serialize] type capture_clause = @~[capture_item]; +// +// When the main rust parser encounters a syntax-extension invocation, it +// parses the arguments to the invocation as a token-tree. This is a very +// loose structure, such that all sorts of different AST-fragments can +// be passed to syntax extensions using a uniform type. +// +// If the syntax extension is an MBE macro, it will attempt to match its +// LHS "matchers" against the provided token tree, and if it finds a +// match, will transcribe the RHS token tree, splicing in any captured +// early_parser::matched_nonterminals into the tt_nonterminals it finds. +// +// The RHS of an MBE macro is the only place a tt_nonterminal or tt_seq +// makes any real sense. You could write them elsewhere but nothing +// else knows what to do with them, so you'll probably get a syntax +// error. 
+// #[auto_serialize] #[doc="For macro invocations; parsing is delegated to the macro"] enum token_tree { + tt_tok(span, token::token), tt_delim(~[token_tree]), - tt_flat(span, token::token), - /* These only make sense for right-hand-sides of MBE macros*/ - tt_dotdotdot(span, ~[token_tree], option, bool), - tt_interpolate(span, ident) + // These only make sense for right-hand-sides of MBE macros + tt_seq(span, ~[token_tree], option, bool), + tt_nonterminal(span, ident) } -#[auto_serialize] -type matcher = spanned; - -#[auto_serialize] // // Matchers are nodes defined-by and recognized-by the main rust parser and -// language, but they're only ever found inside syntax-extension invocations. -// They represent a small sub-language for pattern-matching token-trees, and -// are thus primarily used by the macro-defining extension itself. +// language, but they're only ever found inside syntax-extension invocations; +// indeed, the only thing that ever _activates_ the rules in the rust parser +// for parsing a matcher is a matcher looking for the 'mtcs' nonterminal +// itself. Matchers represent a small sub-language for pattern-matching +// token-trees, and are thus primarily used by the macro-defining extension +// itself. // -// mtc_tok ===> A matcher that matches a single token, -// denoted by the token itself. So long as -// there's no $ involved. +// match_tok +// --------- +// +// A matcher that matches a single token, denoted by the token itself. So +// long as there's no $ involved. 
// // -// mtc_rep ===> A matcher that matches a sequence of -// sub-matchers, denoted various ways: +// match_seq +// --------- +// +// A matcher that matches a sequence of sub-matchers, denoted various +// possible ways: // // $(M)* zero or more Ms // $(M)+ one or more Ms @@ -396,12 +416,14 @@ type matcher = spanned; // $(A B C);* zero or more semi-separated 'A B C' seqs // // -// mtc_bb ===> A matcher that matches one of a few interesting named rust -// nonterminals, such as types, expressions, items, or raw -// token-trees. A black-box matcher on expr, for example, binds an -// expr to a given ident, and that ident can re-occur as an -// interpolation in the RHS of a macro-by-example rule. For -// example: +// match_nonterminal +// ----------------- +// +// A matcher that matches one of a few interesting named rust +// nonterminals, such as types, expressions, items, or raw token-trees. A +// black-box matcher on expr, for example, binds an expr to a given ident, +// and that ident can re-occur as an interpolation in the RHS of a +// macro-by-example rule. For example: // // $foo:expr => 1 + $foo // interpolate an expr // $foo:tt => $foo // interpolate a token-tree @@ -411,21 +433,25 @@ type matcher = spanned; // // As a final, horrifying aside, note that macro-by-example's input is // also matched by one of these matchers. Holy self-referential! It is matched -// by an mtc_rep, specifically this one: +// by an match_seq, specifically this one: // // $( $lhs:mtcs => $rhs:tt );+ // // If you understand that, you have closed to loop and understand the whole // macro system. Congratulations. 
// +#[auto_serialize] +type matcher = spanned; + +#[auto_serialize] enum matcher_ { - /* match one token */ - mtc_tok(token::token), - /* match repetitions of a sequence: body, separator, zero ok?, - lo, hi position-in-match-array used: */ - mtc_rep(~[matcher], option, bool, uint, uint), - /* parse a Rust NT: name to bind, name of NT, position in match array : */ - mtc_bb(ident, ident, uint) + // match one token + match_tok(token::token), + // match repetitions of a sequence: body, separator, zero ok?, + // lo, hi position-in-match-array used: + match_seq(~[matcher], option, bool, uint, uint), + // parse a Rust NT: name to bind, name of NT, position in match array: + match_nonterminal(ident, ident, uint) } #[auto_serialize] diff --git a/src/libsyntax/ext/base.rs b/src/libsyntax/ext/base.rs index 5fca084b1813..b35bd9a17e9f 100644 --- a/src/libsyntax/ext/base.rs +++ b/src/libsyntax/ext/base.rs @@ -4,18 +4,6 @@ import diagnostic::span_handler; import codemap::{codemap, span, expn_info, expanded_from}; import std::map::str_hash; - -// Nomenclature / abbreviations in the ext modules: -// -// ms: matcher span, wraps a matcher with fake span -// mtc: matcher -// mtcs: matchers -// tt: token tree -// bt: backtrace -// cx: expansion context -// mr: macro result -// - // obsolete old-style #macro code: // // syntax_expander, normal, macro_defining, macro_definer, @@ -288,17 +276,18 @@ fn get_mac_body(cx: ext_ctxt, sp: span, args: ast::mac_body) // using new syntax. This will be obsolete when #old_macros go away. 
fn tt_args_to_original_flavor(cx: ext_ctxt, sp: span, arg: ~[ast::token_tree]) -> ast::mac_arg { - import ast::{matcher, matcher_, mtc_tok, mtc_rep, mtc_bb}; + import ast::{matcher, matcher_, match_tok, match_seq, match_nonterminal}; import parse::lexer::{new_tt_reader, tt_reader_as_reader, reader}; - import tt::earley_parser::{parse_or_else, seq, leaf}; + import tt::earley_parser::{parse_or_else, matched_seq, + matched_nonterminal}; // these spans won't matter, anyways fn ms(m: matcher_) -> matcher { {node: m, span: {lo: 0u, hi: 0u, expn_info: none}} } - let argument_gram = ~[ms(mtc_rep(~[ - ms(mtc_bb(@~"arg",@~"expr", 0u)) + let argument_gram = ~[ms(match_seq(~[ + ms(match_nonterminal(@~"arg",@~"expr", 0u)) ], some(parse::token::COMMA), true, 0u, 1u))]; let arg_reader = new_tt_reader(cx.parse_sess().span_diagnostic, @@ -306,10 +295,10 @@ fn tt_args_to_original_flavor(cx: ext_ctxt, sp: span, arg: ~[ast::token_tree]) let args = alt parse_or_else(cx.parse_sess(), cx.cfg(), arg_reader as reader, argument_gram).get(@~"arg") { - @seq(s, _) { + @matched_seq(s, _) { do s.map() |lf| { alt lf { - @leaf(parse::token::w_expr(arg)) { + @matched_nonterminal(parse::token::nt_expr(arg)) { arg /* whew! list of exprs, here we come! */ } _ { fail ~"badly-structured parse result"; } diff --git a/src/libsyntax/ext/expand.rs b/src/libsyntax/ext/expand.rs index e453a0f556f2..caa5fd417a8f 100644 --- a/src/libsyntax/ext/expand.rs +++ b/src/libsyntax/ext/expand.rs @@ -1,7 +1,7 @@ import std::map::hashmap; import ast::{crate, expr_, expr_mac, mac_invoc, mac_invoc_tt, - tt_delim, tt_flat, item_mac}; + tt_delim, tt_tok, item_mac}; import fold::*; import ext::base::*; import ext::qquote::{qq_helper}; diff --git a/src/libsyntax/ext/tt/earley_parser.rs b/src/libsyntax/ext/tt/earley_parser.rs index be331a08fcfe..9b6383d5d3a2 100644 --- a/src/libsyntax/ext/tt/earley_parser.rs +++ b/src/libsyntax/ext/tt/earley_parser.rs @@ -1,6 +1,6 @@ // Earley-like parser for macros. 
import parse::token; -import parse::token::{token, EOF, to_str, whole_nt}; +import parse::token::{token, EOF, to_str, nonterminal}; import parse::lexer::*; //resolve bug? //import parse::lexer::{reader, tt_reader, tt_reader_as_reader}; import parse::parser::{parser,SOURCE_FILE}; @@ -8,20 +8,22 @@ import parse::parser::{parser,SOURCE_FILE}; import parse::common::*; //resolve bug? import parse::parse_sess; import dvec::{dvec, extensions}; -import ast::{matcher, mtc_tok, mtc_rep, mtc_bb, ident}; +import ast::{matcher, match_tok, match_seq, match_nonterminal, ident}; import ast_util::mk_sp; import std::map::{hashmap, box_str_hash}; -/* This is an Earley-like parser, without support for nonterminals. This -means that there are no completer or predictor rules, and therefore no need to -store one column per token: instead, there's a set of current Earley items and -a set of next ones. Instead of NTs, we have a special case for Kleene -star. The big-O, in pathological cases, is worse than traditional Earley -parsing, but it's an easier fit for Macro-by-Example-style rules, and I think -the overhead is lower. */ +/* This is an Earley-like parser, without support for in-grammar nonterminals, +only calling out to the main rust parser for named nonterminals (which it +commits to fully when it hits one in a grammar). This means that there are no +completer or predictor rules, and therefore no need to store one column per +token: instead, there's a set of current Earley items and a set of next +ones. Instead of NTs, we have a special case for Kleene star. The big-O, in +pathological cases, is worse than traditional Earley parsing, but it's an +easier fit for Macro-by-Example-style rules, and I think the overhead is +lower. */ -/* to avoid costly uniqueness checks, we require that `match_seq` always has a nonempty body. 
*/ enum matcher_pos_up { /* to break a circularity */ @@ -40,7 +42,7 @@ type matcher_pos = ~{ sep: option, mut idx: uint, mut up: matcher_pos_up, // mutable for swapping only - matches: ~[dvec<@arb_depth>], + matches: ~[dvec<@named_match>], match_lo: uint, match_hi: uint, sp_lo: uint, }; @@ -55,9 +57,9 @@ fn copy_up(&& mpu: matcher_pos_up) -> matcher_pos { fn count_names(ms: &[matcher]) -> uint { vec::foldl(0u, ms, |ct, m| { ct + alt m.node { - mtc_tok(_) { 0u } - mtc_rep(more_ms, _, _, _, _) { count_names(more_ms) } - mtc_bb(_,_,_) { 1u } + match_tok(_) { 0u } + match_seq(more_ms, _, _, _, _) { count_names(more_ms) } + match_nonterminal(_,_,_) { 1u } }}) } @@ -67,9 +69,13 @@ fn initial_matcher_pos(ms: ~[matcher], sep: option, lo: uint) let mut match_idx_hi = 0u; for ms.each() |elt| { alt elt.node { - mtc_tok(_) {} - mtc_rep(_,_,_,_,hi) { match_idx_hi = hi; } //it is monotonic... - mtc_bb(_,_,pos) { match_idx_hi = pos+1u; } //...so latest is highest + match_tok(_) {} + match_seq(_,_,_,_,hi) { + match_idx_hi = hi; // it is monotonic... + } + match_nonterminal(_,_,pos) { + match_idx_hi = pos+1u; // ...so latest is highest + } } } ~{elts: ms, sep: sep, mut idx: 0u, mut up: matcher_pos_up(none), @@ -77,38 +83,42 @@ fn initial_matcher_pos(ms: ~[matcher], sep: option, lo: uint) match_lo: 0u, match_hi: match_idx_hi, sp_lo: lo} } -// arb_depth is a pattern-match result for a single black-box matcher -// (ast::mtc_bb): so it is associated with a single ident in a parse, and all -// leaves in the arb_depth have the same nonterminal type (expr, item, -// etc). All the leaves in a single arb_depth correspond to a single mtc_bb in -// the ast::matcher that produced it. +// named_match is a pattern-match result for a single ast::match_nonterminal: +// so it is associated with a single ident in a parse, and all +// matched_nonterminals in the named_match have the same nonterminal type +// (expr, item, etc). 
All the leaves in a single named_match correspond to a +// single match_nonterminal in the ast::matcher that produced it. // // It should probably be renamed, it has more or less exact correspondence to -// ast::match nodes, and the in-memory structure of a particular arb_depth +// ast::match nodes, and the in-memory structure of a particular named_match // represents the match that occurred when a particular subset of an -// ast::match -- those ast::matcher nodes leading to a single mtc_bb -- was -// applied to a particular token tree. +// ast::match -- those ast::matcher nodes leading to a single +// match_nonterminal -- was applied to a particular token tree. // -// The width of each seq in the arb_depth, and the identity of the leaf nodes, -// will depend on the token tree it was applied to: each seq corresponds to a -// single mtc_rep in the originating ast::matcher. The depth of the arb_depth -// structure will therefore depend only on the nesting depth of mtc_reps in -// the originating ast::matcher it was derived from. +// The width of each matched_seq in the named_match, and the identity of the +// matched_nonterminals, will depend on the token tree it was applied to: each +// matched_seq corresponds to a single match_seq in the originating +// ast::matcher. The depth of the named_match structure will therefore depend +// only on the nesting depth of ast::match_seqs in the originating +// ast::matcher it was derived from. 
-enum arb_depth { leaf(whole_nt), seq(~[@arb_depth], codemap::span) } +enum named_match { + matched_seq(~[@named_match], codemap::span), + matched_nonterminal(nonterminal) +} type earley_item = matcher_pos; -fn nameize(p_s: parse_sess, ms: ~[matcher], res: ~[@arb_depth]) - -> hashmap { - fn n_rec(p_s: parse_sess, m: matcher, res: ~[@arb_depth], - ret_val: hashmap) { +fn nameize(p_s: parse_sess, ms: ~[matcher], res: ~[@named_match]) + -> hashmap { + fn n_rec(p_s: parse_sess, m: matcher, res: ~[@named_match], + ret_val: hashmap) { alt m { - {node: mtc_tok(_), span: _} { } - {node: mtc_rep(more_ms, _, _, _, _), span: _} { + {node: match_tok(_), span: _} { } + {node: match_seq(more_ms, _, _, _, _), span: _} { for more_ms.each() |next_m| { n_rec(p_s, next_m, res, ret_val) }; } - {node: mtc_bb(bind_name, _, idx), span: sp} { + {node: match_nonterminal(bind_name, _, idx), span: sp} { if ret_val.contains_key(bind_name) { p_s.span_diagnostic.span_fatal(sp, ~"Duplicated bind name: " + *bind_name) @@ -117,18 +127,18 @@ fn nameize(p_s: parse_sess, ms: ~[matcher], res: ~[@arb_depth]) } } } - let ret_val = box_str_hash::<@arb_depth>(); + let ret_val = box_str_hash::<@named_match>(); for ms.each() |m| { n_rec(p_s, m, res, ret_val) } ret ret_val; } enum parse_result { - success(hashmap), + success(hashmap), failure(codemap::span, ~str) } fn parse_or_else(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, - ms: ~[matcher]) -> hashmap { + ms: ~[matcher]) -> hashmap { alt parse(sess, cfg, rdr, ms) { success(m) { m } failure(sp, str) { @@ -182,7 +192,9 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher]) for uint::range(ei.match_lo, ei.match_hi) |idx| { let sub = ei.matches[idx].get(); new_pos.matches[idx] - .push(@seq(sub, mk_sp(ei.sp_lo,sp.hi))); + .push(@matched_seq(sub, + mk_sp(ei.sp_lo, + sp.hi))); } new_pos.idx += 1u; @@ -212,20 +224,21 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher]) } else { alt copy ei.elts[idx].node 
{ /* need to descend into sequence */ - mtc_rep(matchers, sep, zero_ok, match_idx_lo, match_idx_hi){ + match_seq(matchers, sep, zero_ok, + match_idx_lo, match_idx_hi){ if zero_ok { let new_ei = copy ei; new_ei.idx += 1u; //we specifically matched zero repeats. for uint::range(match_idx_lo, match_idx_hi) |idx| { - new_ei.matches[idx].push(@seq(~[], sp)); + new_ei.matches[idx].push(@matched_seq(~[], sp)); } vec::push(cur_eis, new_ei); } let matches = vec::map(ei.matches, // fresh, same size: - |_m| dvec::<@arb_depth>()); + |_m| dvec::<@named_match>()); let ei_t <- ei; vec::push(cur_eis, ~{ elts: matchers, sep: sep, mut idx: 0u, @@ -235,8 +248,8 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher]) sp_lo: sp.lo }); } - mtc_bb(_,_,_) { vec::push(bb_eis, ei) } - mtc_tok(t) { + match_nonterminal(_,_,_) { vec::push(bb_eis, ei) } + match_tok(t) { let ei_t <- ei; if t == tok { ei_t.idx += 1u; vec::push(next_eis, ei_t)} } @@ -260,7 +273,7 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher]) || bb_eis.len() > 1u { let nts = str::connect(vec::map(bb_eis, |ei| { alt ei.elts[ei.idx].node { - mtc_bb(bind,name,_) { + match_nonterminal(bind,name,_) { #fmt["%s ('%s')", *name, *bind] } _ { fail; } } }), ~" or "); @@ -282,8 +295,8 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher]) let ei = vec::pop(bb_eis); alt ei.elts[ei.idx].node { - mtc_bb(_, name, idx) { - ei.matches[idx].push(@leaf( + match_nonterminal(_, name, idx) { + ei.matches[idx].push(@matched_nonterminal( parse_nt(rust_parser, *name))); ei.idx += 1u; } @@ -305,31 +318,31 @@ fn parse(sess: parse_sess, cfg: ast::crate_cfg, rdr: reader, ms: ~[matcher]) } } -fn parse_nt(p: parser, name: ~str) -> whole_nt { +fn parse_nt(p: parser, name: ~str) -> nonterminal { alt name { ~"item" { alt p.parse_item(~[], ast::public) { - some(i) { token::w_item(i) } + some(i) { token::nt_item(i) } none { p.fatal(~"expected an item keyword") } }} - ~"block" { 
token::w_block(p.parse_block()) } - ~"stmt" { token::w_stmt(p.parse_stmt(~[])) } - ~"pat" { token::w_pat(p.parse_pat()) } - ~"expr" { token::w_expr(p.parse_expr()) } - ~"ty" { token::w_ty(p.parse_ty(false /* no need to disambiguate*/)) } + ~"block" { token::nt_block(p.parse_block()) } + ~"stmt" { token::nt_stmt(p.parse_stmt(~[])) } + ~"pat" { token::nt_pat(p.parse_pat()) } + ~"expr" { token::nt_expr(p.parse_expr()) } + ~"ty" { token::nt_ty(p.parse_ty(false /* no need to disambiguate*/)) } // this could be handled like a token, since it is one ~"ident" { alt copy p.token { - token::IDENT(sn,b) { p.bump(); token::w_ident(sn,b) } + token::IDENT(sn,b) { p.bump(); token::nt_ident(sn,b) } _ { p.fatal(~"expected ident, found " + token::to_str(*p.reader.interner(), copy p.token)) } } } - ~"path" { token::w_path(p.parse_path_with_tps(false)) } + ~"path" { token::nt_path(p.parse_path_with_tps(false)) } ~"tt" { p.quote_depth += 1u; //but in theory, non-quoted tts might be useful - let res = token::w_tt(@p.parse_token_tree()); + let res = token::nt_tt(@p.parse_token_tree()); p.quote_depth -= 1u; res } - ~"mtcs" { token::w_mtcs(p.parse_matchers()) } + ~"matchers" { token::nt_matchers(p.parse_matchers()) } _ { p.fatal(~"Unsupported builtin nonterminal parser: " + name)} } } diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index a1bb879965fb..cd6ebce4394a 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -1,10 +1,12 @@ import base::{ext_ctxt, mac_result, mr_expr, mr_def, expr_tt}; import codemap::span; -import ast::{ident, matcher_, matcher, mtc_tok, mtc_bb, mtc_rep, tt_delim}; +import ast::{ident, matcher_, matcher, match_tok, + match_nonterminal, match_seq, tt_delim}; import parse::lexer::{new_tt_reader, tt_reader_as_reader, reader}; -import parse::token::{FAT_ARROW, SEMI, LBRACE, RBRACE, w_mtcs, w_tt}; +import parse::token::{FAT_ARROW, SEMI, LBRACE, RBRACE, nt_matchers, nt_tt}; import 
parse::parser::{parser, SOURCE_FILE}; -import earley_parser::{parse, success, failure, arb_depth, seq, leaf}; +import earley_parser::{parse, success, failure, named_match, + matched_seq, matched_nonterminal}; import std::map::hashmap; @@ -17,10 +19,10 @@ fn add_new_extension(cx: ext_ctxt, sp: span, name: ident, } let argument_gram = ~[ - ms(mtc_rep(~[ - ms(mtc_bb(@~"lhs",@~"mtcs", 0u)), - ms(mtc_tok(FAT_ARROW)), - ms(mtc_bb(@~"rhs",@~"tt", 1u)), + ms(match_seq(~[ + ms(match_nonterminal(@~"lhs",@~"matchers", 0u)), + ms(match_tok(FAT_ARROW)), + ms(match_nonterminal(@~"rhs",@~"tt", 1u)), ], some(SEMI), false, 0u, 2u))]; let arg_reader = new_tt_reader(cx.parse_sess().span_diagnostic, @@ -32,16 +34,16 @@ fn add_new_extension(cx: ext_ctxt, sp: span, name: ident, }; let lhses = alt arguments.get(@~"lhs") { - @seq(s, sp) { s } + @matched_seq(s, sp) { s } _ { cx.span_bug(sp, ~"wrong-structured lhs") } }; let rhses = alt arguments.get(@~"rhs") { - @seq(s, sp) { s } + @matched_seq(s, sp) { s } _ { cx.span_bug(sp, ~"wrong-structured rhs") } }; fn generic_extension(cx: ext_ctxt, sp: span, arg: ~[ast::token_tree], - lhses: ~[@arb_depth], rhses: ~[@arb_depth]) + lhses: ~[@named_match], rhses: ~[@named_match]) -> mac_result { let mut best_fail_spot = {lo: 0u, hi: 0u, expn_info: none}; let mut best_fail_msg = ~"internal error: ran no matchers"; @@ -51,12 +53,12 @@ fn add_new_extension(cx: ext_ctxt, sp: span, name: ident, for lhses.eachi() |i, lhs| { alt lhs { - @leaf(w_mtcs(mtcs)) { + @matched_nonterminal(nt_matchers(mtcs)) { let arg_rdr = new_tt_reader(s_d, itr, none, arg) as reader; alt parse(cx.parse_sess(), cx.cfg(), arg_rdr, mtcs) { success(m) { let rhs = alt rhses[i] { - @leaf(w_tt(@tt)) { tt } + @matched_nonterminal(nt_tt(@tt)) { tt } _ { cx.span_bug(sp, ~"bad thing in rhs") } }; let trncbr = new_tt_reader(s_d, itr, some(m), ~[rhs]); diff --git a/src/libsyntax/ext/tt/transcribe.rs b/src/libsyntax/ext/tt/transcribe.rs index 1a7ccda3da42..b9d490e9e287 100644 --- 
a/src/libsyntax/ext/tt/transcribe.rs +++ b/src/libsyntax/ext/tt/transcribe.rs @@ -1,10 +1,10 @@ import util::interner::interner; import diagnostic::span_handler; -import ast::{token_tree,tt_delim,tt_flat,tt_dotdotdot,tt_interpolate,ident}; -import earley_parser::{arb_depth,seq,leaf}; +import ast::{token_tree, tt_delim, tt_tok, tt_seq, tt_nonterminal,ident}; +import earley_parser::{named_match, matched_seq, matched_nonterminal}; import codemap::span; -import parse::token::{EOF,ACTUALLY,IDENT,token,w_ident}; -import std::map::{hashmap,box_str_hash}; +import parse::token::{EOF, INTERPOLATED, IDENT, token, nt_ident}; +import std::map::{hashmap, box_str_hash}; export tt_reader, new_tt_reader, dup_tt_reader, tt_next_token; @@ -28,7 +28,7 @@ type tt_reader = @{ interner: @interner<@~str>, mut cur: tt_frame, /* for MBE-style macro transcription */ - interpolations: std::map::hashmap, + interpolations: std::map::hashmap, mut repeat_idx: ~[mut uint], mut repeat_len: ~[uint], /* cached: */ @@ -37,17 +37,17 @@ type tt_reader = @{ }; /** This can do Macro-By-Example transcription. On the other hand, if - * `src` contains no `tt_dotdotdot`s and `tt_interpolate`s, `interp` can (and + * `src` contains no `tt_seq`s and `tt_nonterminal`s, `interp` can (and * should) be none. 
*/ fn new_tt_reader(sp_diag: span_handler, itr: @interner<@~str>, - interp: option>, + interp: option>, src: ~[ast::token_tree]) -> tt_reader { let r = @{sp_diag: sp_diag, interner: itr, mut cur: @{readme: src, mut idx: 0u, dotdotdoted: false, sep: none, up: tt_frame_up(option::none)}, interpolations: alt interp { /* just a convienience */ - none { std::map::box_str_hash::<@arb_depth>() } + none { std::map::box_str_hash::<@named_match>() } some(x) { x } }, mut repeat_idx: ~[mut], mut repeat_len: ~[], @@ -79,18 +79,22 @@ pure fn dup_tt_reader(&&r: tt_reader) -> tt_reader { } -pure fn lookup_cur_ad_by_ad(r: tt_reader, start: @arb_depth) -> @arb_depth { - pure fn red(&&ad: @arb_depth, &&idx: uint) -> @arb_depth { +pure fn lookup_cur_matched_by_matched(r: tt_reader, + start: @named_match) -> @named_match { + pure fn red(&&ad: @named_match, &&idx: uint) -> @named_match { alt *ad { - leaf(_) { ad /* end of the line; duplicate henceforth */ } - seq(ads, _) { ads[idx] } + matched_nonterminal(_) { + // end of the line; duplicate henceforth + ad + } + matched_seq(ads, _) { ads[idx] } } } vec::foldl(start, r.repeat_idx, red) } -fn lookup_cur_ad(r: tt_reader, name: ident) -> @arb_depth { - lookup_cur_ad_by_ad(r, r.interpolations.get(name)) +fn lookup_cur_matched(r: tt_reader, name: ident) -> @named_match { + lookup_cur_matched_by_matched(r, r.interpolations.get(name)) } enum lis { lis_unconstrained, lis_constraint(uint, ident), lis_contradiction(~str) @@ -116,15 +120,15 @@ fn lockstep_iter_size(&&t: token_tree, &&r: tt_reader) -> lis { } } alt t { - tt_delim(tts) | tt_dotdotdot(_, tts, _, _) { + tt_delim(tts) | tt_seq(_, tts, _, _) { vec::foldl(lis_unconstrained, tts, {|lis, tt| lis_merge(lis, lockstep_iter_size(tt, r)) }) } - tt_flat(*) { lis_unconstrained } - tt_interpolate(_, name) { - alt *lookup_cur_ad(r, name) { - leaf(_) { lis_unconstrained } - seq(ads, _) { lis_constraint(ads.len(), name) } + tt_tok(*) { lis_unconstrained } + tt_nonterminal(_, name) { + alt 
*lookup_cur_matched(r, name) { + matched_nonterminal(_) { lis_unconstrained } + matched_seq(ads, _) { lis_constraint(ads.len(), name) } } } } @@ -166,20 +170,20 @@ fn tt_next_token(&&r: tt_reader) -> {tok: token, sp: span} { } } loop { /* because it's easiest, this handles `tt_delim` not starting - with a `tt_flat`, even though it won't happen */ + with a `tt_tok`, even though it won't happen */ alt r.cur.readme[r.cur.idx] { tt_delim(tts) { r.cur = @{readme: tts, mut idx: 0u, dotdotdoted: false, sep: none, up: tt_frame_up(option::some(r.cur)) }; // if this could be 0-length, we'd need to potentially recur here } - tt_flat(sp, tok) { + tt_tok(sp, tok) { r.cur_span = sp; r.cur_tok = tok; r.cur.idx += 1u; ret ret_val; } - tt_dotdotdot(sp, tts, sep, zerok) { - alt lockstep_iter_size(tt_dotdotdot(sp, tts, sep, zerok), r) { + tt_seq(sp, tts, sep, zerok) { + alt lockstep_iter_size(tt_seq(sp, tts, sep, zerok), r) { lis_unconstrained { r.sp_diag.span_fatal( sp, /* blame macro writer */ @@ -211,22 +215,22 @@ fn tt_next_token(&&r: tt_reader) -> {tok: token, sp: span} { } } // FIXME #2887: think about span stuff here - tt_interpolate(sp, ident) { - alt *lookup_cur_ad(r, ident) { + tt_nonterminal(sp, ident) { + alt *lookup_cur_matched(r, ident) { /* sidestep the interpolation tricks for ident because (a) idents can be in lots of places, so it'd be a pain (b) we actually can, since it's a token. 
*/ - leaf(w_ident(sn,b)) { + matched_nonterminal(nt_ident(sn,b)) { r.cur_span = sp; r.cur_tok = IDENT(sn,b); r.cur.idx += 1u; ret ret_val; } - leaf(w_nt) { - r.cur_span = sp; r.cur_tok = ACTUALLY(w_nt); + matched_nonterminal(nt) { + r.cur_span = sp; r.cur_tok = INTERPOLATED(nt); r.cur.idx += 1u; ret ret_val; } - seq(*) { + matched_seq(*) { r.sp_diag.span_fatal( copy r.cur_span, /* blame the macro writer */ #fmt["variable '%s' is still repeating at this depth", diff --git a/src/libsyntax/parse/common.rs b/src/libsyntax/parse/common.rs index e35b762f9720..7898cd648908 100644 --- a/src/libsyntax/parse/common.rs +++ b/src/libsyntax/parse/common.rs @@ -86,7 +86,7 @@ impl parser_common of parser_common for parser { fn parse_ident() -> ast::ident { alt copy self.token { token::IDENT(i, _) { self.bump(); ret self.get_str(i); } - token::ACTUALLY(token::w_ident(*)) { self.bug( + token::INTERPOLATED(token::nt_ident(*)) { self.bug( ~"ident interpolation not converted to real token"); } _ { self.fatal(~"expected ident, found `" + token_to_str(self.reader, self.token) diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 51f6ae021cdd..888bb2568523 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -3,7 +3,7 @@ import print::pprust::expr_to_str; import result::result; import either::{either, left, right}; import std::map::{hashmap, str_hash}; -import token::{can_begin_expr, is_ident, is_plain_ident, ACTUALLY}; +import token::{can_begin_expr, is_ident, is_plain_ident, INTERPOLATED}; import codemap::{span,fss_none}; import util::interner; import ast_util::{spanned, respan, mk_sp, ident_to_path, operator_prec}; @@ -39,15 +39,15 @@ import ast::{_mod, add, alt_check, alt_exhaustive, arg, arm, attribute, item_ty, lit, lit_, lit_bool, lit_float, lit_int, lit_int_unsuffixed, lit_nil, lit_str, lit_uint, local, m_const, m_imm, m_mutbl, mac_, mac_aq, mac_ellipsis, - mac_invoc, mac_invoc_tt, mac_var, matcher, - method, mode, mt, 
mtc_bb, mtc_rep, mtc_tok, mul, mutability, neg, + mac_invoc, mac_invoc_tt, mac_var, matcher, match_nonterminal, + match_seq, match_tok, method, mode, mt, mul, mutability, neg, noreturn, not, pat, pat_box, pat_enum, pat_ident, pat_lit, pat_range, pat_rec, pat_tup, pat_uniq, pat_wild, path, private, proto, proto_any, proto_bare, proto_block, proto_box, proto_uniq, provided, public, pure_fn, purity, re_anon, re_named, region, rem, required, ret_style, return_val, shl, shr, stmt, stmt_decl, stmt_expr, stmt_semi, subtract, token_tree, trait_method, - trait_ref, tt_delim, tt_dotdotdot, tt_flat, tt_interpolate, ty, + trait_ref, tt_delim, tt_seq, tt_tok, tt_nonterminal, ty, ty_, ty_bot, ty_box, ty_field, ty_fn, ty_infer, ty_mac, ty_method, ty_nil, ty_param, ty_path, ty_ptr, ty_rec, ty_rptr, ty_tup, ty_u32, ty_uniq, ty_vec, ty_fixed_length, unchecked_blk, @@ -104,14 +104,14 @@ type item_info = (ident, item_, option<~[attribute]>); /* The expr situation is not as complex as I thought it would be. The important thing is to make sure that lookahead doesn't balk -at ACTUALLY tokens */ -macro_rules! maybe_whole_expr{ +at INTERPOLATED tokens */ +macro_rules! maybe_whole_expr { {$p:expr} => { alt copy $p.token { - ACTUALLY(token::w_expr(e)) { + INTERPOLATED(token::nt_expr(e)) { $p.bump(); ret pexpr(e); } - ACTUALLY(token::w_path(pt)) { + INTERPOLATED(token::nt_path(pt)) { $p.bump(); ret $p.mk_pexpr($p.span.lo, $p.span.lo, expr_path(pt)); @@ -122,7 +122,7 @@ macro_rules! maybe_whole_expr{ macro_rules! maybe_whole { {$p:expr, $constructor:path} => { alt copy $p.token { - ACTUALLY($constructor(x)) { $p.bump(); ret x; } + INTERPOLATED($constructor(x)) { $p.bump(); ret x; } _ {} }} } @@ -133,7 +133,7 @@ fn dummy() { /* we will need this to bootstrap maybe_whole! 
*/ #macro[[#maybe_whole_path[p], alt p.token { - ACTUALLY(token::w_path(pt)) { p.bump(); ret pt; } + INTERPOLATED(token::nt_path(pt)) { p.bump(); ret pt; } _ {} }]]; } @@ -1090,7 +1090,7 @@ class parser { } } - fn parse_tt_flat(p: parser, delim_ok: bool) -> token_tree { + fn parse_tt_tok(p: parser, delim_ok: bool) -> token_tree { alt p.token { token::RPAREN | token::RBRACE | token::RBRACKET if !delim_ok { @@ -1110,14 +1110,14 @@ class parser { seq_sep_none(), |p| p.parse_token_tree()); let (s, z) = p.parse_sep_and_zerok(); - ret tt_dotdotdot(mk_sp(sp.lo ,p.span.hi), seq.node, s, z); + ret tt_seq(mk_sp(sp.lo ,p.span.hi), seq.node, s, z); } else { - ret tt_interpolate(sp, p.parse_ident()); + ret tt_nonterminal(sp, p.parse_ident()); } } _ { /* ok */ } } - let res = tt_flat(p.span, p.token); + let res = tt_tok(p.span, p.token); p.bump(); ret res; } @@ -1126,14 +1126,14 @@ class parser { token::LPAREN | token::LBRACE | token::LBRACKET { let ket = flip(self.token); tt_delim(vec::append( - ~[parse_tt_flat(self, true)], + ~[parse_tt_tok(self, true)], vec::append( self.parse_seq_to_before_end( ket, seq_sep_none(), |p| p.parse_token_tree()), - ~[parse_tt_flat(self, true)]))) + ~[parse_tt_tok(self, true)]))) } - _ { parse_tt_flat(self, false) } + _ { parse_tt_tok(self, false) } }; } @@ -1177,17 +1177,17 @@ class parser { self.fatal(~"repetition body must be nonempty"); } let (sep, zerok) = self.parse_sep_and_zerok(); - mtc_rep(ms, sep, zerok, name_idx_lo, *name_idx) + match_seq(ms, sep, zerok, name_idx_lo, *name_idx) } else { let bound_to = self.parse_ident(); self.expect(token::COLON); let nt_name = self.parse_ident(); - let m = mtc_bb(bound_to, nt_name, *name_idx); + let m = match_nonterminal(bound_to, nt_name, *name_idx); *name_idx += 1u; m } } else { - let m = mtc_tok(self.token); + let m = match_tok(self.token); self.bump(); m }; diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index a39b74eaca37..51d5d52ebe8d 100644 --- 
a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -79,7 +79,7 @@ enum token { UNDERSCORE, /* For interpolation */ - ACTUALLY(whole_nt), + INTERPOLATED(nonterminal), DOC_COMMENT(str_num), EOF, @@ -87,17 +87,17 @@ enum token { #[auto_serialize] /// For interpolation during macro expansion. -enum whole_nt { - w_item(@ast::item), - w_block(ast::blk), - w_stmt(@ast::stmt), - w_pat( @ast::pat), - w_expr(@ast::expr), - w_ty( @ast::ty), - w_ident(str_num, bool), - w_path(@ast::path), - w_tt( @ast::token_tree), //needs @ed to break a circularity - w_mtcs(~[ast::matcher]) +enum nonterminal { + nt_item(@ast::item), + nt_block(ast::blk), + nt_stmt(@ast::stmt), + nt_pat( @ast::pat), + nt_expr(@ast::expr), + nt_ty( @ast::ty), + nt_ident(str_num, bool), + nt_path(@ast::path), + nt_tt( @ast::token_tree), //needs @ed to break a circularity + nt_matchers(~[ast::matcher]) } fn binop_to_str(o: binop) -> ~str { @@ -184,14 +184,14 @@ fn to_str(in: interner<@~str>, t: token) -> ~str { /* Other */ DOC_COMMENT(s) { *interner::get(in, s) } EOF { ~"" } - ACTUALLY(w_nt) { + INTERPOLATED(nt) { ~"an interpolated " + - alt w_nt { - w_item(*) { ~"item" } w_block(*) { ~"block" } - w_stmt(*) { ~"statement" } w_pat(*) { ~"pattern" } - w_expr(*) { ~"expression" } w_ty(*) { ~"type" } - w_ident(*) { ~"identifier" } w_path(*) { ~"path" } - w_tt(*) { ~"tt" } w_mtcs(*) { ~"matcher sequence" } + alt nt { + nt_item(*) { ~"item" } nt_block(*) { ~"block" } + nt_stmt(*) { ~"statement" } nt_pat(*) { ~"pattern" } + nt_expr(*) { ~"expression" } nt_ty(*) { ~"type" } + nt_ident(*) { ~"identifier" } nt_path(*) { ~"path" } + nt_tt(*) { ~"tt" } nt_matchers(*) { ~"matcher sequence" } } } } @@ -219,8 +219,10 @@ pure fn can_begin_expr(t: token) -> bool { BINOP(OR) { true } // in lambda syntax OROR { true } // in lambda syntax MOD_SEP { true } - ACTUALLY(w_expr(*)) | ACTUALLY(w_ident(*)) | ACTUALLY(w_block(*)) - | ACTUALLY(w_path(*)) { true } + INTERPOLATED(nt_expr(*)) + | INTERPOLATED(nt_ident(*)) + | 
INTERPOLATED(nt_block(*)) + | INTERPOLATED(nt_path(*)) { true } _ { false } } }