From 19e1f5cdb6a47070fd5f12993e947ea6db0eb5dd Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 14 Jul 2014 01:52:18 -0700 Subject: [PATCH 01/12] Lexer; subtly wrong; no makefile --- src/grammar/README.md | 19 ++++ src/grammar/RustLexer.g4 | 165 +++++++++++++++++++++++++++++ src/grammar/verify.rs | 217 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 401 insertions(+) create mode 100644 src/grammar/README.md create mode 100644 src/grammar/RustLexer.g4 create mode 100644 src/grammar/verify.rs diff --git a/src/grammar/README.md b/src/grammar/README.md new file mode 100644 index 000000000000..69f8ab1e486a --- /dev/null +++ b/src/grammar/README.md @@ -0,0 +1,19 @@ +Reference grammar. + +Uses [antlr4](http://www.antlr.org/) and a custom Rust tool to compare +ASTs/token streams generated. + +To use: + +``` +antlr4 RustLexer.g4 +javac *.java +rustc -O verify.rs +for file in ../*/**.rs; do + echo $file; + grun RustLexer tokens -tokens < $file | ./verify $file || break +done +``` + +Note that the `../*/**.rs` glob will match every `*.rs` file in the above +directory and all of its recursive children. This is a zsh extension. diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4 new file mode 100644 index 000000000000..8a1a39aea0dd --- /dev/null +++ b/src/grammar/RustLexer.g4 @@ -0,0 +1,165 @@ +lexer grammar RustLexer; + +/* Note: due to antlr limitations, we can't represent XID_start and + * XID_continue properly. ASCII-only substitute. */ + +fragment XID_start : [_a-zA-Z] ; +fragment XID_continue : [_a-zA-Z0-9] ; + +/* Expression-operator symbols */ + +EQ : '=' ; +LT : '<' ; +LE : '<=' ; +EQEQ : '==' ; +NE : '!=' ; +GE : '>=' ; +GT : '>' ; +ANDAND : '&&' ; +OROR : '||' ; +NOT : '!' 
; +TILDE : '~' ; +PLUS : '+' ; +MINUS : '-' ; +STAR : '*' ; +SLASH : '/' ; +PERCENT : '%' ; +CARET : '^' ; +AND : '&' ; +OR : '|' ; +SHL : '<<' ; +SHR : '>>' ; + +BINOP + : PLUS + | MINUS + | STAR + | PERCENT + | CARET + | AND + | OR + | SHL + | SHR + ; + +BINOPEQ : BINOP EQ ; + +/* "Structural symbols" */ + +AT : '@' ; +DOT : '.' ; +DOTDOT : '..' ; +DOTDOTDOT : '...' ; +COMMA : ',' ; +SEMI : ';' ; +COLON : ':' ; +MOD_SEP : '::' ; +RARROW : '->' ; +FAT_ARROW : '=>' ; +LPAREN : '(' ; +RPAREN : ')' ; +LBRACKET : '[' ; +RBRACKET : ']' ; +LBRACE : '{' ; +RBRACE : '}' ; +POUND : '#'; +DOLLAR : '$' ; +UNDERSCORE : '_' ; + +// Literals + +fragment HEXIT + : [0-9a-fA-F] + ; + +fragment CHAR_ESCAPE + : [nrt\\'"0] + | [xX] HEXIT HEXIT + | 'u' HEXIT HEXIT HEXIT HEXIT + | 'U' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT + ; + +LIT_CHAR + : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] ) '\'' + ; + +INT_SUFFIX + : 'i' + | 'i8' + | 'i16' + | 'i32' + | 'i64' + | 'u' + | 'u8' + | 'u16' + | 'u32' + | 'u64' + ; + +LIT_INTEGER + : [0-9][0-9_]* INT_SUFFIX? + | '0b' [01][01_]* INT_SUFFIX? + | '0o' [0-7][0-7_]* INT_SUFFIX? + | '0x' [0-9a-fA-F][0-9a-fA-F_]* INT_SUFFIX? + ; + +FLOAT_SUFFIX + : 'f32' + | 'f64' + | 'f128' + ; + +LIT_FLOAT + : [0-9][0-9_]* ('.' | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? FLOAT_SUFFIX?) + ; + +LIT_STR + : '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? '"' + ; + +LIT_BINARY : 'b' LIT_STR ; +LIT_BINARY_RAW : 'b' LIT_STR_RAW ; + +/* this is a bit messy */ + +fragment LIT_STR_RAW_INNER + : '"' .*? '"' + | LIT_STR_RAW_INNER2 + ; + +fragment LIT_STR_RAW_INNER2 + : POUND LIT_STR_RAW_INNER POUND + ; + +LIT_STR_RAW + : 'r' LIT_STR_RAW_INNER + ; + +IDENT : XID_start XID_continue* ; + +LIFETIME : '\'' IDENT ; + +WHITESPACE : [ \r\n\t]+ ; + +COMMENT + : '//' ~[\r\n]* + | '////' ~[\r\n]* + | BLOCK_COMMENT + ; + +mode DOCCOMMENT; + +fragment DOC_BLOCK_COMMENT + : ('/**' | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' + ; + +DOC_COMMENT + : '///' ~[\r\n]* + | '//!' 
~[\r\n]* + | DOC_BLOCK_COMMENT + ; + +fragment BLOCK_COMMENT + : '/*' (BLOCK_COMMENT | .)*? '*/' + ; + diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs new file mode 100644 index 000000000000..56c78b89ba2e --- /dev/null +++ b/src/grammar/verify.rs @@ -0,0 +1,217 @@ +#![feature(globs, phase, macro_rules)] + +extern crate syntax; +extern crate rustc; + +#[phase(link)] +extern crate regex; + +#[phase(link, plugin)] +extern crate log; + +#[phase(plugin)] extern crate regex_macros; + +use std::collections::HashMap; +use std::io::File; + +use syntax::parse; +use syntax::parse::lexer; +use rustc::driver::{session, config}; + +use syntax::ast; +use syntax::ast::Name; +use syntax::parse::token::*; +use syntax::parse::lexer::TokenAndSpan; + +fn parse_token_list(file: &str) -> HashMap { + fn id() -> Token { + IDENT(ast::Ident { name: Name(0), ctxt: 0, }, false) + } + + let mut res = HashMap::new(); + + res.insert("-1".to_string(), EOF); + + for line in file.split('\n') { + let eq = match line.trim().rfind('=') { + Some(val) => val, + None => continue + }; + + let val = line.slice_to(eq); + let num = line.slice_from(eq + 1); + + let tok = match val { + "SHR" => BINOP(SHR), + "DOLLAR" => DOLLAR, + "LT" => LT, + "STAR" => BINOP(STAR), + "FLOAT_SUFFIX" => id(), + "INT_SUFFIX" => id(), + "SHL" => BINOP(SHL), + "LBRACE" => LBRACE, + "RARROW" => RARROW, + "LIT_STR" => LIT_STR(Name(0)), + "DOTDOT" => DOTDOT, + "MOD_SEP" => MOD_SEP, + "DOTDOTDOT" => DOTDOTDOT, + "NOT" => NOT, + "AND" => BINOP(AND), + "LPAREN" => LPAREN, + "ANDAND" => ANDAND, + "AT" => AT, + "LBRACKET" => LBRACKET, + "LIT_STR_RAW" => LIT_STR_RAW(Name(0), 0), + "RPAREN" => RPAREN, + "SLASH" => BINOP(SLASH), + "COMMA" => COMMA, + "LIFETIME" => LIFETIME(ast::Ident { name: Name(0), ctxt: 0 }), + "CARET" => BINOP(CARET), + "TILDE" => TILDE, + "IDENT" => id(), + "PLUS" => BINOP(PLUS), + "LIT_CHAR" => LIT_CHAR(Name(0)), + "EQ" => EQ, + "RBRACKET" => RBRACKET, + "COMMENT" => COMMENT, + "DOC_COMMENT" => 
DOC_COMMENT(Name(0)), + "DOT" => DOT, + "EQEQ" => EQEQ, + "NE" => NE, + "GE" => GE, + "PERCENT" => BINOP(PERCENT), + "RBRACE" => RBRACE, + "BINOP" => BINOP(PLUS), + "POUND" => POUND, + "OROR" => OROR, + "LIT_INTEGER" => LIT_INTEGER(Name(0)), + "BINOPEQ" => BINOPEQ(PLUS), + "LIT_FLOAT" => LIT_FLOAT(Name(0)), + "WHITESPACE" => WS, + "UNDERSCORE" => UNDERSCORE, + "MINUS" => BINOP(MINUS), + "SEMI" => SEMI, + "COLON" => COLON, + "FAT_ARROW" => FAT_ARROW, + "OR" => BINOP(OR), + "GT" => GT, + "LE" => LE, + "LIT_BINARY" => LIT_BINARY(Name(0)), + "LIT_BINARY_RAW" => LIT_BINARY_RAW(Name(0), 0), + _ => continue + }; + + res.insert(num.to_string(), tok); + } + + debug!("Token map: {}", res); + res +} + +fn str_to_binop(mut s: &str) -> BinOp { + if s.ends_with("'") { + s = s.slice_to(s.len() - 1); + } + + match s { + "+" => PLUS, + "-" => MINUS, + "*" => STAR, + "%" => PERCENT, + "^" => CARET, + "&" => AND, + "|" => OR, + "<<" => SHL, + ">>" => SHR, + _ => fail!("Bad binop str {}", s) + } +} + +fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { + let re = regex!(r"\[@(?P\d+),(?P\d+):(?P\d+)='(?P.+?),<(?P-?\d+)>,\d+:\d+]"); + + let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice()); + let start = m.name("start"); + let end = m.name("end"); + let toknum = m.name("toknum"); + let content = m.name("content"); + + let proto_tok = tokens.find_equiv(&toknum).expect(format!("didn't find token {} in the map", toknum).as_slice()); + let real_tok = match *proto_tok { + BINOP(PLUS) => BINOP(str_to_binop(content)), + BINOPEQ(PLUS) => BINOPEQ(str_to_binop(content.slice_to(content.len() - 2))), + ref t => t.clone() + }; + + let offset = if real_tok == EOF { + 1 + } else { + 0 + }; + + let sp = syntax::codemap::Span { + lo: syntax::codemap::BytePos(from_str::(start).unwrap() - offset), + hi: syntax::codemap::BytePos(from_str::(end).unwrap() + 1), + expn_info: None + }; + + TokenAndSpan { + tok: real_tok, + sp: sp + } +} + +fn main() { + fn next(r: &mut 
lexer::StringReader) -> TokenAndSpan { + use syntax::parse::lexer::Reader; + r.next_token() + } + + let token_map = parse_token_list(File::open(&Path::new("RustLexer.tokens")).unwrap().read_to_string().unwrap().as_slice()); + let mut stdin = std::io::stdin(); + let mut antlr_tokens = stdin.lines().map(|l| parse_antlr_token(l.unwrap().as_slice().trim(), &token_map)); + + let code = File::open(&Path::new(std::os::args().get(1).as_slice())).unwrap().read_to_string().unwrap(); + let options = config::basic_options(); + let session = session::build_session(options, None); + let filemap = parse::string_to_filemap(&session.parse_sess, + code, + String::from_str("")); + let mut lexer = lexer::StringReader::new(session.diagnostic(), filemap); + + for antlr_tok in antlr_tokens { + let rustc_tok = next(&mut lexer); + if rustc_tok.tok == EOF && antlr_tok.tok == EOF { + continue + } + + assert!(rustc_tok.sp == antlr_tok.sp, "{} and {} have different spans", rustc_tok, antlr_tok); + + macro_rules! matches ( + ( $($x:pat),+ ) => ( + match rustc_tok.tok { + $($x => match antlr_tok.tok { + $x => (), + _ => fail!("{} is not {}", antlr_tok, rustc_tok) + },)* + ref c => assert!(c == antlr_tok.tok, "{} is not {}", rustc_tok, antlr_tok) + } + ) + ) + + matches!(LIT_BYTE(..), + LIT_CHAR(..), + LIT_INTEGER(..), + LIT_FLOAT(..), + LIT_STR(..), + LIT_STR_RAW(..), + LIT_BINARY(..), + LIT_BINARY_RAW(..), + IDENT(..), + LIFETIME(..), + INTERPOLATED(..), + DOC_COMMENT(..), + SHEBANG(..) 
+ ); + } +} From 1a1a9d54456355cc0ebdd397fd04871abe27f78c Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 14 Jul 2014 01:56:52 -0700 Subject: [PATCH 02/12] Add raw string literal ambiguity document --- src/grammar/raw-string-literal-ambiguity.md | 29 +++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 src/grammar/raw-string-literal-ambiguity.md diff --git a/src/grammar/raw-string-literal-ambiguity.md b/src/grammar/raw-string-literal-ambiguity.md new file mode 100644 index 000000000000..6b63bbcb4f06 --- /dev/null +++ b/src/grammar/raw-string-literal-ambiguity.md @@ -0,0 +1,29 @@ +Rust's lexical grammar is not context-free. Raw string literals are the source +of the problem. Informally, a raw string literal is an `r`, followed by `N` +hashes (where N can be zero), a quote, any characters, then a quote followed +by `N` hashes. This grammar describes it as well as possible: + + R -> 'r' S + S -> '"' B '"' + S -> '#' S '#' + B -> . B + B -> ε + +Where `.` represents any character, and `ε` the empty string. Consider the +string `r#""#"#`. This string is not a valid raw string literal, but can be +accepted as one by the above grammar, using the derivation: + + R : #""#"# + S : ""#" + S : "# + B : # + B : ε + +(Where `T : U` means the rule `T` is applied, and `U` is the remainder of the +string.) The difficulty arises from the fact that it is fundamentally +context-sensitive. In particular, the context needed is the number of hashes. +I know of no way to resolve this, but also have not come up with a proof that +it is not context-free. Such a proof would probably use the pumping lemma +for context-free languages, but I (cmr) could not come up with a proof after +spending a few hours on it, and decided my time was best spent elsewhere. Pull +requests welcome! 
From 76a15520212b1b9a72c099e0a163a31c102fcde4 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 14 Jul 2014 14:13:38 -0700 Subject: [PATCH 03/12] First pass at line comment correctness --- src/grammar/RustLexer.g4 | 45 ++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4 index 8a1a39aea0dd..f5d535a129f7 100644 --- a/src/grammar/RustLexer.g4 +++ b/src/grammar/RustLexer.g4 @@ -1,11 +1,23 @@ lexer grammar RustLexer; +tokens { + EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUT, + MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP, + BINOPEQ, AT, DOT, DOTDOT, DOTDOTDOT, COMMA, SEMI, COLON, + MOD_SEP, RARROW, FAT_ARROW, LPAREN, RPAREN, LBRACKET, RBRACKET, + LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR, + LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BINARY, + LIT_BINARY_RAW, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT, + COMMENT +} + /* Note: due to antlr limitations, we can't represent XID_start and * XID_continue properly. ASCII-only substitute. */ fragment XID_start : [_a-zA-Z] ; fragment XID_continue : [_a-zA-Z0-9] ; + /* Expression-operator symbols */ EQ : '=' ; @@ -83,7 +95,7 @@ LIT_CHAR : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] ) '\'' ; -INT_SUFFIX +fragment INT_SUFFIX : 'i' | 'i8' | 'i16' @@ -141,25 +153,28 @@ LIFETIME : '\'' IDENT ; WHITESPACE : [ \r\n\t]+ ; -COMMENT - : '//' ~[\r\n]* - | '////' ~[\r\n]* - | BLOCK_COMMENT +LINE_COMMENT_NOT_A_TOKEN : '//' -> more, pushMode(LINE_COMMENT) ; + + +DOC_BLOCK_COMMENT + : ('/**' | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT) ; -mode DOCCOMMENT; +BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ; -fragment DOC_BLOCK_COMMENT - : ('/**' | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' +mode LINE_COMMENT; + +MAYBE_DOC_COMMENT + : '/' -> more, pushMode(LINE_DOC_COMMENT) ; -DOC_COMMENT - : '///' ~[\r\n]* - | '//!' 
~[\r\n]* - | DOC_BLOCK_COMMENT +MAYBE_OUTER_DOC_COMMENT + : '!' ~[\r\n]* -> type(LINE_DOC_COMMENT), popMode ; -fragment BLOCK_COMMENT - : '/*' (BLOCK_COMMENT | .)*? '*/' - ; +COMMENT : ~[\r\n]* -> popMode ; +mode LINE_DOC_COMMENT; + +ACTUALLY_A_COMMENT : '/' ~[\r\n]* -> type(COMMENT), popMode ; +REALLY_A_DOC_COMMENT : ~[\r\n]* -> type(DOC_COMMENT), popMode ; From 9fc5cf902f9613f40ce4d4346d1ae98a0904e67a Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 14 Jul 2014 17:27:28 -0700 Subject: [PATCH 04/12] Refine the tooling, handle comments --- src/grammar/RustLexer.g4 | 23 +++--------- src/grammar/verify.rs | 76 +++++++++++++++++++++++++++++++++------- 2 files changed, 68 insertions(+), 31 deletions(-) diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4 index f5d535a129f7..e4640ccfb164 100644 --- a/src/grammar/RustLexer.g4 +++ b/src/grammar/RustLexer.g4 @@ -153,28 +153,13 @@ LIFETIME : '\'' IDENT ; WHITESPACE : [ \r\n\t]+ ; -LINE_COMMENT_NOT_A_TOKEN : '//' -> more, pushMode(LINE_COMMENT) ; - +UNDOC_COMMENT : '////' ~[\r\n]* -> type(COMMENT) ; +YESDOC_COMMENT : '///' ~[\r\n]* -> type(DOC_COMMENT) ; +OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ; +LINE_COMMENT : '//' ~[\r\n]* -> type(COMMENT) ; DOC_BLOCK_COMMENT : ('/**' | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT) ; BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ; - -mode LINE_COMMENT; - -MAYBE_DOC_COMMENT - : '/' -> more, pushMode(LINE_DOC_COMMENT) - ; - -MAYBE_OUTER_DOC_COMMENT - : '!' 
~[\r\n]* -> type(LINE_DOC_COMMENT), popMode - ; - -COMMENT : ~[\r\n]* -> popMode ; - -mode LINE_DOC_COMMENT; - -ACTUALLY_A_COMMENT : '/' ~[\r\n]* -> type(COMMENT), popMode ; -REALLY_A_DOC_COMMENT : ~[\r\n]* -> type(DOC_COMMENT), popMode ; diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs index 56c78b89ba2e..38dd86e07729 100644 --- a/src/grammar/verify.rs +++ b/src/grammar/verify.rs @@ -108,13 +108,10 @@ fn parse_token_list(file: &str) -> HashMap { res } -fn str_to_binop(mut s: &str) -> BinOp { - if s.ends_with("'") { - s = s.slice_to(s.len() - 1); - } - +fn str_to_binop(s: &str) -> BinOp { match s { "+" => PLUS, + "/" => SLASH, "-" => MINUS, "*" => STAR, "%" => PERCENT, @@ -123,12 +120,35 @@ fn str_to_binop(mut s: &str) -> BinOp { "|" => OR, "<<" => SHL, ">>" => SHR, - _ => fail!("Bad binop str {}", s) + _ => fail!("Bad binop str `{}`", s) } } +/// Assuming a raw string/binary literal, strip out the leading/trailing +/// hashes and surrounding quotes/raw/binary prefix. +fn fix(mut lit: &str) -> ast::Name { + if lit.char_at(0) == 'r' { + if lit.char_at(1) == 'b' { + lit = lit.slice_from(2) + } else { + lit = lit.slice_from(1); + } + } else if lit.char_at(0) == 'b' { + lit = lit.slice_from(1); + } + + let leading_hashes = count(lit); + + // +1/-1 to adjust for single quotes + parse::token::intern(lit.slice(leading_hashes + 1, lit.len() - leading_hashes - 1)) +} + +fn count(lit: &str) -> uint { + lit.chars().take_while(|c| *c == '#').count() +} + fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { - let re = regex!(r"\[@(?P\d+),(?P\d+):(?P\d+)='(?P.+?),<(?P-?\d+)>,\d+:\d+]"); + let re = regex!(r"\[@(?P\d+),(?P\d+):(?P\d+)='(?P.+?)',<(?P-?\d+)>,\d+:\d+]"); let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice()); let start = m.name("start"); @@ -137,9 +157,24 @@ fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { let content = m.name("content"); let proto_tok = 
tokens.find_equiv(&toknum).expect(format!("didn't find token {} in the map", toknum).as_slice()); + + let nm = parse::token::intern(content); + + debug!("What we got: content (`{}`), proto: {}", content, proto_tok); + let real_tok = match *proto_tok { - BINOP(PLUS) => BINOP(str_to_binop(content)), - BINOPEQ(PLUS) => BINOPEQ(str_to_binop(content.slice_to(content.len() - 2))), + BINOP(..) => BINOP(str_to_binop(content)), + BINOPEQ(..) => BINOPEQ(str_to_binop(content.slice_to(content.len() - 1))), + LIT_STR(..) => LIT_STR(fix(content)), + LIT_STR_RAW(..) => LIT_STR_RAW(fix(content), count(content)), + LIT_CHAR(..) => LIT_CHAR(nm), + DOC_COMMENT(..) => DOC_COMMENT(nm), + LIT_INTEGER(..) => LIT_INTEGER(nm), + LIT_FLOAT(..) => LIT_FLOAT(nm), + LIT_BINARY(..) => LIT_BINARY(nm), + LIT_BINARY_RAW(..) => LIT_BINARY_RAW(fix(content), count(content)), + IDENT(..) => IDENT(ast::Ident { name: nm, ctxt: 0 }, true), + LIFETIME(..) => LIFETIME(ast::Ident { name: nm, ctxt: 0 }), ref t => t.clone() }; @@ -161,6 +196,16 @@ fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { } } +fn tok_cmp(a: &Token, b: &Token) -> bool { + match a { + &IDENT(id, _) => match b { + &IDENT(id2, _) => id == id2, + _ => false + }, + _ => a == b + } +} + fn main() { fn next(r: &mut lexer::StringReader) -> TokenAndSpan { use syntax::parse::lexer::Reader; @@ -173,7 +218,8 @@ fn main() { let code = File::open(&Path::new(std::os::args().get(1).as_slice())).unwrap().read_to_string().unwrap(); let options = config::basic_options(); - let session = session::build_session(options, None); + let session = session::build_session(options, None, + syntax::diagnostics::registry::Registry::new([])); let filemap = parse::string_to_filemap(&session.parse_sess, code, String::from_str("")); @@ -191,10 +237,16 @@ fn main() { ( $($x:pat),+ ) => ( match rustc_tok.tok { $($x => match antlr_tok.tok { - $x => (), + $x => { + if !tok_cmp(&rustc_tok.tok, &antlr_tok.tok) { + // FIXME #15677: needs more robust escaping in 
+ // antlr + warn!("Different names for {} and {}", rustc_tok, antlr_tok); + } + } _ => fail!("{} is not {}", antlr_tok, rustc_tok) },)* - ref c => assert!(c == antlr_tok.tok, "{} is not {}", rustc_tok, antlr_tok) + ref c => assert!(c == &antlr_tok.tok, "{} is not {}", rustc_tok, antlr_tok) } ) ) From f8fd32ef9dd48a216ae5ca44ca65ea8f2205f581 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 14 Jul 2014 20:45:39 -0700 Subject: [PATCH 05/12] Byte/raw binary literal fixes --- src/grammar/.gitignore | 4 ++++ src/grammar/RustLexer.g4 | 9 +++++++-- src/grammar/verify.rs | 15 +++++++++++++-- 3 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 src/grammar/.gitignore diff --git a/src/grammar/.gitignore b/src/grammar/.gitignore new file mode 100644 index 000000000000..e77db28967e3 --- /dev/null +++ b/src/grammar/.gitignore @@ -0,0 +1,4 @@ +verify +*.class +*.java +*.tokens diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4 index e4640ccfb164..f2705e5421b9 100644 --- a/src/grammar/RustLexer.g4 +++ b/src/grammar/RustLexer.g4 @@ -44,6 +44,7 @@ SHR : '>>' ; BINOP : PLUS + | SLASH | MINUS | STAR | PERCENT @@ -95,6 +96,10 @@ LIT_CHAR : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] ) '\'' ; +LIT_BYTE + : 'b\'' ( '\\' ( [xX] HEXIT HEXIT | [nrt\\'"0] ) | ~[\\'\n\t\r] ) '\'' + ; + fragment INT_SUFFIX : 'i' | 'i8' @@ -130,7 +135,7 @@ LIT_STR ; LIT_BINARY : 'b' LIT_STR ; -LIT_BINARY_RAW : 'b' LIT_STR_RAW ; +LIT_BINARY_RAW : 'rb' LIT_STR_RAW ; /* this is a bit messy */ @@ -159,7 +164,7 @@ OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ; LINE_COMMENT : '//' ~[\r\n]* -> type(COMMENT) ; DOC_BLOCK_COMMENT - : ('/**' | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT) + : ('/**' ~[*] | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT) ; BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? 
'*/' -> type(COMMENT) ; diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs index 38dd86e07729..c20aebe65d50 100644 --- a/src/grammar/verify.rs +++ b/src/grammar/verify.rs @@ -71,6 +71,7 @@ fn parse_token_list(file: &str) -> HashMap { "IDENT" => id(), "PLUS" => BINOP(PLUS), "LIT_CHAR" => LIT_CHAR(Name(0)), + "LIT_BYTE" => LIT_BYTE(Name(0)), "EQ" => EQ, "RBRACKET" => RBRACKET, "COMMENT" => COMMENT, @@ -124,7 +125,7 @@ fn str_to_binop(s: &str) -> BinOp { } } -/// Assuming a raw string/binary literal, strip out the leading/trailing +/// Assuming a string/binary literal, strip out the leading/trailing /// hashes and surrounding quotes/raw/binary prefix. fn fix(mut lit: &str) -> ast::Name { if lit.char_at(0) == 'r' { @@ -143,6 +144,15 @@ fn fix(mut lit: &str) -> ast::Name { parse::token::intern(lit.slice(leading_hashes + 1, lit.len() - leading_hashes - 1)) } +/// Assuming a char/byte literal, strip the 'b' prefix and the single quotes. +fn fixchar(mut lit: &str) -> ast::Name { + if lit.char_at(0) == 'b' { + lit = lit.slice_from(1); + } + + parse::token::intern(lit.slice(1, lit.len() - 1)) +} + fn count(lit: &str) -> uint { lit.chars().take_while(|c| *c == '#').count() } @@ -167,7 +177,8 @@ fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { BINOPEQ(..) => BINOPEQ(str_to_binop(content.slice_to(content.len() - 1))), LIT_STR(..) => LIT_STR(fix(content)), LIT_STR_RAW(..) => LIT_STR_RAW(fix(content), count(content)), - LIT_CHAR(..) => LIT_CHAR(nm), + LIT_CHAR(..) => LIT_CHAR(fixchar(content)), + LIT_BYTE(..) => LIT_BYTE(fixchar(content)), DOC_COMMENT(..) => DOC_COMMENT(nm), LIT_INTEGER(..) => LIT_INTEGER(nm), LIT_FLOAT(..) => LIT_FLOAT(nm), From 188d889aaf8cccb73243812881ce1030355386cc Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 14 Jul 2014 20:46:04 -0700 Subject: [PATCH 06/12] ignore-lexer-test to broken files and remove some tray hyphens I blame @ChrisMorgan for the hyphens. 
--- src/libcollections/slice.rs | 2 +- src/libcollections/str.rs | 2 ++ src/libcollections/string.rs | 2 ++ src/libcore/str.rs | 2 ++ src/libcoretest/char.rs | 2 ++ src/libgetopts/lib.rs | 2 ++ src/libgreen/macros.rs | 1 + src/libregex/lib.rs | 2 ++ src/librustrt/util.rs | 2 ++ src/libserialize/base64.rs | 2 ++ src/libserialize/hex.rs | 2 ++ src/libstd/ascii.rs | 2 ++ src/libstd/fmt.rs | 2 ++ src/libsyntax/codemap.rs | 2 ++ src/libtime/lib.rs | 16 ++++++++-------- src/libunicode/u_str.rs | 2 ++ 16 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/libcollections/slice.rs b/src/libcollections/slice.rs index 40cf8495a405..69ca2a4107ce 100644 --- a/src/libcollections/slice.rs +++ b/src/libcollections/slice.rs @@ -155,7 +155,7 @@ impl<'a, T: Clone, V: Vector> VectorVector for &'a [V] { /// a sequence of all possible permutations for an indexed sequence of /// elements. Each permutation is only a single swap apart. /// -/// The Steinhaus–Johnson–Trotter algorithm is used. +/// The Steinhaus-Johnson-Trotter algorithm is used. /// /// Generates even and odd permutations alternately. /// diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs index 19db88453809..b11f98f52d9f 100644 --- a/src/libcollections/str.rs +++ b/src/libcollections/str.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 /*! diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs index 74b9465f2a56..7e94cfc1b96f 100644 --- a/src/libcollections/string.rs +++ b/src/libcollections/string.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 //! An owned, growable string that enforces that its contents are valid UTF-8. 
diff --git a/src/libcore/str.rs b/src/libcore/str.rs index aa2050dacf1a..c5ef1eca44da 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 //! String manipulation //! diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs index 51d13535caf3..ebc6e9862288 100644 --- a/src/libcoretest/char.rs +++ b/src/libcoretest/char.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 use core::char::{escape_unicode, escape_default}; diff --git a/src/libgetopts/lib.rs b/src/libgetopts/lib.rs index eaec31a45f42..8069b7002708 100644 --- a/src/libgetopts/lib.rs +++ b/src/libgetopts/lib.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15677 //! Simple getopt alternative. //! diff --git a/src/libgreen/macros.rs b/src/libgreen/macros.rs index eddf17b34b9f..4cce430d88a8 100644 --- a/src/libgreen/macros.rs +++ b/src/libgreen/macros.rs @@ -9,6 +9,7 @@ // except according to those terms. // FIXME: this file probably shouldn't exist +// ignore-lexer-test FIXME #15677 #![macro_escape] diff --git a/src/libregex/lib.rs b/src/libregex/lib.rs index 1bb7f605e547..fae3e5986806 100644 --- a/src/libregex/lib.rs +++ b/src/libregex/lib.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 //! This crate provides a native implementation of regular expressions that is //! heavily based on RE2 both in syntax and in implementation. 
Notably, diff --git a/src/librustrt/util.rs b/src/librustrt/util.rs index 40c3e19576e4..1334000ed1f5 100644 --- a/src/librustrt/util.rs +++ b/src/librustrt/util.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15677 use core::prelude::*; diff --git a/src/libserialize/base64.rs b/src/libserialize/base64.rs index 63cfbd6d9aa1..bd81091bd55e 100644 --- a/src/libserialize/base64.rs +++ b/src/libserialize/base64.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 //! Base64 binary-to-text encoding use std::str; diff --git a/src/libserialize/hex.rs b/src/libserialize/hex.rs index 51fab7b13545..568c4dafd155 100644 --- a/src/libserialize/hex.rs +++ b/src/libserialize/hex.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 //! Hex binary-to-text encoding use std::str; diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs index 796147ce7a05..eccee007d5cd 100644 --- a/src/libstd/ascii.rs +++ b/src/libstd/ascii.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 //! Operations on ASCII strings and characters diff --git a/src/libstd/fmt.rs b/src/libstd/fmt.rs index 5834e576b081..13c52545274b 100644 --- a/src/libstd/fmt.rs +++ b/src/libstd/fmt.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 /*! 
diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs index ef4024a8f83f..2f30108c27bd 100644 --- a/src/libsyntax/codemap.rs +++ b/src/libsyntax/codemap.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 /*! diff --git a/src/libtime/lib.rs b/src/libtime/lib.rs index 7655ace0ecb0..f8f63d502739 100644 --- a/src/libtime/lib.rs +++ b/src/libtime/lib.rs @@ -206,28 +206,28 @@ pub fn tzset() { /// also called a broken-down time value. #[deriving(Clone, PartialEq, Show)] pub struct Tm { - /// Seconds after the minute – [0, 60] + /// Seconds after the minute - [0, 60] pub tm_sec: i32, - /// Minutes after the hour – [0, 59] + /// Minutes after the hour - [0, 59] pub tm_min: i32, - /// Hours after midnight – [0, 23] + /// Hours after midnight - [0, 23] pub tm_hour: i32, - /// Day of the month – [1, 31] + /// Day of the month - [1, 31] pub tm_mday: i32, - /// Months since January – [0, 11] + /// Months since January - [0, 11] pub tm_mon: i32, /// Years since 1900 pub tm_year: i32, - /// Days since Sunday – [0, 6]. 0 = Sunday, 1 = Monday, …, 6 = Saturday. + /// Days since Sunday - [0, 6]. 0 = Sunday, 1 = Monday, ..., 6 = Saturday. pub tm_wday: i32, - /// Days since January 1 – [0, 365] + /// Days since January 1 - [0, 365] pub tm_yday: i32, /// Daylight Saving Time flag. @@ -241,7 +241,7 @@ pub struct Tm { /// for U.S. Pacific Daylight Time, the value is -7*60*60 = -25200. pub tm_gmtoff: i32, - /// Nanoseconds after the second – [0, 109 - 1] + /// Nanoseconds after the second - [0, 109 - 1] pub tm_nsec: i32, } diff --git a/src/libunicode/u_str.rs b/src/libunicode/u_str.rs index 84a2eab4b251..85f311d47eb2 100644 --- a/src/libunicode/u_str.rs +++ b/src/libunicode/u_str.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 /*! 
* Unicode-intensive string manipulations. From cbd6799110b858505c271b8169e356c62e34b80a Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Tue, 15 Jul 2014 00:18:17 -0700 Subject: [PATCH 07/12] lexer tests: makefile/configure --- Makefile.in | 13 +++++++++++++ configure | 3 +++ mk/grammar.mk | 43 +++++++++++++++++++++++++++++++++++++++++++ src/grammar/check.sh | 21 +++++++++++++++++++++ src/grammar/verify.rs | 6 ++++-- 5 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 mk/grammar.mk create mode 100755 src/grammar/check.sh diff --git a/Makefile.in b/Makefile.in index a8a63a42066b..2612761cef95 100644 --- a/Makefile.in +++ b/Makefile.in @@ -252,6 +252,19 @@ ifneq ($(findstring clean,$(MAKECMDGOALS)),) include $(CFG_SRC_DIR)mk/clean.mk endif +# Grammar tests + +ifneq ($(findstring lexer,$(MAKECMDGOALS)),) + ifdef CFG_JAVAC + ifdef CFG_ANTLR4 + ifdef CFG_GRUN + CFG_INFO := $(info cfg: including grammar tests) + include $(CFG_SRC_DIR)mk/grammar.mk + endif + endif + endif +endif + # CTAGS building ifneq ($(strip $(findstring TAGS.emacs,$(MAKECMDGOALS)) \ $(findstring TAGS.vi,$(MAKECMDGOALS))),) diff --git a/configure b/configure index 135bdcd3782f..b6513cb0f74c 100755 --- a/configure +++ b/configure @@ -493,6 +493,9 @@ probe CFG_VALGRIND valgrind probe CFG_PERF perf probe CFG_ISCC iscc probe CFG_LLNEXTGEN LLnextgen +probe CFG_JAVAC javac +probe CFG_ANTLR4 antlr4 +probe CFG_GRUN grun probe CFG_PANDOC pandoc probe CFG_PDFLATEX pdflatex probe CFG_XELATEX xelatex diff --git a/mk/grammar.mk b/mk/grammar.mk new file mode 100644 index 000000000000..03e253c7278c --- /dev/null +++ b/mk/grammar.mk @@ -0,0 +1,43 @@ +# Copyright 2014 The Rust Project Developers. See the COPYRIGHT +# file at the top-level directory of this distribution and at +# http://rust-lang.org/COPYRIGHT. +# +# Licensed under the Apache License, Version 2.0 or the MIT license +# , at your +# option. This file may not be copied, modified, or distributed +# except according to those terms. 
+ +BG = $(CFG_BUILD_DIR)/grammar/ +SG = $(S)src/grammar/ +B = $(CFG_BUILD_DIR)/$(CFG_BUILD)/stage2/ +L = $(B)lib/rustlib/$(CFG_BUILD)/lib +LD = $(CFG_BUILD)/stage2/lib/rustlib/$(CFG_BUILD)/lib/ +RUSTC = $(B)bin/rustc + +# Run the reference lexer against libsyntax and compare the tokens and spans. +# If "// ignore-lexer-test" is present in the file, it will be ignored. +# +# $(1) is the file to test. +define LEXER_TEST +grep "// ignore-lexer-test" $(1) ; \ + if [ $$? -eq 1 ]; then \ + CLASSPATH=$(B)grammar $(CFG_GRUN) RustLexer tokens -tokens < $(1) \ + | $(B)grammar/verify $(1) ; \ + fi +endef + +$(BG): + $(Q)mkdir -p $(BG) + +$(BG)RustLexer.class: $(SG)RustLexer.g4 + $(Q)$(CFG_ANTLR4) -o $(B)grammar $(SG)RustLexer.g4 + $(Q)$(CFG_JAVAC) -d $(BG) $(BG)RustLexer.java + +$(BG)verify: $(SG)verify.rs rustc-stage2-H-$(CFG_BUILD) $(LD)stamp.regex_macros $(LD)stamp.rustc + $(Q)$(RUSTC) -O --out-dir $(BG) -L $(L) $(SG)verify.rs + +check-lexer: $(BG) $(BG)RustLexer.class $(BG)verify + $(info Verifying libsyntax against the reference lexer ...) + $(Q)find $(S) -iname '*.rs' -exec "$(SG)check.sh" {} "$(BG)" \ + "$(CFG_GRUN)" "$(BG)verify" "$(BG)RustLexer.tokens" "$(VERBOSE)" \; diff --git a/src/grammar/check.sh b/src/grammar/check.sh new file mode 100755 index 000000000000..3ddbb8a34c81 --- /dev/null +++ b/src/grammar/check.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +# Run the reference lexer against libsyntax and compare the tokens and spans. +# If "// ignore-lexer-test" is present in the file, it will be ignored. +# +# Argument $1 is the file to check, $2 is the classpath to use, $3 is the path +# to the grun binary, $4 is the path to the verify binary, $5 is the path to +# RustLexer.tokens + +if [ "${VERBOSE}" == "1" ]; then + set -x +fi + +grep -q "// ignore lexer-test" $1; + +if [ $? -eq 1 ]; then + cd $2 # This `cd` is so java will pick up RustLexer.class. I couldn't + # figure out how to wrangle the CLASSPATH, just adding build/grammr didn't + # seem to have anny effect. 
+ $3 RustLexer tokens -tokens < $1 | $4 $1 $5 +fi diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs index c20aebe65d50..7fddf9b887de 100644 --- a/src/grammar/verify.rs +++ b/src/grammar/verify.rs @@ -223,11 +223,13 @@ fn main() { r.next_token() } - let token_map = parse_token_list(File::open(&Path::new("RustLexer.tokens")).unwrap().read_to_string().unwrap().as_slice()); + let args = std::os::args(); + + let token_map = parse_token_list(File::open(&Path::new(args.get(2).as_slice())).unwrap().read_to_string().unwrap().as_slice()); let mut stdin = std::io::stdin(); let mut antlr_tokens = stdin.lines().map(|l| parse_antlr_token(l.unwrap().as_slice().trim(), &token_map)); - let code = File::open(&Path::new(std::os::args().get(1).as_slice())).unwrap().read_to_string().unwrap(); + let code = File::open(&Path::new(args.get(1).as_slice())).unwrap().read_to_string().unwrap(); let options = config::basic_options(); let session = session::build_session(options, None, syntax::diagnostics::registry::Registry::new([])); From dd3afb42d1cc5eb11f2e024167aca0a6d6173b98 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 21 Jul 2014 12:59:25 -0700 Subject: [PATCH 08/12] Break apart long lines in verify.rs --- src/grammar/verify.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs index 7fddf9b887de..a6a1a75854d5 100644 --- a/src/grammar/verify.rs +++ b/src/grammar/verify.rs @@ -225,7 +225,9 @@ fn main() { let args = std::os::args(); - let token_map = parse_token_list(File::open(&Path::new(args.get(2).as_slice())).unwrap().read_to_string().unwrap().as_slice()); + let mut token_file = File::open(&Path::new(args.get(2).as_slice())); + let token_map = parse_token_list(token_file.read_to_string().unwrap().as_slice()); + let mut stdin = std::io::stdin(); let mut antlr_tokens = stdin.lines().map(|l| parse_antlr_token(l.unwrap().as_slice().trim(), &token_map)); From c41a7dfcc7b1c71305bd1816bb2e6aff7abddbb2 Mon 
Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 21 Jul 2014 13:04:35 -0700 Subject: [PATCH 09/12] Shuffle around check-lexer conditions --- Makefile.in | 14 +------------- mk/grammar.mk | 16 ++++++++++++++-- mk/tests.mk | 4 +++- src/grammar/check.sh | 31 ++++++++++++++++++++++--------- src/grammar/verify.rs | 23 +++++++++++++++++++---- 5 files changed, 59 insertions(+), 29 deletions(-) diff --git a/Makefile.in b/Makefile.in index 2612761cef95..5683eb7ba06a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -216,6 +216,7 @@ ifneq ($(strip $(findstring check,$(MAKECMDGOALS)) \ $(findstring tidy,$(MAKECMDGOALS))),) CFG_INFO := $(info cfg: including test rules) include $(CFG_SRC_DIR)mk/tests.mk + include $(CFG_SRC_DIR)mk/grammar.mk endif # Performance and benchmarking @@ -252,19 +253,6 @@ ifneq ($(findstring clean,$(MAKECMDGOALS)),) include $(CFG_SRC_DIR)mk/clean.mk endif -# Grammar tests - -ifneq ($(findstring lexer,$(MAKECMDGOALS)),) - ifdef CFG_JAVAC - ifdef CFG_ANTLR4 - ifdef CFG_GRUN - CFG_INFO := $(info cfg: including grammar tests) - include $(CFG_SRC_DIR)mk/grammar.mk - endif - endif - endif -endif - # CTAGS building ifneq ($(strip $(findstring TAGS.emacs,$(MAKECMDGOALS)) \ $(findstring TAGS.vi,$(MAKECMDGOALS))),) diff --git a/mk/grammar.mk b/mk/grammar.mk index 03e253c7278c..c0afa3eb7694 100644 --- a/mk/grammar.mk +++ b/mk/grammar.mk @@ -38,6 +38,18 @@ $(BG)verify: $(SG)verify.rs rustc-stage2-H-$(CFG_BUILD) $(LD)stamp.regex_macros $(Q)$(RUSTC) -O --out-dir $(BG) -L $(L) $(SG)verify.rs check-lexer: $(BG) $(BG)RustLexer.class $(BG)verify +ifdef CFG_JAVAC +ifdef CFG_ANTLR4 +ifdef CFG_GRUN $(info Verifying libsyntax against the reference lexer ...) - $(Q)find $(S) -iname '*.rs' -exec "$(SG)check.sh" {} "$(BG)" \ - "$(CFG_GRUN)" "$(BG)verify" "$(BG)RustLexer.tokens" "$(VERBOSE)" \; + $(Q)$(SG)check.sh $(S) "$(BG)" \ + "$(CFG_GRUN)" "$(BG)verify" "$(BG)RustLexer.tokens" +else +$(info grun not available, skipping lexer test...) 
+endif +else +$(info antlr4 not available, skipping lexer test...) +endif +else +$(info javac not available, skipping lexer test...) +endif diff --git a/mk/tests.mk b/mk/tests.mk index d2e4388521ec..6068af8f7f46 100644 --- a/mk/tests.mk +++ b/mk/tests.mk @@ -171,7 +171,7 @@ endif # Main test targets ###################################################################### -check: cleantmptestlogs cleantestlibs check-notidy tidy +check: cleantmptestlogs cleantestlibs check-notidy tidy check-syntax check-notidy: cleantmptestlogs cleantestlibs all check-stage2 $(Q)$(CFG_PYTHON) $(S)src/etc/check-summary.py tmp/*.log @@ -192,6 +192,8 @@ check-docs: cleantestlibs cleantmptestlogs check-stage2-docs # NOTE: Remove after reprogramming windows bots check-fast: check-lite +check-syntax: check-lexer + .PHONY: cleantmptestlogs cleantestlibs cleantmptestlogs: diff --git a/src/grammar/check.sh b/src/grammar/check.sh index 3ddbb8a34c81..69ec490a08a3 100755 --- a/src/grammar/check.sh +++ b/src/grammar/check.sh @@ -2,20 +2,33 @@ # Run the reference lexer against libsyntax and compare the tokens and spans. # If "// ignore-lexer-test" is present in the file, it will be ignored. -# + + # Argument $1 is the file to check, $2 is the classpath to use, $3 is the path # to the grun binary, $4 is the path to the verify binary, $5 is the path to # RustLexer.tokens - if [ "${VERBOSE}" == "1" ]; then set -x fi -grep -q "// ignore lexer-test" $1; +check() { + grep --silent "// ignore-lexer-test" $1; -if [ $? -eq 1 ]; then - cd $2 # This `cd` is so java will pick up RustLexer.class. I couldn't - # figure out how to wrangle the CLASSPATH, just adding build/grammr didn't - # seem to have anny effect. - $3 RustLexer tokens -tokens < $1 | $4 $1 $5 -fi + # if it's *not* found... + if [ $? -eq 1 ]; then + cd $2 # This `cd` is so java will pick up RustLexer.class. I couldn't + # figure out how to wrangle the CLASSPATH, just adding build/grammr didn't + # seem to have anny effect. 
+ if $3 RustLexer tokens -tokens < $1 | $4 $1 $5; then + echo "pass: $1" + else + echo "fail: $1" + fi + else + echo "skip: $1" + fi +} + +for file in $(find $1 -iname '*.rs' ! -path '*/test/compile-fail/*' ); do + check $file $2 $3 $4 $5 +done diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs index a6a1a75854d5..f2ae5a1ea4e5 100644 --- a/src/grammar/verify.rs +++ b/src/grammar/verify.rs @@ -1,3 +1,13 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + #![feature(globs, phase, macro_rules)] extern crate syntax; @@ -158,7 +168,9 @@ fn count(lit: &str) -> uint { } fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { - let re = regex!(r"\[@(?P\d+),(?P\d+):(?P\d+)='(?P.+?)',<(?P-?\d+)>,\d+:\d+]"); + let re = regex!( + r"\[@(?P\d+),(?P\d+):(?P\d+)='(?P.+?)',<(?P-?\d+)>,\d+:\d+]" + ); let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice()); let start = m.name("start"); @@ -166,7 +178,8 @@ fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { let toknum = m.name("toknum"); let content = m.name("content"); - let proto_tok = tokens.find_equiv(&toknum).expect(format!("didn't find token {} in the map", toknum).as_slice()); + let proto_tok = tokens.find_equiv(&toknum).expect(format!("didn't find token {} in the map", + toknum).as_slice()); let nm = parse::token::intern(content); @@ -229,7 +242,8 @@ fn main() { let token_map = parse_token_list(token_file.read_to_string().unwrap().as_slice()); let mut stdin = std::io::stdin(); - let mut antlr_tokens = stdin.lines().map(|l| parse_antlr_token(l.unwrap().as_slice().trim(), &token_map)); + let mut antlr_tokens = stdin.lines().map(|l| 
parse_antlr_token(l.unwrap().as_slice().trim(), + &token_map)); let code = File::open(&Path::new(args.get(1).as_slice())).unwrap().read_to_string().unwrap(); let options = config::basic_options(); @@ -246,7 +260,8 @@ fn main() { continue } - assert!(rustc_tok.sp == antlr_tok.sp, "{} and {} have different spans", rustc_tok, antlr_tok); + assert!(rustc_tok.sp == antlr_tok.sp, "{} and {} have different spans", rustc_tok, + antlr_tok); macro_rules! matches ( ( $($x:pat),+ ) => ( From 35c0bf32926ec8095a7ca5b5b77e612c15ddf098 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 21 Jul 2014 15:57:14 -0700 Subject: [PATCH 10/12] Add a ton of ignore-lexer-test --- src/grammar/check.sh | 2 +- src/libcollections/hash/sip.rs | 2 ++ src/libcore/num/mod.rs | 2 ++ src/libnative/io/tty_win32.rs | 2 ++ src/librand/distributions/gamma.rs | 2 ++ src/libregex/test/tests.rs | 1 + src/libstd/collections/hashmap.rs | 2 ++ src/libstd/io/buffered.rs | 2 ++ src/libstd/io/fs.rs | 2 ++ src/libstd/io/mem.rs | 2 ++ src/libstd/io/mod.rs | 2 ++ src/libstd/num/strconv.rs | 2 ++ src/libstd/path/windows.rs | 2 ++ src/libsyntax/ext/tt/macro_parser.rs | 2 ++ src/test/bench/core-std.rs | 1 + src/test/bench/msgsend-ring-mutex-arcs.rs | 1 + src/test/bench/msgsend-ring-rw-arcs.rs | 1 + src/test/bench/noise.rs | 1 + src/test/pretty/block-comment-wchar.rs | 1 + src/test/run-pass/byte-literals.rs | 2 ++ .../default-method-supertrait-vtable.rs | 2 ++ src/test/run-pass/ifmt.rs | 1 + src/test/run-pass/issue-12582.rs | 2 ++ src/test/run-pass/issue-13027.rs | 2 ++ src/test/run-pass/issue-2185.rs | 1 + src/test/run-pass/issue-2718.rs | 2 ++ src/test/run-pass/issue-3683.rs | 2 ++ src/test/run-pass/issue-4759-1.rs | 2 ++ src/test/run-pass/issue-5280.rs | 2 ++ .../issue-5321-immediates-with-bare-self.rs | 2 ++ ...line-endings-string-literal-doc-comment.rs | 1 + ...ase-types-non-uppercase-statics-unicode.rs | 2 ++ src/test/run-pass/match-range.rs | 2 ++ src/test/run-pass/multibyte.rs | 2 ++ 
src/test/run-pass/raw-str.rs | Bin 1305 -> 1339 bytes src/test/run-pass/shebang.rs | 2 ++ src/test/run-pass/struct-return.rs | 2 ++ src/test/run-pass/trait-to-str.rs | 3 ++- .../run-pass/trait-with-bounds-default.rs | 2 ++ .../run-pass/traits-default-method-self.rs | 2 ++ .../run-pass/traits-default-method-trivial.rs | 2 ++ src/test/run-pass/unsized.rs | 2 ++ src/test/run-pass/unsized2.rs | 2 ++ src/test/run-pass/utf8-bom.rs | 2 ++ src/test/run-pass/utf8.rs | 3 ++- src/test/run-pass/utf8_chars.rs | 2 ++ src/test/run-pass/utf8_idents.rs | 2 ++ 47 files changed, 82 insertions(+), 3 deletions(-) diff --git a/src/grammar/check.sh b/src/grammar/check.sh index 69ec490a08a3..f2836312437c 100755 --- a/src/grammar/check.sh +++ b/src/grammar/check.sh @@ -29,6 +29,6 @@ check() { fi } -for file in $(find $1 -iname '*.rs' ! -path '*/test/compile-fail/*' ); do +for file in $(find $1 -iname '*.rs' ! -path '*/test/compile-fail*'); do check $file $2 $3 $4 $5 done diff --git a/src/libcollections/hash/sip.rs b/src/libcollections/hash/sip.rs index 1c7e03f70c88..7168af89b59e 100644 --- a/src/libcollections/hash/sip.rs +++ b/src/libcollections/hash/sip.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15883 /*! * Implementation of SipHash 2-4 diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs index 3230873883e1..3ffc1d5e11c7 100644 --- a/src/libcore/num/mod.rs +++ b/src/libcore/num/mod.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 //! Numeric traits and functions for generic mathematics diff --git a/src/libnative/io/tty_win32.rs b/src/libnative/io/tty_win32.rs index 72cf5e785fb9..e98fe1e20b19 100644 --- a/src/libnative/io/tty_win32.rs +++ b/src/libnative/io/tty_win32.rs @@ -8,6 +8,8 @@ // option. 
This file may not be copied, modified, or distributed // except according to those terms. +// ignore-lexer-test FIXME #15877 + //! Windows specific console TTY implementation //! //! This module contains the implementation of a Windows specific console TTY. diff --git a/src/librand/distributions/gamma.rs b/src/librand/distributions/gamma.rs index a9f24e1a9ecc..7b6e94eaa920 100644 --- a/src/librand/distributions/gamma.rs +++ b/src/librand/distributions/gamma.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 //! The Gamma and derived distributions. diff --git a/src/libregex/test/tests.rs b/src/libregex/test/tests.rs index 251ab10ad34e..48065992bb05 100644 --- a/src/libregex/test/tests.rs +++ b/src/libregex/test/tests.rs @@ -9,6 +9,7 @@ // except according to those terms. // ignore-tidy-linelength +// ignore-lexer-test FIXME #15679 use regex::{Regex, NoExpand}; diff --git a/src/libstd/collections/hashmap.rs b/src/libstd/collections/hashmap.rs index 098e87243b69..f9c99e54df01 100644 --- a/src/libstd/collections/hashmap.rs +++ b/src/libstd/collections/hashmap.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15883 //! Unordered containers, implemented as hash-tables (`HashSet` and `HashMap` types) diff --git a/src/libstd/io/buffered.rs b/src/libstd/io/buffered.rs index 4f355502eb88..e25006a7b395 100644 --- a/src/libstd/io/buffered.rs +++ b/src/libstd/io/buffered.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15883 //! 
Buffering wrappers for I/O traits diff --git a/src/libstd/io/fs.rs b/src/libstd/io/fs.rs index 449ad6fa0dab..e25ec5366c9f 100644 --- a/src/libstd/io/fs.rs +++ b/src/libstd/io/fs.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 /*! Synchronous File I/O diff --git a/src/libstd/io/mem.rs b/src/libstd/io/mem.rs index 1c0251c8369d..b93b84b7d63f 100644 --- a/src/libstd/io/mem.rs +++ b/src/libstd/io/mem.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 //! Readers and Writers for in-memory buffers diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs index 6ac092fd8c65..42ce2180fa67 100644 --- a/src/libstd/io/mod.rs +++ b/src/libstd/io/mod.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15883 // FIXME: cover these topics: // path, reader, writer, stream, raii (close not needed), diff --git a/src/libstd/num/strconv.rs b/src/libstd/num/strconv.rs index 88fc6e1ffd85..cc30acf064b8 100644 --- a/src/libstd/num/strconv.rs +++ b/src/libstd/num/strconv.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 #![allow(missing_doc)] diff --git a/src/libstd/path/windows.rs b/src/libstd/path/windows.rs index 88ae0d4837e5..0de098319e8b 100644 --- a/src/libstd/path/windows.rs +++ b/src/libstd/path/windows.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15883 //! 
Windows file path handling diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs index bdf1f6eb6007..509d5bd44218 100644 --- a/src/libsyntax/ext/tt/macro_parser.rs +++ b/src/libsyntax/ext/tt/macro_parser.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 //! This is an Earley-like parser, without support for in-grammar nonterminals, //! only by calling out to the main rust parser for named nonterminals (which it diff --git a/src/test/bench/core-std.rs b/src/test/bench/core-std.rs index fd3c4daebdb8..9af3c0c6c8c1 100644 --- a/src/test/bench/core-std.rs +++ b/src/test/bench/core-std.rs @@ -8,6 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +// ignore-lexer-test FIXME #15679 // Microbenchmarks for various functions in std and extra #![feature(macro_rules)] diff --git a/src/test/bench/msgsend-ring-mutex-arcs.rs b/src/test/bench/msgsend-ring-mutex-arcs.rs index 2b9abfbc350a..a0ff7736b5c7 100644 --- a/src/test/bench/msgsend-ring-mutex-arcs.rs +++ b/src/test/bench/msgsend-ring-mutex-arcs.rs @@ -16,6 +16,7 @@ // This also serves as a pipes test, because Arcs are implemented with pipes. // no-pretty-expanded FIXME #15189 +// ignore-lexer-test FIXME #15679 extern crate time; diff --git a/src/test/bench/msgsend-ring-rw-arcs.rs b/src/test/bench/msgsend-ring-rw-arcs.rs index afed753f455b..6512ecfb3e26 100644 --- a/src/test/bench/msgsend-ring-rw-arcs.rs +++ b/src/test/bench/msgsend-ring-rw-arcs.rs @@ -16,6 +16,7 @@ // This also serves as a pipes test, because Arcs are implemented with pipes. 
// no-pretty-expanded FIXME #15189 +// ignore-lexer-test FIXME #15679 extern crate time; diff --git a/src/test/bench/noise.rs b/src/test/bench/noise.rs index 6ec1d5395cf0..bdca03490369 100644 --- a/src/test/bench/noise.rs +++ b/src/test/bench/noise.rs @@ -10,6 +10,7 @@ // Multi-language Perlin noise benchmark. // See https://github.com/nsf/pnoise for timings and alternative implementations. +// ignore-lexer-test FIXME #15679 use std::f32::consts::PI; use std::rand::{Rng, StdRng}; diff --git a/src/test/pretty/block-comment-wchar.rs b/src/test/pretty/block-comment-wchar.rs index 777c456335dc..06ee3715eb04 100644 --- a/src/test/pretty/block-comment-wchar.rs +++ b/src/test/pretty/block-comment-wchar.rs @@ -14,6 +14,7 @@ // ignore-tidy-cr // ignore-tidy-tab // pp-exact:block-comment-wchar.pp +// ignore-lexer-test FIXME #15679 fn f() { fn nested() { /* diff --git a/src/test/run-pass/byte-literals.rs b/src/test/run-pass/byte-literals.rs index ac470268d319..7fd7e3dbf004 100644 --- a/src/test/run-pass/byte-literals.rs +++ b/src/test/run-pass/byte-literals.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15883 static FOO: u8 = b'\xF0'; diff --git a/src/test/run-pass/default-method-supertrait-vtable.rs b/src/test/run-pass/default-method-supertrait-vtable.rs index 2bcf264bb1f1..1b2b17f99171 100644 --- a/src/test/run-pass/default-method-supertrait-vtable.rs +++ b/src/test/run-pass/default-method-supertrait-vtable.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. 
+// +// ignore-lexer-test FIXME #15877 // Tests that we can call a function bounded over a supertrait from diff --git a/src/test/run-pass/ifmt.rs b/src/test/run-pass/ifmt.rs index e349f91a309f..fabcfc5ff334 100644 --- a/src/test/run-pass/ifmt.rs +++ b/src/test/run-pass/ifmt.rs @@ -9,6 +9,7 @@ // except according to those terms. // no-pretty-expanded unnecessary unsafe block generated +// ignore-lexer-test FIXME #15679 #![feature(macro_rules, managed_boxes)] #![deny(warnings)] diff --git a/src/test/run-pass/issue-12582.rs b/src/test/run-pass/issue-12582.rs index 418fd54cc139..f68ba5dab8ae 100644 --- a/src/test/run-pass/issue-12582.rs +++ b/src/test/run-pass/issue-12582.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15877 pub fn main() { let x = 1i; diff --git a/src/test/run-pass/issue-13027.rs b/src/test/run-pass/issue-13027.rs index e1634e44847e..0efe64448c3d 100644 --- a/src/test/run-pass/issue-13027.rs +++ b/src/test/run-pass/issue-13027.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15877 // Tests that match expression handles overlapped literal and range // properly in the presence of guard function. diff --git a/src/test/run-pass/issue-2185.rs b/src/test/run-pass/issue-2185.rs index 492e76552d45..974905487fe2 100644 --- a/src/test/run-pass/issue-2185.rs +++ b/src/test/run-pass/issue-2185.rs @@ -9,6 +9,7 @@ // except according to those terms. // ignore-test +// ignore-lexer-test FIXME #15881 // notes on this test case: // On Thu, Apr 18, 2013-2014 at 6:30 PM, John Clements wrote: diff --git a/src/test/run-pass/issue-2718.rs b/src/test/run-pass/issue-2718.rs index b4807964d46d..c52dd5ce5e4e 100644 --- a/src/test/run-pass/issue-2718.rs +++ b/src/test/run-pass/issue-2718.rs @@ -8,6 +8,8 @@ // , at your // option. 
This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15883 #![feature(unsafe_destructor)] diff --git a/src/test/run-pass/issue-3683.rs b/src/test/run-pass/issue-3683.rs index aa7fa0cb5f04..e6c816666e79 100644 --- a/src/test/run-pass/issue-3683.rs +++ b/src/test/run-pass/issue-3683.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15877 trait Foo { diff --git a/src/test/run-pass/issue-4759-1.rs b/src/test/run-pass/issue-4759-1.rs index ad8ee984217c..ce2f488b90c7 100644 --- a/src/test/run-pass/issue-4759-1.rs +++ b/src/test/run-pass/issue-4759-1.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15877 trait U { fn f(self); } impl U for int { fn f(self) {} } diff --git a/src/test/run-pass/issue-5280.rs b/src/test/run-pass/issue-5280.rs index 977cd08ba377..bd8924650546 100644 --- a/src/test/run-pass/issue-5280.rs +++ b/src/test/run-pass/issue-5280.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15877 type FontTableTag = u32; diff --git a/src/test/run-pass/issue-5321-immediates-with-bare-self.rs b/src/test/run-pass/issue-5321-immediates-with-bare-self.rs index fcb8092b7234..511b8a968306 100644 --- a/src/test/run-pass/issue-5321-immediates-with-bare-self.rs +++ b/src/test/run-pass/issue-5321-immediates-with-bare-self.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. 
+// +// ignore-lexer-test FIXME #15877 trait Fooable { fn yes(self); diff --git a/src/test/run-pass/lexer-crlf-line-endings-string-literal-doc-comment.rs b/src/test/run-pass/lexer-crlf-line-endings-string-literal-doc-comment.rs index 5c8db524cc2e..421ae8e94972 100644 --- a/src/test/run-pass/lexer-crlf-line-endings-string-literal-doc-comment.rs +++ b/src/test/run-pass/lexer-crlf-line-endings-string-literal-doc-comment.rs @@ -16,6 +16,7 @@ // this directory should enforce it. // ignore-pretty +// ignore-lexer-test FIXME #15882 /// Doc comment that ends in CRLF pub fn foo() {} diff --git a/src/test/run-pass/lint-non-camel-case-types-non-uppercase-statics-unicode.rs b/src/test/run-pass/lint-non-camel-case-types-non-uppercase-statics-unicode.rs index d5e277b46e04..36c663fc8474 100644 --- a/src/test/run-pass/lint-non-camel-case-types-non-uppercase-statics-unicode.rs +++ b/src/test/run-pass/lint-non-camel-case-types-non-uppercase-statics-unicode.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 #![forbid(non_camel_case_types)] diff --git a/src/test/run-pass/match-range.rs b/src/test/run-pass/match-range.rs index 7421ae958840..8b782520536a 100644 --- a/src/test/run-pass/match-range.rs +++ b/src/test/run-pass/match-range.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15877 pub fn main() { match 5u { diff --git a/src/test/run-pass/multibyte.rs b/src/test/run-pass/multibyte.rs index ba3d89e3c7a6..77084836408a 100644 --- a/src/test/run-pass/multibyte.rs +++ b/src/test/run-pass/multibyte.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. 
+// +// ignore-lexer-test FIXME #15679 // Test that multibyte characters don't crash the compiler pub fn main() { diff --git a/src/test/run-pass/raw-str.rs b/src/test/run-pass/raw-str.rs index da0c9eed9e6e86ddaebfec10bf08a648cd692134..35e863d05a177dd06262b4d4302b5f40886ed0aa 100644 GIT binary patch delta 36 rcmbQqwVP|hBS!I@)QZ$1-ICPe5(PKU2wztPWkXXlbIZ-w8J(B`?_LYo delta 12 TcmdnZHIr+@BgV}vOy, at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15883 pub struct Quad { a: u64, b: u64, c: u64, d: u64 } pub struct Floats { a: f64, b: u8, c: f64 } diff --git a/src/test/run-pass/trait-to-str.rs b/src/test/run-pass/trait-to-str.rs index fbe40e837de5..9b910d24bdc5 100644 --- a/src/test/run-pass/trait-to-str.rs +++ b/src/test/run-pass/trait-to-str.rs @@ -7,7 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. - +// +// ignore-lexer-test FIXME #15883 trait to_str { diff --git a/src/test/run-pass/trait-with-bounds-default.rs b/src/test/run-pass/trait-with-bounds-default.rs index fc4acfd5bb3c..ec9f666eb198 100644 --- a/src/test/run-pass/trait-with-bounds-default.rs +++ b/src/test/run-pass/trait-with-bounds-default.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15877 pub trait Clone2 { /// Returns a copy of the value. The contents of owned pointers diff --git a/src/test/run-pass/traits-default-method-self.rs b/src/test/run-pass/traits-default-method-self.rs index 1027008624a2..270b95452187 100644 --- a/src/test/run-pass/traits-default-method-self.rs +++ b/src/test/run-pass/traits-default-method-self.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. 
+// +// ignore-lexer-test FIXME #15877 trait Cat { diff --git a/src/test/run-pass/traits-default-method-trivial.rs b/src/test/run-pass/traits-default-method-trivial.rs index c6a7ab5ba496..474632a7ffa5 100644 --- a/src/test/run-pass/traits-default-method-trivial.rs +++ b/src/test/run-pass/traits-default-method-trivial.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15877 trait Cat { diff --git a/src/test/run-pass/unsized.rs b/src/test/run-pass/unsized.rs index f49e8f46e78e..0530c8a6ab3d 100644 --- a/src/test/run-pass/unsized.rs +++ b/src/test/run-pass/unsized.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15879 // Test syntax checks for `Sized?` syntax. diff --git a/src/test/run-pass/unsized2.rs b/src/test/run-pass/unsized2.rs index 9703b55cda76..ada4da37ba11 100644 --- a/src/test/run-pass/unsized2.rs +++ b/src/test/run-pass/unsized2.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15879 #![feature(struct_variant)] diff --git a/src/test/run-pass/utf8-bom.rs b/src/test/run-pass/utf8-bom.rs index ccd40cb88fe0..baa4e941ff09 100644 --- a/src/test/run-pass/utf8-bom.rs +++ b/src/test/run-pass/utf8-bom.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 // This file has utf-8 BOM, it should be compiled normally without error. diff --git a/src/test/run-pass/utf8.rs b/src/test/run-pass/utf8.rs index 557d2e5878e1..a52828387bf9 100644 --- a/src/test/run-pass/utf8.rs +++ b/src/test/run-pass/utf8.rs @@ -7,7 +7,8 @@ // , at your // option. 
This file may not be copied, modified, or distributed // except according to those terms. - +// +// ignore-lexer-test FIXME #15679 // no-pretty-expanded FIXME #15189 pub fn main() { diff --git a/src/test/run-pass/utf8_chars.rs b/src/test/run-pass/utf8_chars.rs index 93c8111ad2d1..202427079a87 100644 --- a/src/test/run-pass/utf8_chars.rs +++ b/src/test/run-pass/utf8_chars.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 use std::str; diff --git a/src/test/run-pass/utf8_idents.rs b/src/test/run-pass/utf8_idents.rs index ee4b2061a5da..f6c4776a11cc 100644 --- a/src/test/run-pass/utf8_idents.rs +++ b/src/test/run-pass/utf8_idents.rs @@ -7,6 +7,8 @@ // , at your // option. This file may not be copied, modified, or distributed // except according to those terms. +// +// ignore-lexer-test FIXME #15679 #![feature(non_ascii_idents)] From 857bb60fe01272c338c5f89912561bdf147af94a Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 21 Jul 2014 19:26:20 -0700 Subject: [PATCH 11/12] Don't run lexer tests by default --- mk/tests.mk | 2 +- src/grammar/README.md | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mk/tests.mk b/mk/tests.mk index 6068af8f7f46..2e500ffeb0a5 100644 --- a/mk/tests.mk +++ b/mk/tests.mk @@ -171,7 +171,7 @@ endif # Main test targets ###################################################################### -check: cleantmptestlogs cleantestlibs check-notidy tidy check-syntax +check: cleantmptestlogs cleantestlibs check-notidy tidy check-notidy: cleantmptestlogs cleantestlibs all check-stage2 $(Q)$(CFG_PYTHON) $(S)src/etc/check-summary.py tmp/*.log diff --git a/src/grammar/README.md b/src/grammar/README.md index 69f8ab1e486a..f5b872cdc7f6 100644 --- a/src/grammar/README.md +++ b/src/grammar/README.md @@ -1,9 +1,10 @@ Reference grammar. 
Uses [antlr4](http://www.antlr.org/) and a custom Rust tool to compare -ASTs/token streams generated. +ASTs/token streams generated. You can use the `check-syntax` make target to +run all of the available tests. -To use: +To use manually: ``` antlr4 RustLexer.g4 From 95a1ce6f3f3a31d4e64b04637cd252cc02b623dd Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 21 Jul 2014 22:53:36 -0700 Subject: [PATCH 12/12] Fix pretty test --- src/test/pretty/block-comment-wchar.pp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/pretty/block-comment-wchar.pp b/src/test/pretty/block-comment-wchar.pp index 7def090edbb1..ef908fdd016f 100644 --- a/src/test/pretty/block-comment-wchar.pp +++ b/src/test/pretty/block-comment-wchar.pp @@ -14,6 +14,7 @@ // ignore-tidy-cr // ignore-tidy-tab // pp-exact:block-comment-wchar.pp +// ignore-lexer-test FIXME #15679 fn f() { fn nested() { /*