From 19e1f5cdb6a47070fd5f12993e947ea6db0eb5dd Mon Sep 17 00:00:00 2001
From: Corey Richardson <corey@octayn.net>
Date: Mon, 14 Jul 2014 01:52:18 -0700
Subject: [PATCH 01/12] Lexer; subtly wrong; no makefile

---
 src/grammar/README.md    |  19 ++++
 src/grammar/RustLexer.g4 | 165 +++++++++++++++++++++++++++++
 src/grammar/verify.rs    | 217 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 401 insertions(+)
 create mode 100644 src/grammar/README.md
 create mode 100644 src/grammar/RustLexer.g4
 create mode 100644 src/grammar/verify.rs

diff --git a/src/grammar/README.md b/src/grammar/README.md
new file mode 100644
index 000000000000..69f8ab1e486a
--- /dev/null
+++ b/src/grammar/README.md
@@ -0,0 +1,19 @@
+Reference grammar.
+
+Uses [antlr4](http://www.antlr.org/) and a custom Rust tool to compare
+ASTs/token streams generated.
+
+To use:
+
+```
+antlr4 RustLexer.g4
+javac *.java
+rustc -O verify.rs
+for file in ../*/**.rs; do
+    echo $file;
+    grun RustLexer tokens -tokens < $file | ./verify $file || break
+done
+```
+
+Note that the `../*/**.rs` glob will match every `*.rs` file in the parent
+directory and all of its recursive children. This is a zsh extension.
diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4
new file mode 100644
index 000000000000..8a1a39aea0dd
--- /dev/null
+++ b/src/grammar/RustLexer.g4
@@ -0,0 +1,165 @@
+lexer grammar RustLexer;
+
+/* Note: due to antlr limitations, we can't represent XID_start and
+ * XID_continue properly. ASCII-only substitute. */
+
+fragment XID_start : [_a-zA-Z] ;
+fragment XID_continue : [_a-zA-Z0-9] ;
+
+/* Expression-operator symbols */
+
+EQ      : '=' ;
+LT      : '<' ;
+LE      : '<=' ;
+EQEQ    : '==' ;
+NE      : '!=' ;
+GE      : '>=' ;
+GT      : '>' ;
+ANDAND  : '&&' ;
+OROR    : '||' ;
+NOT     : '!' ;
+TILDE   : '~' ;
+PLUS    : '+' ;
+MINUS   : '-' ;
+STAR    : '*' ;
+SLASH   : '/' ;
+PERCENT : '%' ;
+CARET   : '^' ;
+AND     : '&' ;
+OR      : '|' ;
+SHL     : '<<' ;
+SHR     : '>>' ;
+
+BINOP
+    : PLUS
+    | MINUS
+    | STAR
+    | PERCENT
+    | CARET
+    | AND
+    | OR
+    | SHL
+    | SHR
+    ;
+
+BINOPEQ : BINOP EQ ;
+
+/* "Structural symbols" */
+
+AT         : '@' ;
+DOT        : '.' ;
+DOTDOT     : '..' ;
+DOTDOTDOT  : '...' ;
+COMMA      : ',' ;
+SEMI       : ';' ;
+COLON      : ':' ;
+MOD_SEP    : '::' ;
+RARROW     : '->' ;
+FAT_ARROW  : '=>' ;
+LPAREN     : '(' ;
+RPAREN     : ')' ;
+LBRACKET   : '[' ;
+RBRACKET   : ']' ;
+LBRACE     : '{' ;
+RBRACE     : '}' ;
+POUND      : '#';
+DOLLAR     : '$' ;
+UNDERSCORE : '_' ;
+
+// Literals
+
+fragment HEXIT
+  : [0-9a-fA-F]
+  ;
+
+fragment CHAR_ESCAPE
+  : [nrt\\'"0]
+  | [xX] HEXIT HEXIT
+  | 'u' HEXIT HEXIT HEXIT HEXIT
+  | 'U' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT
+  ;
+
+LIT_CHAR
+  : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] ) '\''
+  ;
+
+INT_SUFFIX
+  : 'i'
+  | 'i8'
+  | 'i16'
+  | 'i32'
+  | 'i64'
+  | 'u'
+  | 'u8'
+  | 'u16'
+  | 'u32'
+  | 'u64'
+  ;
+
+LIT_INTEGER
+  : [0-9][0-9_]* INT_SUFFIX?
+  | '0b' [01][01_]* INT_SUFFIX?
+  | '0o' [0-7][0-7_]* INT_SUFFIX?
+  | '0x' [0-9a-fA-F][0-9a-fA-F_]* INT_SUFFIX?
+  ;
+
+FLOAT_SUFFIX
+  : 'f32'
+  | 'f64'
+  | 'f128'
+  ;
+
+LIT_FLOAT
+  : [0-9][0-9_]* ('.' | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? FLOAT_SUFFIX?)
+  ;
+
+LIT_STR
+  : '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? '"'
+  ;
+
+LIT_BINARY : 'b' LIT_STR ;
+LIT_BINARY_RAW : 'b' LIT_STR_RAW ;
+
+/* this is a bit messy */
+
+fragment LIT_STR_RAW_INNER
+  : '"' .*? '"'
+  | LIT_STR_RAW_INNER2
+  ;
+
+fragment LIT_STR_RAW_INNER2
+  : POUND LIT_STR_RAW_INNER POUND
+  ;
+
+LIT_STR_RAW
+  : 'r' LIT_STR_RAW_INNER
+  ;
+
+IDENT : XID_start XID_continue* ;
+
+LIFETIME : '\'' IDENT ;
+
+WHITESPACE : [ \r\n\t]+ ;
+
+COMMENT
+  : '//' ~[\r\n]*
+  | '////' ~[\r\n]*
+  | BLOCK_COMMENT
+  ;
+
+mode DOCCOMMENT;
+
+fragment DOC_BLOCK_COMMENT
+  : ('/**' | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/'
+  ;
+
+DOC_COMMENT
+  : '///' ~[\r\n]*
+  | '//!' ~[\r\n]*
+  | DOC_BLOCK_COMMENT
+  ;
+
+fragment BLOCK_COMMENT
+  : '/*' (BLOCK_COMMENT | .)*? '*/'
+  ;
+
diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs
new file mode 100644
index 000000000000..56c78b89ba2e
--- /dev/null
+++ b/src/grammar/verify.rs
@@ -0,0 +1,217 @@
+#![feature(globs, phase, macro_rules)]
+
+extern crate syntax;
+extern crate rustc;
+
+#[phase(link)]
+extern crate regex;
+
+#[phase(link, plugin)]
+extern crate log;
+
+#[phase(plugin)] extern crate regex_macros;
+
+use std::collections::HashMap;
+use std::io::File;
+
+use syntax::parse;
+use syntax::parse::lexer;
+use rustc::driver::{session, config};
+
+use syntax::ast;
+use syntax::ast::Name;
+use syntax::parse::token::*;
+use syntax::parse::lexer::TokenAndSpan;
+
+fn parse_token_list(file: &str) -> HashMap<String, Token> {
+    fn id() -> Token {
+        IDENT(ast::Ident { name: Name(0), ctxt: 0, }, false)
+    }
+
+    let mut res = HashMap::new();
+
+    res.insert("-1".to_string(), EOF);
+
+    for line in file.split('\n') {
+        let eq = match line.trim().rfind('=') {
+            Some(val) => val,
+            None => continue
+        };
+
+        let val = line.slice_to(eq);
+        let num = line.slice_from(eq + 1);
+
+        let tok = match val {
+            "SHR" => BINOP(SHR),
+            "DOLLAR" => DOLLAR,
+            "LT" => LT,
+            "STAR" => BINOP(STAR),
+            "FLOAT_SUFFIX" => id(),
+            "INT_SUFFIX" => id(),
+            "SHL" => BINOP(SHL),
+            "LBRACE" => LBRACE,
+            "RARROW" => RARROW,
+            "LIT_STR" => LIT_STR(Name(0)),
+            "DOTDOT" => DOTDOT,
+            "MOD_SEP" => MOD_SEP,
+            "DOTDOTDOT" => DOTDOTDOT,
+            "NOT" => NOT,
+            "AND" => BINOP(AND),
+            "LPAREN" => LPAREN,
+            "ANDAND" => ANDAND,
+            "AT" => AT,
+            "LBRACKET" => LBRACKET,
+            "LIT_STR_RAW" => LIT_STR_RAW(Name(0), 0),
+            "RPAREN" => RPAREN,
+            "SLASH" => BINOP(SLASH),
+            "COMMA" => COMMA,
+            "LIFETIME" => LIFETIME(ast::Ident { name: Name(0), ctxt: 0 }),
+            "CARET" => BINOP(CARET),
+            "TILDE" => TILDE,
+            "IDENT" => id(),
+            "PLUS" => BINOP(PLUS),
+            "LIT_CHAR" => LIT_CHAR(Name(0)),
+            "EQ" => EQ,
+            "RBRACKET" => RBRACKET,
+            "COMMENT" => COMMENT,
+            "DOC_COMMENT" => DOC_COMMENT(Name(0)),
+            "DOT" => DOT,
+            "EQEQ" => EQEQ,
+            "NE" => NE,
+            "GE" => GE,
+            "PERCENT" => BINOP(PERCENT),
+            "RBRACE" => RBRACE,
+            "BINOP" => BINOP(PLUS),
+            "POUND" => POUND,
+            "OROR" => OROR,
+            "LIT_INTEGER" => LIT_INTEGER(Name(0)),
+            "BINOPEQ" => BINOPEQ(PLUS),
+            "LIT_FLOAT" => LIT_FLOAT(Name(0)),
+            "WHITESPACE" => WS,
+            "UNDERSCORE" => UNDERSCORE,
+            "MINUS" => BINOP(MINUS),
+            "SEMI" => SEMI,
+            "COLON" => COLON,
+            "FAT_ARROW" => FAT_ARROW,
+            "OR" => BINOP(OR),
+            "GT" => GT,
+            "LE" => LE,
+            "LIT_BINARY" => LIT_BINARY(Name(0)),
+            "LIT_BINARY_RAW" => LIT_BINARY_RAW(Name(0), 0),
+            _ => continue
+        };
+
+        res.insert(num.to_string(), tok);
+    }
+
+    debug!("Token map: {}", res);
+    res
+}
+
+fn str_to_binop(mut s: &str) -> BinOp {
+    if s.ends_with("'") {
+        s = s.slice_to(s.len() - 1);
+    }
+
+    match s {
+        "+" => PLUS,
+        "-" => MINUS,
+        "*" => STAR,
+        "%" => PERCENT,
+        "^" => CARET,
+        "&" => AND,
+        "|" => OR,
+        "<<" => SHL,
+        ">>" => SHR,
+        _ => fail!("Bad binop str {}", s)
+    }
+}
+
+fn parse_antlr_token(s: &str, tokens: &HashMap<String, Token>) -> TokenAndSpan {
+    let re = regex!(r"\[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?),<(?P<toknum>-?\d+)>,\d+:\d+]");
+
+    let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice());
+    let start = m.name("start");
+    let end = m.name("end");
+    let toknum = m.name("toknum");
+    let content = m.name("content");
+
+    let proto_tok = tokens.find_equiv(&toknum).expect(format!("didn't find token {} in the map", toknum).as_slice());
+    let real_tok = match *proto_tok {
+        BINOP(PLUS) => BINOP(str_to_binop(content)),
+        BINOPEQ(PLUS) => BINOPEQ(str_to_binop(content.slice_to(content.len() - 2))),
+        ref t => t.clone()
+    };
+
+    let offset = if real_tok == EOF {
+        1
+    } else {
+        0
+    };
+
+    let sp = syntax::codemap::Span {
+        lo: syntax::codemap::BytePos(from_str::<u32>(start).unwrap() - offset),
+        hi: syntax::codemap::BytePos(from_str::<u32>(end).unwrap() + 1),
+        expn_info: None
+    };
+
+    TokenAndSpan {
+        tok: real_tok,
+        sp: sp
+    }
+}
+
+fn main() {
+    fn next(r: &mut lexer::StringReader) -> TokenAndSpan {
+        use syntax::parse::lexer::Reader;
+        r.next_token()
+    }
+
+    let token_map = parse_token_list(File::open(&Path::new("RustLexer.tokens")).unwrap().read_to_string().unwrap().as_slice());
+    let mut stdin = std::io::stdin();
+    let mut antlr_tokens = stdin.lines().map(|l| parse_antlr_token(l.unwrap().as_slice().trim(), &token_map));
+
+    let code = File::open(&Path::new(std::os::args().get(1).as_slice())).unwrap().read_to_string().unwrap();
+    let options = config::basic_options();
+    let session = session::build_session(options, None);
+    let filemap = parse::string_to_filemap(&session.parse_sess,
+                                           code,
+                                           String::from_str("<n/a>"));
+    let mut lexer = lexer::StringReader::new(session.diagnostic(), filemap);
+
+    for antlr_tok in antlr_tokens {
+        let rustc_tok = next(&mut lexer);
+        if rustc_tok.tok == EOF && antlr_tok.tok == EOF {
+            continue
+        }
+
+        assert!(rustc_tok.sp == antlr_tok.sp, "{} and {} have different spans", rustc_tok, antlr_tok);
+
+        macro_rules! matches (
+            ( $($x:pat),+ ) => (
+                match rustc_tok.tok {
+                    $($x => match antlr_tok.tok {
+                        $x => (),
+                        _ => fail!("{} is not {}", antlr_tok, rustc_tok)
+                    },)*
+                    ref c => assert!(c == antlr_tok.tok, "{} is not {}", rustc_tok, antlr_tok)
+                }
+            )
+        )
+
+        matches!(LIT_BYTE(..),
+            LIT_CHAR(..),
+            LIT_INTEGER(..),
+            LIT_FLOAT(..),
+            LIT_STR(..),
+            LIT_STR_RAW(..),
+            LIT_BINARY(..),
+            LIT_BINARY_RAW(..),
+            IDENT(..),
+            LIFETIME(..),
+            INTERPOLATED(..),
+            DOC_COMMENT(..),
+            SHEBANG(..)
+        );
+    }
+}

From 1a1a9d54456355cc0ebdd397fd04871abe27f78c Mon Sep 17 00:00:00 2001
From: Corey Richardson <corey@octayn.net>
Date: Mon, 14 Jul 2014 01:56:52 -0700
Subject: [PATCH 02/12] Add raw string literal ambiguity document

---
 src/grammar/raw-string-literal-ambiguity.md | 29 +++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 src/grammar/raw-string-literal-ambiguity.md

diff --git a/src/grammar/raw-string-literal-ambiguity.md b/src/grammar/raw-string-literal-ambiguity.md
new file mode 100644
index 000000000000..6b63bbcb4f06
--- /dev/null
+++ b/src/grammar/raw-string-literal-ambiguity.md
@@ -0,0 +1,29 @@
+Rust's lexical grammar is not context-free. Raw string literals are the source
+of the problem. Informally, a raw string literal is an `r`, followed by `N`
+hashes (where N can be zero), a quote, any characters, then a quote followed
+by `N` hashes. The following grammar describes this as well as possible:
+
+    R -> 'r' S
+    S -> '"' B '"'
+    S -> '#' S '#'
+    B -> . B
+    B -> ε
+
+Where `.` represents any character, and `ε` the empty string. Consider the
+string `r#""#"#`. This string is not a valid raw string literal, but can be
+accepted as one by the above grammar, using the derivation:
+
+    R : #""#"#
+    S : ""#"
+    S : "#
+    B : #
+    B : ε
+
+(Where `T : U` means the rule `T` is applied, and `U` is the remainder of the
+string.) The difficulty arises from the fact that raw string literals are
+fundamentally context-sensitive. In particular, the context needed is the
+number of hashes. I know of no way to resolve this, but also have not come up
+with a proof that the language is not context-free. Such a proof would probably
+use the pumping lemma for context-free languages, but I (cmr) could not come up
+with a proof after spending a few hours on it, and decided my time was best
+spent elsewhere. Pull requests welcome!

From 76a15520212b1b9a72c099e0a163a31c102fcde4 Mon Sep 17 00:00:00 2001
From: Corey Richardson <corey@octayn.net>
Date: Mon, 14 Jul 2014 14:13:38 -0700
Subject: [PATCH 03/12] First pass at line comment correctness

---
 src/grammar/RustLexer.g4 | 45 ++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 15 deletions(-)

diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4
index 8a1a39aea0dd..f5d535a129f7 100644
--- a/src/grammar/RustLexer.g4
+++ b/src/grammar/RustLexer.g4
@@ -1,11 +1,23 @@
 lexer grammar RustLexer;
 
+tokens {
+    EQ, LT, LE, EQEQ, NE, GE, GT, ANDAND, OROR, NOT, TILDE, PLUS,
+    MINUS, STAR, SLASH, PERCENT, CARET, AND, OR, SHL, SHR, BINOP,
+    BINOPEQ, AT, DOT, DOTDOT, DOTDOTDOT, COMMA, SEMI, COLON,
+    MOD_SEP, RARROW, FAT_ARROW, LPAREN, RPAREN, LBRACKET, RBRACKET,
+    LBRACE, RBRACE, POUND, DOLLAR, UNDERSCORE, LIT_CHAR,
+    LIT_INTEGER, LIT_FLOAT, LIT_STR, LIT_STR_RAW, LIT_BINARY,
+    LIT_BINARY_RAW, IDENT, LIFETIME, WHITESPACE, DOC_COMMENT,
+    COMMENT
+}
+
 /* Note: due to antlr limitations, we can't represent XID_start and
  * XID_continue properly. ASCII-only substitute. */
 
 fragment XID_start : [_a-zA-Z] ;
 fragment XID_continue : [_a-zA-Z0-9] ;
 
+
 /* Expression-operator symbols */
 
 EQ      : '=' ;
@@ -83,7 +95,7 @@ LIT_CHAR
   : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] ) '\''
   ;
 
-INT_SUFFIX
+fragment INT_SUFFIX
   : 'i'
   | 'i8'
   | 'i16'
@@ -141,25 +153,28 @@ LIFETIME : '\'' IDENT ;
 
 WHITESPACE : [ \r\n\t]+ ;
 
-COMMENT
-  : '//' ~[\r\n]*
-  | '////' ~[\r\n]*
-  | BLOCK_COMMENT
+LINE_COMMENT_NOT_A_TOKEN : '//' -> more, pushMode(LINE_COMMENT) ;
+
+
+DOC_BLOCK_COMMENT
+  : ('/**' | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT)
   ;
 
-mode DOCCOMMENT;
+BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ;
 
-fragment DOC_BLOCK_COMMENT
-  : ('/**' | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/'
+mode LINE_COMMENT;
+
+MAYBE_DOC_COMMENT
+  : '/' -> more, pushMode(LINE_DOC_COMMENT)
   ;
 
-DOC_COMMENT
-  : '///' ~[\r\n]*
-  | '//!' ~[\r\n]*
-  | DOC_BLOCK_COMMENT
+MAYBE_OUTER_DOC_COMMENT
+  : '!' ~[\r\n]* -> type(LINE_DOC_COMMENT), popMode
   ;
 
-fragment BLOCK_COMMENT
-  : '/*' (BLOCK_COMMENT | .)*? '*/'
-  ;
+COMMENT : ~[\r\n]* -> popMode ;
 
+mode LINE_DOC_COMMENT;
+
+ACTUALLY_A_COMMENT : '/' ~[\r\n]* -> type(COMMENT), popMode ;
+REALLY_A_DOC_COMMENT : ~[\r\n]* -> type(DOC_COMMENT), popMode ;

From 9fc5cf902f9613f40ce4d4346d1ae98a0904e67a Mon Sep 17 00:00:00 2001
From: Corey Richardson <corey@octayn.net>
Date: Mon, 14 Jul 2014 17:27:28 -0700
Subject: [PATCH 04/12] Refine the tooling, handle comments

---
 src/grammar/RustLexer.g4 | 23 +++---------
 src/grammar/verify.rs    | 76 +++++++++++++++++++++++++++++++++-------
 2 files changed, 68 insertions(+), 31 deletions(-)

diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4
index f5d535a129f7..e4640ccfb164 100644
--- a/src/grammar/RustLexer.g4
+++ b/src/grammar/RustLexer.g4
@@ -153,28 +153,13 @@ LIFETIME : '\'' IDENT ;
 
 WHITESPACE : [ \r\n\t]+ ;
 
-LINE_COMMENT_NOT_A_TOKEN : '//' -> more, pushMode(LINE_COMMENT) ;
-
+UNDOC_COMMENT     : '////' ~[\r\n]* -> type(COMMENT) ;
+YESDOC_COMMENT    : '///' ~[\r\n]* -> type(DOC_COMMENT) ;
+OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ;
+LINE_COMMENT      : '//' ~[\r\n]* -> type(COMMENT) ;
 
 DOC_BLOCK_COMMENT
   : ('/**' | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT)
   ;
 
 BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ;
-
-mode LINE_COMMENT;
-
-MAYBE_DOC_COMMENT
-  : '/' -> more, pushMode(LINE_DOC_COMMENT)
-  ;
-
-MAYBE_OUTER_DOC_COMMENT
-  : '!' ~[\r\n]* -> type(LINE_DOC_COMMENT), popMode
-  ;
-
-COMMENT : ~[\r\n]* -> popMode ;
-
-mode LINE_DOC_COMMENT;
-
-ACTUALLY_A_COMMENT : '/' ~[\r\n]* -> type(COMMENT), popMode ;
-REALLY_A_DOC_COMMENT : ~[\r\n]* -> type(DOC_COMMENT), popMode ;
diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs
index 56c78b89ba2e..38dd86e07729 100644
--- a/src/grammar/verify.rs
+++ b/src/grammar/verify.rs
@@ -108,13 +108,10 @@ fn parse_token_list(file: &str) -> HashMap<String, Token> {
     res
 }
 
-fn str_to_binop(mut s: &str) -> BinOp {
-    if s.ends_with("'") {
-        s = s.slice_to(s.len() - 1);
-    }
-
+fn str_to_binop(s: &str) -> BinOp {
     match s {
         "+" => PLUS,
+        "/" => SLASH,
         "-" => MINUS,
         "*" => STAR,
         "%" => PERCENT,
@@ -123,12 +120,35 @@ fn str_to_binop(mut s: &str) -> BinOp {
         "|" => OR,
         "<<" => SHL,
         ">>" => SHR,
-        _ => fail!("Bad binop str {}", s)
+        _ => fail!("Bad binop str `{}`", s)
     }
 }
 
+/// Assuming a raw string/binary literal, strip out the leading/trailing
+/// hashes and surrounding quotes/raw/binary prefix.
+fn fix(mut lit: &str) -> ast::Name {
+    if lit.char_at(0) == 'r' {
+        if lit.char_at(1) == 'b' {
+            lit = lit.slice_from(2)
+        } else {
+            lit = lit.slice_from(1);
+        }
+    } else if lit.char_at(0) == 'b' {
+        lit = lit.slice_from(1);
+    }
+
+    let leading_hashes = count(lit);
+
+    // +1/-1 to adjust for single quotes
+    parse::token::intern(lit.slice(leading_hashes + 1, lit.len() - leading_hashes - 1))
+}
+
+fn count(lit: &str) -> uint {
+    lit.chars().take_while(|c| *c == '#').count()
+}
+
 fn parse_antlr_token(s: &str, tokens: &HashMap<String, Token>) -> TokenAndSpan {
-    let re = regex!(r"\[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?),<(?P<toknum>-?\d+)>,\d+:\d+]");
+    let re = regex!(r"\[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]");
 
     let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice());
     let start = m.name("start");
@@ -137,9 +157,24 @@ fn parse_antlr_token(s: &str, tokens: &HashMap<String, Token>) -> TokenAndSpan {
     let content = m.name("content");
 
     let proto_tok = tokens.find_equiv(&toknum).expect(format!("didn't find token {} in the map", toknum).as_slice());
+
+    let nm = parse::token::intern(content);
+
+    debug!("What we got: content (`{}`), proto: {}", content, proto_tok);
+
     let real_tok = match *proto_tok {
-        BINOP(PLUS) => BINOP(str_to_binop(content)),
-        BINOPEQ(PLUS) => BINOPEQ(str_to_binop(content.slice_to(content.len() - 2))),
+        BINOP(..) => BINOP(str_to_binop(content)),
+        BINOPEQ(..) => BINOPEQ(str_to_binop(content.slice_to(content.len() - 1))),
+        LIT_STR(..) => LIT_STR(fix(content)),
+        LIT_STR_RAW(..) => LIT_STR_RAW(fix(content), count(content)),
+        LIT_CHAR(..) => LIT_CHAR(nm),
+        DOC_COMMENT(..) => DOC_COMMENT(nm),
+        LIT_INTEGER(..) => LIT_INTEGER(nm),
+        LIT_FLOAT(..) => LIT_FLOAT(nm),
+        LIT_BINARY(..) => LIT_BINARY(nm),
+        LIT_BINARY_RAW(..) => LIT_BINARY_RAW(fix(content), count(content)),
+        IDENT(..) => IDENT(ast::Ident { name: nm, ctxt: 0 }, true),
+        LIFETIME(..) => LIFETIME(ast::Ident { name: nm, ctxt: 0 }),
         ref t => t.clone()
     };
 
@@ -161,6 +196,16 @@ fn parse_antlr_token(s: &str, tokens: &HashMap<String, Token>) -> TokenAndSpan {
     }
 }
 
+fn tok_cmp(a: &Token, b: &Token) -> bool {
+    match a {
+        &IDENT(id, _) => match b {
+                &IDENT(id2, _) => id == id2,
+                _ => false
+        },
+        _ => a == b
+    }
+}
+
 fn main() {
     fn next(r: &mut lexer::StringReader) -> TokenAndSpan {
         use syntax::parse::lexer::Reader;
@@ -173,7 +218,8 @@ fn main() {
 
     let code = File::open(&Path::new(std::os::args().get(1).as_slice())).unwrap().read_to_string().unwrap();
     let options = config::basic_options();
-    let session = session::build_session(options, None);
+    let session = session::build_session(options, None,
+                                         syntax::diagnostics::registry::Registry::new([]));
     let filemap = parse::string_to_filemap(&session.parse_sess,
                                            code,
                                            String::from_str("<n/a>"));
@@ -191,10 +237,16 @@ fn main() {
             ( $($x:pat),+ ) => (
                 match rustc_tok.tok {
                     $($x => match antlr_tok.tok {
-                        $x => (),
+                        $x => {
+                            if !tok_cmp(&rustc_tok.tok, &antlr_tok.tok) {
+                                // FIXME #15677: needs more robust escaping in
+                                // antlr
+                                warn!("Different names for {} and {}", rustc_tok, antlr_tok);
+                            }
+                        }
                         _ => fail!("{} is not {}", antlr_tok, rustc_tok)
                     },)*
-                    ref c => assert!(c == antlr_tok.tok, "{} is not {}", rustc_tok, antlr_tok)
+                    ref c => assert!(c == &antlr_tok.tok, "{} is not {}", rustc_tok, antlr_tok)
                 }
             )
         )

From f8fd32ef9dd48a216ae5ca44ca65ea8f2205f581 Mon Sep 17 00:00:00 2001
From: Corey Richardson <corey@octayn.net>
Date: Mon, 14 Jul 2014 20:45:39 -0700
Subject: [PATCH 05/12] Byte/raw binary literal fixes

---
 src/grammar/.gitignore   |  4 ++++
 src/grammar/RustLexer.g4 |  9 +++++++--
 src/grammar/verify.rs    | 15 +++++++++++++--
 3 files changed, 24 insertions(+), 4 deletions(-)
 create mode 100644 src/grammar/.gitignore

diff --git a/src/grammar/.gitignore b/src/grammar/.gitignore
new file mode 100644
index 000000000000..e77db28967e3
--- /dev/null
+++ b/src/grammar/.gitignore
@@ -0,0 +1,4 @@
+verify
+*.class
+*.java
+*.tokens
diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4
index e4640ccfb164..f2705e5421b9 100644
--- a/src/grammar/RustLexer.g4
+++ b/src/grammar/RustLexer.g4
@@ -44,6 +44,7 @@ SHR     : '>>' ;
 
 BINOP
     : PLUS
+    | SLASH
     | MINUS
     | STAR
     | PERCENT
@@ -95,6 +96,10 @@ LIT_CHAR
   : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] ) '\''
   ;
 
+LIT_BYTE
+  : 'b\'' ( '\\' ( [xX] HEXIT HEXIT | [nrt\\'"0] ) | ~[\\'\n\t\r] ) '\''
+  ;
+
 fragment INT_SUFFIX
   : 'i'
   | 'i8'
@@ -130,7 +135,7 @@ LIT_STR
   ;
 
 LIT_BINARY : 'b' LIT_STR ;
-LIT_BINARY_RAW : 'b' LIT_STR_RAW ;
+LIT_BINARY_RAW : 'rb' LIT_STR_RAW ;
 
 /* this is a bit messy */
 
@@ -159,7 +164,7 @@ OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ;
 LINE_COMMENT      : '//' ~[\r\n]* -> type(COMMENT) ;
 
 DOC_BLOCK_COMMENT
-  : ('/**' | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT)
+  : ('/**' ~[*] | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT)
   ;
 
 BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ;
diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs
index 38dd86e07729..c20aebe65d50 100644
--- a/src/grammar/verify.rs
+++ b/src/grammar/verify.rs
@@ -71,6 +71,7 @@ fn parse_token_list(file: &str) -> HashMap<String, Token> {
             "IDENT" => id(),
             "PLUS" => BINOP(PLUS),
             "LIT_CHAR" => LIT_CHAR(Name(0)),
+            "LIT_BYTE" => LIT_BYTE(Name(0)),
             "EQ" => EQ,
             "RBRACKET" => RBRACKET,
             "COMMENT" => COMMENT,
@@ -124,7 +125,7 @@ fn str_to_binop(s: &str) -> BinOp {
     }
 }
 
-/// Assuming a raw string/binary literal, strip out the leading/trailing
+/// Assuming a string/binary literal, strip out the leading/trailing
 /// hashes and surrounding quotes/raw/binary prefix.
 fn fix(mut lit: &str) -> ast::Name {
     if lit.char_at(0) == 'r' {
@@ -143,6 +144,15 @@ fn fix(mut lit: &str) -> ast::Name {
     parse::token::intern(lit.slice(leading_hashes + 1, lit.len() - leading_hashes - 1))
 }
 
+/// Assuming a char/byte literal, strip the 'b' prefix and the single quotes.
+fn fixchar(mut lit: &str) -> ast::Name {
+    if lit.char_at(0) == 'b' {
+        lit = lit.slice_from(1);
+    }
+
+    parse::token::intern(lit.slice(1, lit.len() - 1))
+}
+
 fn count(lit: &str) -> uint {
     lit.chars().take_while(|c| *c == '#').count()
 }
@@ -167,7 +177,8 @@ fn parse_antlr_token(s: &str, tokens: &HashMap<String, Token>) -> TokenAndSpan {
         BINOPEQ(..) => BINOPEQ(str_to_binop(content.slice_to(content.len() - 1))),
         LIT_STR(..) => LIT_STR(fix(content)),
         LIT_STR_RAW(..) => LIT_STR_RAW(fix(content), count(content)),
-        LIT_CHAR(..) => LIT_CHAR(nm),
+        LIT_CHAR(..) => LIT_CHAR(fixchar(content)),
+        LIT_BYTE(..) => LIT_BYTE(fixchar(content)),
         DOC_COMMENT(..) => DOC_COMMENT(nm),
         LIT_INTEGER(..) => LIT_INTEGER(nm),
         LIT_FLOAT(..) => LIT_FLOAT(nm),

From 188d889aaf8cccb73243812881ce1030355386cc Mon Sep 17 00:00:00 2001
From: Corey Richardson <corey@octayn.net>
Date: Mon, 14 Jul 2014 20:46:04 -0700
Subject: [PATCH 06/12] ignore-lexer-test to broken files and remove some stray
 hyphens

I blame @ChrisMorgan for the hyphens.
---
 src/libcollections/slice.rs  |  2 +-
 src/libcollections/str.rs    |  2 ++
 src/libcollections/string.rs |  2 ++
 src/libcore/str.rs           |  2 ++
 src/libcoretest/char.rs      |  2 ++
 src/libgetopts/lib.rs        |  2 ++
 src/libgreen/macros.rs       |  1 +
 src/libregex/lib.rs          |  2 ++
 src/librustrt/util.rs        |  2 ++
 src/libserialize/base64.rs   |  2 ++
 src/libserialize/hex.rs      |  2 ++
 src/libstd/ascii.rs          |  2 ++
 src/libstd/fmt.rs            |  2 ++
 src/libsyntax/codemap.rs     |  2 ++
 src/libtime/lib.rs           | 16 ++++++++--------
 src/libunicode/u_str.rs      |  2 ++
 16 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/src/libcollections/slice.rs b/src/libcollections/slice.rs
index 40cf8495a405..69ca2a4107ce 100644
--- a/src/libcollections/slice.rs
+++ b/src/libcollections/slice.rs
@@ -155,7 +155,7 @@ impl<'a, T: Clone, V: Vector<T>> VectorVector<T> for &'a [V] {
 /// a sequence of all possible permutations for an indexed sequence of
 /// elements. Each permutation is only a single swap apart.
 ///
-/// The Steinhaus–Johnson–Trotter algorithm is used.
+/// The Steinhaus-Johnson-Trotter algorithm is used.
 ///
 /// Generates even and odd permutations alternately.
 ///
diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs
index 19db88453809..b11f98f52d9f 100644
--- a/src/libcollections/str.rs
+++ b/src/libcollections/str.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 /*!
 
diff --git a/src/libcollections/string.rs b/src/libcollections/string.rs
index 74b9465f2a56..7e94cfc1b96f 100644
--- a/src/libcollections/string.rs
+++ b/src/libcollections/string.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 //! An owned, growable string that enforces that its contents are valid UTF-8.
 
diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index aa2050dacf1a..c5ef1eca44da 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 //! String manipulation
 //!
diff --git a/src/libcoretest/char.rs b/src/libcoretest/char.rs
index 51d13535caf3..ebc6e9862288 100644
--- a/src/libcoretest/char.rs
+++ b/src/libcoretest/char.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 use core::char::{escape_unicode, escape_default};
 
diff --git a/src/libgetopts/lib.rs b/src/libgetopts/lib.rs
index eaec31a45f42..8069b7002708 100644
--- a/src/libgetopts/lib.rs
+++ b/src/libgetopts/lib.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15677
 
 //! Simple getopt alternative.
 //!
diff --git a/src/libgreen/macros.rs b/src/libgreen/macros.rs
index eddf17b34b9f..4cce430d88a8 100644
--- a/src/libgreen/macros.rs
+++ b/src/libgreen/macros.rs
@@ -9,6 +9,7 @@
 // except according to those terms.
 
 // FIXME: this file probably shouldn't exist
+// ignore-lexer-test FIXME #15677
 
 #![macro_escape]
 
diff --git a/src/libregex/lib.rs b/src/libregex/lib.rs
index 1bb7f605e547..fae3e5986806 100644
--- a/src/libregex/lib.rs
+++ b/src/libregex/lib.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 //! This crate provides a native implementation of regular expressions that is
 //! heavily based on RE2 both in syntax and in implementation. Notably,
diff --git a/src/librustrt/util.rs b/src/librustrt/util.rs
index 40c3e19576e4..1334000ed1f5 100644
--- a/src/librustrt/util.rs
+++ b/src/librustrt/util.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15677
 
 use core::prelude::*;
 
diff --git a/src/libserialize/base64.rs b/src/libserialize/base64.rs
index 63cfbd6d9aa1..bd81091bd55e 100644
--- a/src/libserialize/base64.rs
+++ b/src/libserialize/base64.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 //! Base64 binary-to-text encoding
 use std::str;
diff --git a/src/libserialize/hex.rs b/src/libserialize/hex.rs
index 51fab7b13545..568c4dafd155 100644
--- a/src/libserialize/hex.rs
+++ b/src/libserialize/hex.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 //! Hex binary-to-text encoding
 use std::str;
diff --git a/src/libstd/ascii.rs b/src/libstd/ascii.rs
index 796147ce7a05..eccee007d5cd 100644
--- a/src/libstd/ascii.rs
+++ b/src/libstd/ascii.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 //! Operations on ASCII strings and characters
 
diff --git a/src/libstd/fmt.rs b/src/libstd/fmt.rs
index 5834e576b081..13c52545274b 100644
--- a/src/libstd/fmt.rs
+++ b/src/libstd/fmt.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 /*!
 
diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs
index ef4024a8f83f..2f30108c27bd 100644
--- a/src/libsyntax/codemap.rs
+++ b/src/libsyntax/codemap.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 /*!
 
diff --git a/src/libtime/lib.rs b/src/libtime/lib.rs
index 7655ace0ecb0..f8f63d502739 100644
--- a/src/libtime/lib.rs
+++ b/src/libtime/lib.rs
@@ -206,28 +206,28 @@ pub fn tzset() {
 /// also called a broken-down time value.
 #[deriving(Clone, PartialEq, Show)]
 pub struct Tm {
-    /// Seconds after the minute – [0, 60]
+    /// Seconds after the minute - [0, 60]
     pub tm_sec: i32,
 
-    /// Minutes after the hour – [0, 59]
+    /// Minutes after the hour - [0, 59]
     pub tm_min: i32,
 
-    /// Hours after midnight – [0, 23]
+    /// Hours after midnight - [0, 23]
     pub tm_hour: i32,
 
-    /// Day of the month – [1, 31]
+    /// Day of the month - [1, 31]
     pub tm_mday: i32,
 
-    /// Months since January – [0, 11]
+    /// Months since January - [0, 11]
     pub tm_mon: i32,
 
     /// Years since 1900
     pub tm_year: i32,
 
-    /// Days since Sunday – [0, 6]. 0 = Sunday, 1 = Monday, …, 6 = Saturday.
+    /// Days since Sunday - [0, 6]. 0 = Sunday, 1 = Monday, ..., 6 = Saturday.
     pub tm_wday: i32,
 
-    /// Days since January 1 – [0, 365]
+    /// Days since January 1 - [0, 365]
     pub tm_yday: i32,
 
     /// Daylight Saving Time flag.
@@ -241,7 +241,7 @@ pub struct Tm {
     /// for U.S. Pacific Daylight Time, the value is -7*60*60 = -25200.
     pub tm_gmtoff: i32,
 
-    /// Nanoseconds after the second – [0, 10<sup>9</sup> - 1]
+    /// Nanoseconds after the second - [0, 10<sup>9</sup> - 1]
     pub tm_nsec: i32,
 }
 
diff --git a/src/libunicode/u_str.rs b/src/libunicode/u_str.rs
index 84a2eab4b251..85f311d47eb2 100644
--- a/src/libunicode/u_str.rs
+++ b/src/libunicode/u_str.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 /*!
  * Unicode-intensive string manipulations.

From cbd6799110b858505c271b8169e356c62e34b80a Mon Sep 17 00:00:00 2001
From: Corey Richardson <corey@octayn.net>
Date: Tue, 15 Jul 2014 00:18:17 -0700
Subject: [PATCH 07/12] lexer tests: makefile/configure

---
 Makefile.in           | 13 +++++++++++++
 configure             |  3 +++
 mk/grammar.mk         | 43 +++++++++++++++++++++++++++++++++++++++++++
 src/grammar/check.sh  | 21 +++++++++++++++++++++
 src/grammar/verify.rs |  6 ++++--
 5 files changed, 84 insertions(+), 2 deletions(-)
 create mode 100644 mk/grammar.mk
 create mode 100755 src/grammar/check.sh

diff --git a/Makefile.in b/Makefile.in
index a8a63a42066b..2612761cef95 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -252,6 +252,19 @@ ifneq ($(findstring clean,$(MAKECMDGOALS)),)
   include $(CFG_SRC_DIR)mk/clean.mk
 endif
 
+# Grammar tests
+
+ifneq ($(findstring lexer,$(MAKECMDGOALS)),)
+  ifdef CFG_JAVAC
+	ifdef CFG_ANTLR4
+	  ifdef CFG_GRUN
+	    CFG_INFO := $(info cfg: including grammar tests)
+	    include $(CFG_SRC_DIR)mk/grammar.mk
+	  endif
+	endif
+  endif
+endif
+
 # CTAGS building
 ifneq ($(strip $(findstring TAGS.emacs,$(MAKECMDGOALS)) \
                $(findstring TAGS.vi,$(MAKECMDGOALS))),)
diff --git a/configure b/configure
index 135bdcd3782f..b6513cb0f74c 100755
--- a/configure
+++ b/configure
@@ -493,6 +493,9 @@ probe CFG_VALGRIND         valgrind
 probe CFG_PERF             perf
 probe CFG_ISCC             iscc
 probe CFG_LLNEXTGEN        LLnextgen
+probe CFG_JAVAC            javac
+probe CFG_ANTLR4           antlr4
+probe CFG_GRUN             grun
 probe CFG_PANDOC           pandoc
 probe CFG_PDFLATEX         pdflatex
 probe CFG_XELATEX          xelatex
diff --git a/mk/grammar.mk b/mk/grammar.mk
new file mode 100644
index 000000000000..03e253c7278c
--- /dev/null
+++ b/mk/grammar.mk
@@ -0,0 +1,43 @@
+# Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+# file at the top-level directory of this distribution and at
+# http://rust-lang.org/COPYRIGHT.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+BG = $(CFG_BUILD_DIR)/grammar/
+SG = $(S)src/grammar/
+B = $(CFG_BUILD_DIR)/$(CFG_BUILD)/stage2/
+L = $(B)lib/rustlib/$(CFG_BUILD)/lib
+LD = $(CFG_BUILD)/stage2/lib/rustlib/$(CFG_BUILD)/lib/
+RUSTC = $(B)bin/rustc
+
+# Run the reference lexer against libsyntax and compare the tokens and spans.
+# If "// ignore-lexer-test" is present in the file, it will be ignored.
+#
+# $(1) is the file to test.
+define LEXER_TEST
+grep "// ignore-lexer-test" $(1) ; \
+  if [ $$? -eq 1 ]; then \
+   CLASSPATH=$(B)grammar $(CFG_GRUN) RustLexer tokens -tokens < $(1) \
+   | $(B)grammar/verify $(1) ; \
+  fi
+endef
+
+$(BG):
+	$(Q)mkdir -p $(BG)
+
+$(BG)RustLexer.class: $(SG)RustLexer.g4
+	$(Q)$(CFG_ANTLR4) -o $(B)grammar $(SG)RustLexer.g4
+	$(Q)$(CFG_JAVAC) -d $(BG) $(BG)RustLexer.java
+
+$(BG)verify: $(SG)verify.rs rustc-stage2-H-$(CFG_BUILD) $(LD)stamp.regex_macros $(LD)stamp.rustc
+	$(Q)$(RUSTC) -O --out-dir $(BG) -L $(L) $(SG)verify.rs
+
+check-lexer: $(BG) $(BG)RustLexer.class $(BG)verify
+	$(info Verifying libsyntax against the reference lexer ...)
+	$(Q)find $(S) -iname '*.rs' -exec "$(SG)check.sh" {} "$(BG)" \
+      "$(CFG_GRUN)" "$(BG)verify" "$(BG)RustLexer.tokens" "$(VERBOSE)" \;
diff --git a/src/grammar/check.sh b/src/grammar/check.sh
new file mode 100755
index 000000000000..3ddbb8a34c81
--- /dev/null
+++ b/src/grammar/check.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+# Run the reference lexer against libsyntax and compare the tokens and spans.
+# If "// ignore-lexer-test" is present in the file, it will be ignored.
+#
+# Argument $1 is the file to check, $2 is the classpath to use, $3 is the path
+# to the grun binary, $4 is the path to the verify binary, $5 is the path to
+# RustLexer.tokens
+
+if [ "${VERBOSE}" == "1" ]; then
+    set -x
+fi
+
+grep -q "// ignore lexer-test" $1;
+
+if [ $? -eq 1 ]; then
+    cd $2 # This `cd` is so java will pick up RustLexer.class. I couldn't
+    # figure out how to wrangle the CLASSPATH, just adding build/grammr didn't
+    # seem to have anny effect.
+    $3 RustLexer tokens -tokens < $1 | $4 $1 $5
+fi
diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs
index c20aebe65d50..7fddf9b887de 100644
--- a/src/grammar/verify.rs
+++ b/src/grammar/verify.rs
@@ -223,11 +223,13 @@ fn main() {
         r.next_token()
     }
 
-    let token_map = parse_token_list(File::open(&Path::new("RustLexer.tokens")).unwrap().read_to_string().unwrap().as_slice());
+    let args = std::os::args();
+
+    let token_map = parse_token_list(File::open(&Path::new(args.get(2).as_slice())).unwrap().read_to_string().unwrap().as_slice());
     let mut stdin = std::io::stdin();
     let mut antlr_tokens = stdin.lines().map(|l| parse_antlr_token(l.unwrap().as_slice().trim(), &token_map));
 
-    let code = File::open(&Path::new(std::os::args().get(1).as_slice())).unwrap().read_to_string().unwrap();
+    let code = File::open(&Path::new(args.get(1).as_slice())).unwrap().read_to_string().unwrap();
     let options = config::basic_options();
     let session = session::build_session(options, None,
                                          syntax::diagnostics::registry::Registry::new([]));

From dd3afb42d1cc5eb11f2e024167aca0a6d6173b98 Mon Sep 17 00:00:00 2001
From: Corey Richardson <corey@octayn.net>
Date: Mon, 21 Jul 2014 12:59:25 -0700
Subject: [PATCH 08/12] Break apart long lines in verify.rs

---
 src/grammar/verify.rs | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs
index 7fddf9b887de..a6a1a75854d5 100644
--- a/src/grammar/verify.rs
+++ b/src/grammar/verify.rs
@@ -225,7 +225,9 @@ fn main() {
 
     let args = std::os::args();
 
-    let token_map = parse_token_list(File::open(&Path::new(args.get(2).as_slice())).unwrap().read_to_string().unwrap().as_slice());
+    let mut token_file = File::open(&Path::new(args.get(2).as_slice()));
+    let token_map = parse_token_list(token_file.read_to_string().unwrap().as_slice());
+
     let mut stdin = std::io::stdin();
     let mut antlr_tokens = stdin.lines().map(|l| parse_antlr_token(l.unwrap().as_slice().trim(), &token_map));
 

From c41a7dfcc7b1c71305bd1816bb2e6aff7abddbb2 Mon Sep 17 00:00:00 2001
From: Corey Richardson <corey@octayn.net>
Date: Mon, 21 Jul 2014 13:04:35 -0700
Subject: [PATCH 09/12] Shuffle around check-lexer conditions

---
 Makefile.in           | 14 +-------------
 mk/grammar.mk         | 16 ++++++++++++++--
 mk/tests.mk           |  4 +++-
 src/grammar/check.sh  | 31 ++++++++++++++++++++++---------
 src/grammar/verify.rs | 23 +++++++++++++++++++----
 5 files changed, 59 insertions(+), 29 deletions(-)

diff --git a/Makefile.in b/Makefile.in
index 2612761cef95..5683eb7ba06a 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -216,6 +216,7 @@ ifneq ($(strip $(findstring check,$(MAKECMDGOALS)) \
                $(findstring tidy,$(MAKECMDGOALS))),)
   CFG_INFO := $(info cfg: including test rules)
   include $(CFG_SRC_DIR)mk/tests.mk
+  include $(CFG_SRC_DIR)mk/grammar.mk
 endif
 
 # Performance and benchmarking
@@ -252,19 +253,6 @@ ifneq ($(findstring clean,$(MAKECMDGOALS)),)
   include $(CFG_SRC_DIR)mk/clean.mk
 endif
 
-# Grammar tests
-
-ifneq ($(findstring lexer,$(MAKECMDGOALS)),)
-  ifdef CFG_JAVAC
-	ifdef CFG_ANTLR4
-	  ifdef CFG_GRUN
-	    CFG_INFO := $(info cfg: including grammar tests)
-	    include $(CFG_SRC_DIR)mk/grammar.mk
-	  endif
-	endif
-  endif
-endif
-
 # CTAGS building
 ifneq ($(strip $(findstring TAGS.emacs,$(MAKECMDGOALS)) \
                $(findstring TAGS.vi,$(MAKECMDGOALS))),)
diff --git a/mk/grammar.mk b/mk/grammar.mk
index 03e253c7278c..c0afa3eb7694 100644
--- a/mk/grammar.mk
+++ b/mk/grammar.mk
@@ -38,6 +38,18 @@ $(BG)verify: $(SG)verify.rs rustc-stage2-H-$(CFG_BUILD) $(LD)stamp.regex_macros
 	$(Q)$(RUSTC) -O --out-dir $(BG) -L $(L) $(SG)verify.rs
 
 check-lexer: $(BG) $(BG)RustLexer.class $(BG)verify
+ifdef CFG_JAVAC
+ifdef CFG_ANTLR4
+ifdef CFG_GRUN
 	$(info Verifying libsyntax against the reference lexer ...)
-	$(Q)find $(S) -iname '*.rs' -exec "$(SG)check.sh" {} "$(BG)" \
-      "$(CFG_GRUN)" "$(BG)verify" "$(BG)RustLexer.tokens" "$(VERBOSE)" \;
+	$(Q)$(SG)check.sh $(S) "$(BG)" \
+		"$(CFG_GRUN)" "$(BG)verify" "$(BG)RustLexer.tokens"
+else
+$(info grun not available, skipping lexer test...)
+endif
+else
+$(info antlr4 not available, skipping lexer test...)
+endif
+else
+$(info javac not available, skipping lexer test...)
+endif
diff --git a/mk/tests.mk b/mk/tests.mk
index d2e4388521ec..6068af8f7f46 100644
--- a/mk/tests.mk
+++ b/mk/tests.mk
@@ -171,7 +171,7 @@ endif
 # Main test targets
 ######################################################################
 
-check: cleantmptestlogs cleantestlibs check-notidy tidy
+check: cleantmptestlogs cleantestlibs check-notidy tidy check-syntax
 
 check-notidy: cleantmptestlogs cleantestlibs all check-stage2
 	$(Q)$(CFG_PYTHON) $(S)src/etc/check-summary.py tmp/*.log
@@ -192,6 +192,8 @@ check-docs: cleantestlibs cleantmptestlogs check-stage2-docs
 # NOTE: Remove after reprogramming windows bots
 check-fast: check-lite
 
+check-syntax: check-lexer
+
 .PHONY: cleantmptestlogs cleantestlibs
 
 cleantmptestlogs:
diff --git a/src/grammar/check.sh b/src/grammar/check.sh
index 3ddbb8a34c81..69ec490a08a3 100755
--- a/src/grammar/check.sh
+++ b/src/grammar/check.sh
@@ -2,20 +2,33 @@
 
 # Run the reference lexer against libsyntax and compare the tokens and spans.
 # If "// ignore-lexer-test" is present in the file, it will be ignored.
-#
+
+
 # Argument $1 is the file to check, $2 is the classpath to use, $3 is the path
 # to the grun binary, $4 is the path to the verify binary, $5 is the path to
 # RustLexer.tokens
-
 if [ "${VERBOSE}" == "1" ]; then
     set -x
 fi
 
-grep -q "// ignore lexer-test" $1;
+check() {
+    grep --silent "// ignore-lexer-test" $1;
 
-if [ $? -eq 1 ]; then
-    cd $2 # This `cd` is so java will pick up RustLexer.class. I couldn't
-    # figure out how to wrangle the CLASSPATH, just adding build/grammr didn't
-    # seem to have anny effect.
-    $3 RustLexer tokens -tokens < $1 | $4 $1 $5
-fi
+    # if it's *not* found...
+    if [ $? -eq 1 ]; then
+        cd $2 # This `cd` is so java will pick up RustLexer.class. I couldn't
+        # figure out how to wrangle the CLASSPATH, just adding build/grammar didn't
+        # seem to have any effect.
+        if $3 RustLexer tokens -tokens < $1 | $4 $1 $5; then
+            echo "pass: $1"
+        else
+            echo "fail: $1"
+        fi
+    else
+        echo "skip: $1"
+    fi
+}
+
+for file in $(find $1 -iname '*.rs' ! -path '*/test/compile-fail/*' ); do
+    check $file $2 $3 $4 $5
+done
diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs
index a6a1a75854d5..f2ae5a1ea4e5 100644
--- a/src/grammar/verify.rs
+++ b/src/grammar/verify.rs
@@ -1,3 +1,13 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
 #![feature(globs, phase, macro_rules)]
 
 extern crate syntax;
@@ -158,7 +168,9 @@ fn count(lit: &str) -> uint {
 }
 
 fn parse_antlr_token(s: &str, tokens: &HashMap<String, Token>) -> TokenAndSpan {
-    let re = regex!(r"\[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]");
+    let re = regex!(
+      r"\[@(?P<seq>\d+),(?P<start>\d+):(?P<end>\d+)='(?P<content>.+?)',<(?P<toknum>-?\d+)>,\d+:\d+]"
+    );
 
     let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice());
     let start = m.name("start");
@@ -166,7 +178,8 @@ fn parse_antlr_token(s: &str, tokens: &HashMap<String, Token>) -> TokenAndSpan {
     let toknum = m.name("toknum");
     let content = m.name("content");
 
-    let proto_tok = tokens.find_equiv(&toknum).expect(format!("didn't find token {} in the map", toknum).as_slice());
+    let proto_tok = tokens.find_equiv(&toknum).expect(format!("didn't find token {} in the map",
+                                                              toknum).as_slice());
 
     let nm = parse::token::intern(content);
 
@@ -229,7 +242,8 @@ fn main() {
     let token_map = parse_token_list(token_file.read_to_string().unwrap().as_slice());
 
     let mut stdin = std::io::stdin();
-    let mut antlr_tokens = stdin.lines().map(|l| parse_antlr_token(l.unwrap().as_slice().trim(), &token_map));
+    let mut antlr_tokens = stdin.lines().map(|l| parse_antlr_token(l.unwrap().as_slice().trim(),
+                                                                   &token_map));
 
     let code = File::open(&Path::new(args.get(1).as_slice())).unwrap().read_to_string().unwrap();
     let options = config::basic_options();
@@ -246,7 +260,8 @@ fn main() {
             continue
         }
 
-        assert!(rustc_tok.sp == antlr_tok.sp, "{} and {} have different spans", rustc_tok, antlr_tok);
+        assert!(rustc_tok.sp == antlr_tok.sp, "{} and {} have different spans", rustc_tok,
+                antlr_tok);
 
         macro_rules! matches (
             ( $($x:pat),+ ) => (

From 35c0bf32926ec8095a7ca5b5b77e612c15ddf098 Mon Sep 17 00:00:00 2001
From: Corey Richardson <corey@octayn.net>
Date: Mon, 21 Jul 2014 15:57:14 -0700
Subject: [PATCH 10/12] Add a ton of ignore-lexer-test

---
 src/grammar/check.sh                          |   2 +-
 src/libcollections/hash/sip.rs                |   2 ++
 src/libcore/num/mod.rs                        |   2 ++
 src/libnative/io/tty_win32.rs                 |   2 ++
 src/librand/distributions/gamma.rs            |   2 ++
 src/libregex/test/tests.rs                    |   1 +
 src/libstd/collections/hashmap.rs             |   2 ++
 src/libstd/io/buffered.rs                     |   2 ++
 src/libstd/io/fs.rs                           |   2 ++
 src/libstd/io/mem.rs                          |   2 ++
 src/libstd/io/mod.rs                          |   2 ++
 src/libstd/num/strconv.rs                     |   2 ++
 src/libstd/path/windows.rs                    |   2 ++
 src/libsyntax/ext/tt/macro_parser.rs          |   2 ++
 src/test/bench/core-std.rs                    |   1 +
 src/test/bench/msgsend-ring-mutex-arcs.rs     |   1 +
 src/test/bench/msgsend-ring-rw-arcs.rs        |   1 +
 src/test/bench/noise.rs                       |   1 +
 src/test/pretty/block-comment-wchar.rs        |   1 +
 src/test/run-pass/byte-literals.rs            |   2 ++
 .../default-method-supertrait-vtable.rs       |   2 ++
 src/test/run-pass/ifmt.rs                     |   1 +
 src/test/run-pass/issue-12582.rs              |   2 ++
 src/test/run-pass/issue-13027.rs              |   2 ++
 src/test/run-pass/issue-2185.rs               |   1 +
 src/test/run-pass/issue-2718.rs               |   2 ++
 src/test/run-pass/issue-3683.rs               |   2 ++
 src/test/run-pass/issue-4759-1.rs             |   2 ++
 src/test/run-pass/issue-5280.rs               |   2 ++
 .../issue-5321-immediates-with-bare-self.rs   |   2 ++
 ...line-endings-string-literal-doc-comment.rs |   1 +
 ...ase-types-non-uppercase-statics-unicode.rs |   2 ++
 src/test/run-pass/match-range.rs              |   2 ++
 src/test/run-pass/multibyte.rs                |   2 ++
 src/test/run-pass/raw-str.rs                  | Bin 1305 -> 1339 bytes
 src/test/run-pass/shebang.rs                  |   2 ++
 src/test/run-pass/struct-return.rs            |   2 ++
 src/test/run-pass/trait-to-str.rs             |   3 ++-
 .../run-pass/trait-with-bounds-default.rs     |   2 ++
 .../run-pass/traits-default-method-self.rs    |   2 ++
 .../run-pass/traits-default-method-trivial.rs |   2 ++
 src/test/run-pass/unsized.rs                  |   2 ++
 src/test/run-pass/unsized2.rs                 |   2 ++
 src/test/run-pass/utf8-bom.rs                 |   2 ++
 src/test/run-pass/utf8.rs                     |   3 ++-
 src/test/run-pass/utf8_chars.rs               |   2 ++
 src/test/run-pass/utf8_idents.rs              |   2 ++
 47 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/src/grammar/check.sh b/src/grammar/check.sh
index 69ec490a08a3..f2836312437c 100755
--- a/src/grammar/check.sh
+++ b/src/grammar/check.sh
@@ -29,6 +29,6 @@ check() {
     fi
 }
 
-for file in $(find $1 -iname '*.rs' ! -path '*/test/compile-fail/*' ); do
+for file in $(find $1 -iname '*.rs' ! -path '*/test/compile-fail*'); do
     check $file $2 $3 $4 $5
 done
diff --git a/src/libcollections/hash/sip.rs b/src/libcollections/hash/sip.rs
index 1c7e03f70c88..7168af89b59e 100644
--- a/src/libcollections/hash/sip.rs
+++ b/src/libcollections/hash/sip.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15883
 
 /*!
  * Implementation of SipHash 2-4
diff --git a/src/libcore/num/mod.rs b/src/libcore/num/mod.rs
index 3230873883e1..3ffc1d5e11c7 100644
--- a/src/libcore/num/mod.rs
+++ b/src/libcore/num/mod.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 //! Numeric traits and functions for generic mathematics
 
diff --git a/src/libnative/io/tty_win32.rs b/src/libnative/io/tty_win32.rs
index 72cf5e785fb9..e98fe1e20b19 100644
--- a/src/libnative/io/tty_win32.rs
+++ b/src/libnative/io/tty_win32.rs
@@ -8,6 +8,8 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
+// ignore-lexer-test FIXME #15877
+
 //! Windows specific console TTY implementation
 //!
 //! This module contains the implementation of a Windows specific console TTY.
diff --git a/src/librand/distributions/gamma.rs b/src/librand/distributions/gamma.rs
index a9f24e1a9ecc..7b6e94eaa920 100644
--- a/src/librand/distributions/gamma.rs
+++ b/src/librand/distributions/gamma.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 //! The Gamma and derived distributions.
 
diff --git a/src/libregex/test/tests.rs b/src/libregex/test/tests.rs
index 251ab10ad34e..48065992bb05 100644
--- a/src/libregex/test/tests.rs
+++ b/src/libregex/test/tests.rs
@@ -9,6 +9,7 @@
 // except according to those terms.
 
 // ignore-tidy-linelength
+// ignore-lexer-test FIXME #15679
 
 use regex::{Regex, NoExpand};
 
diff --git a/src/libstd/collections/hashmap.rs b/src/libstd/collections/hashmap.rs
index 098e87243b69..f9c99e54df01 100644
--- a/src/libstd/collections/hashmap.rs
+++ b/src/libstd/collections/hashmap.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15883
 
 //! Unordered containers, implemented as hash-tables (`HashSet` and `HashMap` types)
 
diff --git a/src/libstd/io/buffered.rs b/src/libstd/io/buffered.rs
index 4f355502eb88..e25006a7b395 100644
--- a/src/libstd/io/buffered.rs
+++ b/src/libstd/io/buffered.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15883
 
 //! Buffering wrappers for I/O traits
 
diff --git a/src/libstd/io/fs.rs b/src/libstd/io/fs.rs
index 449ad6fa0dab..e25ec5366c9f 100644
--- a/src/libstd/io/fs.rs
+++ b/src/libstd/io/fs.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 /*! Synchronous File I/O
 
diff --git a/src/libstd/io/mem.rs b/src/libstd/io/mem.rs
index 1c0251c8369d..b93b84b7d63f 100644
--- a/src/libstd/io/mem.rs
+++ b/src/libstd/io/mem.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 //! Readers and Writers for in-memory buffers
 
diff --git a/src/libstd/io/mod.rs b/src/libstd/io/mod.rs
index 6ac092fd8c65..42ce2180fa67 100644
--- a/src/libstd/io/mod.rs
+++ b/src/libstd/io/mod.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15883
 
 // FIXME: cover these topics:
 //        path, reader, writer, stream, raii (close not needed),
diff --git a/src/libstd/num/strconv.rs b/src/libstd/num/strconv.rs
index 88fc6e1ffd85..cc30acf064b8 100644
--- a/src/libstd/num/strconv.rs
+++ b/src/libstd/num/strconv.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 #![allow(missing_doc)]
 
diff --git a/src/libstd/path/windows.rs b/src/libstd/path/windows.rs
index 88ae0d4837e5..0de098319e8b 100644
--- a/src/libstd/path/windows.rs
+++ b/src/libstd/path/windows.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15883
 
 //! Windows file path handling
 
diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs
index bdf1f6eb6007..509d5bd44218 100644
--- a/src/libsyntax/ext/tt/macro_parser.rs
+++ b/src/libsyntax/ext/tt/macro_parser.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 //! This is an Earley-like parser, without support for in-grammar nonterminals,
 //! only by calling out to the main rust parser for named nonterminals (which it
diff --git a/src/test/bench/core-std.rs b/src/test/bench/core-std.rs
index fd3c4daebdb8..9af3c0c6c8c1 100644
--- a/src/test/bench/core-std.rs
+++ b/src/test/bench/core-std.rs
@@ -8,6 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
+// ignore-lexer-test FIXME #15679
 // Microbenchmarks for various functions in std and extra
 
 #![feature(macro_rules)]
diff --git a/src/test/bench/msgsend-ring-mutex-arcs.rs b/src/test/bench/msgsend-ring-mutex-arcs.rs
index 2b9abfbc350a..a0ff7736b5c7 100644
--- a/src/test/bench/msgsend-ring-mutex-arcs.rs
+++ b/src/test/bench/msgsend-ring-mutex-arcs.rs
@@ -16,6 +16,7 @@
 // This also serves as a pipes test, because Arcs are implemented with pipes.
 
 // no-pretty-expanded FIXME #15189
+// ignore-lexer-test FIXME #15679
 
 extern crate time;
 
diff --git a/src/test/bench/msgsend-ring-rw-arcs.rs b/src/test/bench/msgsend-ring-rw-arcs.rs
index afed753f455b..6512ecfb3e26 100644
--- a/src/test/bench/msgsend-ring-rw-arcs.rs
+++ b/src/test/bench/msgsend-ring-rw-arcs.rs
@@ -16,6 +16,7 @@
 // This also serves as a pipes test, because Arcs are implemented with pipes.
 
 // no-pretty-expanded FIXME #15189
+// ignore-lexer-test FIXME #15679
 
 extern crate time;
 
diff --git a/src/test/bench/noise.rs b/src/test/bench/noise.rs
index 6ec1d5395cf0..bdca03490369 100644
--- a/src/test/bench/noise.rs
+++ b/src/test/bench/noise.rs
@@ -10,6 +10,7 @@
 
 // Multi-language Perlin noise benchmark.
 // See https://github.com/nsf/pnoise for timings and alternative implementations.
+// ignore-lexer-test FIXME #15679
 
 use std::f32::consts::PI;
 use std::rand::{Rng, StdRng};
diff --git a/src/test/pretty/block-comment-wchar.rs b/src/test/pretty/block-comment-wchar.rs
index 777c456335dc..06ee3715eb04 100644
--- a/src/test/pretty/block-comment-wchar.rs
+++ b/src/test/pretty/block-comment-wchar.rs
@@ -14,6 +14,7 @@
 // ignore-tidy-cr
 // ignore-tidy-tab
 // pp-exact:block-comment-wchar.pp
+// ignore-lexer-test FIXME #15679
 fn f() {
     fn nested() {
         /*
diff --git a/src/test/run-pass/byte-literals.rs b/src/test/run-pass/byte-literals.rs
index ac470268d319..7fd7e3dbf004 100644
--- a/src/test/run-pass/byte-literals.rs
+++ b/src/test/run-pass/byte-literals.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15883
 
 
 static FOO: u8 = b'\xF0';
diff --git a/src/test/run-pass/default-method-supertrait-vtable.rs b/src/test/run-pass/default-method-supertrait-vtable.rs
index 2bcf264bb1f1..1b2b17f99171 100644
--- a/src/test/run-pass/default-method-supertrait-vtable.rs
+++ b/src/test/run-pass/default-method-supertrait-vtable.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15877
 
 
 // Tests that we can call a function bounded over a supertrait from
diff --git a/src/test/run-pass/ifmt.rs b/src/test/run-pass/ifmt.rs
index e349f91a309f..fabcfc5ff334 100644
--- a/src/test/run-pass/ifmt.rs
+++ b/src/test/run-pass/ifmt.rs
@@ -9,6 +9,7 @@
 // except according to those terms.
 
 // no-pretty-expanded unnecessary unsafe block generated
+// ignore-lexer-test FIXME #15679
 
 #![feature(macro_rules, managed_boxes)]
 #![deny(warnings)]
diff --git a/src/test/run-pass/issue-12582.rs b/src/test/run-pass/issue-12582.rs
index 418fd54cc139..f68ba5dab8ae 100644
--- a/src/test/run-pass/issue-12582.rs
+++ b/src/test/run-pass/issue-12582.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15877
 
 pub fn main() {
     let x = 1i;
diff --git a/src/test/run-pass/issue-13027.rs b/src/test/run-pass/issue-13027.rs
index e1634e44847e..0efe64448c3d 100644
--- a/src/test/run-pass/issue-13027.rs
+++ b/src/test/run-pass/issue-13027.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15877
 
 // Tests that match expression handles overlapped literal and range
 // properly in the presence of guard function.
diff --git a/src/test/run-pass/issue-2185.rs b/src/test/run-pass/issue-2185.rs
index 492e76552d45..974905487fe2 100644
--- a/src/test/run-pass/issue-2185.rs
+++ b/src/test/run-pass/issue-2185.rs
@@ -9,6 +9,7 @@
 // except according to those terms.
 
 // ignore-test
+// ignore-lexer-test FIXME #15881
 
 // notes on this test case:
 // On Thu, Apr 18, 2013-2014 at 6:30 PM, John Clements <clements@brinckerhoff.org> wrote:
diff --git a/src/test/run-pass/issue-2718.rs b/src/test/run-pass/issue-2718.rs
index b4807964d46d..c52dd5ce5e4e 100644
--- a/src/test/run-pass/issue-2718.rs
+++ b/src/test/run-pass/issue-2718.rs
@@ -8,6 +8,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15883
 
 #![feature(unsafe_destructor)]
 
diff --git a/src/test/run-pass/issue-3683.rs b/src/test/run-pass/issue-3683.rs
index aa7fa0cb5f04..e6c816666e79 100644
--- a/src/test/run-pass/issue-3683.rs
+++ b/src/test/run-pass/issue-3683.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15877
 
 
 trait Foo {
diff --git a/src/test/run-pass/issue-4759-1.rs b/src/test/run-pass/issue-4759-1.rs
index ad8ee984217c..ce2f488b90c7 100644
--- a/src/test/run-pass/issue-4759-1.rs
+++ b/src/test/run-pass/issue-4759-1.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15877
 
 trait U { fn f(self); }
 impl U for int { fn f(self) {} }
diff --git a/src/test/run-pass/issue-5280.rs b/src/test/run-pass/issue-5280.rs
index 977cd08ba377..bd8924650546 100644
--- a/src/test/run-pass/issue-5280.rs
+++ b/src/test/run-pass/issue-5280.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15877
 
 type FontTableTag = u32;
 
diff --git a/src/test/run-pass/issue-5321-immediates-with-bare-self.rs b/src/test/run-pass/issue-5321-immediates-with-bare-self.rs
index fcb8092b7234..511b8a968306 100644
--- a/src/test/run-pass/issue-5321-immediates-with-bare-self.rs
+++ b/src/test/run-pass/issue-5321-immediates-with-bare-self.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15877
 
 trait Fooable {
     fn yes(self);
diff --git a/src/test/run-pass/lexer-crlf-line-endings-string-literal-doc-comment.rs b/src/test/run-pass/lexer-crlf-line-endings-string-literal-doc-comment.rs
index 5c8db524cc2e..421ae8e94972 100644
--- a/src/test/run-pass/lexer-crlf-line-endings-string-literal-doc-comment.rs
+++ b/src/test/run-pass/lexer-crlf-line-endings-string-literal-doc-comment.rs
@@ -16,6 +16,7 @@
 // this directory should enforce it.
 
 // ignore-pretty
+// ignore-lexer-test FIXME #15882
 
 /// Doc comment that ends in CRLF
 pub fn foo() {}
diff --git a/src/test/run-pass/lint-non-camel-case-types-non-uppercase-statics-unicode.rs b/src/test/run-pass/lint-non-camel-case-types-non-uppercase-statics-unicode.rs
index d5e277b46e04..36c663fc8474 100644
--- a/src/test/run-pass/lint-non-camel-case-types-non-uppercase-statics-unicode.rs
+++ b/src/test/run-pass/lint-non-camel-case-types-non-uppercase-statics-unicode.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 
 #![forbid(non_camel_case_types)]
diff --git a/src/test/run-pass/match-range.rs b/src/test/run-pass/match-range.rs
index 7421ae958840..8b782520536a 100644
--- a/src/test/run-pass/match-range.rs
+++ b/src/test/run-pass/match-range.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15877
 
 pub fn main() {
     match 5u {
diff --git a/src/test/run-pass/multibyte.rs b/src/test/run-pass/multibyte.rs
index ba3d89e3c7a6..77084836408a 100644
--- a/src/test/run-pass/multibyte.rs
+++ b/src/test/run-pass/multibyte.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 // Test that multibyte characters don't crash the compiler
 pub fn main() {
diff --git a/src/test/run-pass/raw-str.rs b/src/test/run-pass/raw-str.rs
index da0c9eed9e6e86ddaebfec10bf08a648cd692134..35e863d05a177dd06262b4d4302b5f40886ed0aa 100644
GIT binary patch
delta 36
rcmbQqwVP|hBS!I@)QZ$1-ICPe5(PKU2wztPWkXXlbIZ-w8J(B`?_LYo

delta 12
TcmdnZHIr+@BgV}vOy<l0A4~)9

diff --git a/src/test/run-pass/shebang.rs b/src/test/run-pass/shebang.rs
index 2f78513b95cf..bd3181842ec5 100644
--- a/src/test/run-pass/shebang.rs
+++ b/src/test/run-pass/shebang.rs
@@ -10,5 +10,7 @@
 // except according to those terms.
 
 // ignore-pretty: `expand` addes some preludes before shebang
+//
+// ignore-lexer-test FIXME #15878
 
 pub fn main() { println!("Hello World"); }
diff --git a/src/test/run-pass/struct-return.rs b/src/test/run-pass/struct-return.rs
index 93c0e7f5ba53..3e41b6d806cd 100644
--- a/src/test/run-pass/struct-return.rs
+++ b/src/test/run-pass/struct-return.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15883
 
 pub struct Quad { a: u64, b: u64, c: u64, d: u64 }
 pub struct Floats { a: f64, b: u8, c: f64 }
diff --git a/src/test/run-pass/trait-to-str.rs b/src/test/run-pass/trait-to-str.rs
index fbe40e837de5..9b910d24bdc5 100644
--- a/src/test/run-pass/trait-to-str.rs
+++ b/src/test/run-pass/trait-to-str.rs
@@ -7,7 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-
+//
+// ignore-lexer-test FIXME #15883
 
 
 trait to_str {
diff --git a/src/test/run-pass/trait-with-bounds-default.rs b/src/test/run-pass/trait-with-bounds-default.rs
index fc4acfd5bb3c..ec9f666eb198 100644
--- a/src/test/run-pass/trait-with-bounds-default.rs
+++ b/src/test/run-pass/trait-with-bounds-default.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15877
 
 pub trait Clone2 {
     /// Returns a copy of the value. The contents of owned pointers
diff --git a/src/test/run-pass/traits-default-method-self.rs b/src/test/run-pass/traits-default-method-self.rs
index 1027008624a2..270b95452187 100644
--- a/src/test/run-pass/traits-default-method-self.rs
+++ b/src/test/run-pass/traits-default-method-self.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15877
 
 
 trait Cat {
diff --git a/src/test/run-pass/traits-default-method-trivial.rs b/src/test/run-pass/traits-default-method-trivial.rs
index c6a7ab5ba496..474632a7ffa5 100644
--- a/src/test/run-pass/traits-default-method-trivial.rs
+++ b/src/test/run-pass/traits-default-method-trivial.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15877
 
 
 trait Cat {
diff --git a/src/test/run-pass/unsized.rs b/src/test/run-pass/unsized.rs
index f49e8f46e78e..0530c8a6ab3d 100644
--- a/src/test/run-pass/unsized.rs
+++ b/src/test/run-pass/unsized.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15879
 
 // Test syntax checks for `Sized?` syntax.
 
diff --git a/src/test/run-pass/unsized2.rs b/src/test/run-pass/unsized2.rs
index 9703b55cda76..ada4da37ba11 100644
--- a/src/test/run-pass/unsized2.rs
+++ b/src/test/run-pass/unsized2.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15879
 #![feature(struct_variant)]
 
 
diff --git a/src/test/run-pass/utf8-bom.rs b/src/test/run-pass/utf8-bom.rs
index ccd40cb88fe0..baa4e941ff09 100644
--- a/src/test/run-pass/utf8-bom.rs
+++ b/src/test/run-pass/utf8-bom.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 // This file has utf-8 BOM, it should be compiled normally without error.
 
diff --git a/src/test/run-pass/utf8.rs b/src/test/run-pass/utf8.rs
index 557d2e5878e1..a52828387bf9 100644
--- a/src/test/run-pass/utf8.rs
+++ b/src/test/run-pass/utf8.rs
@@ -7,7 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-
+//
+// ignore-lexer-test FIXME #15679
 // no-pretty-expanded FIXME #15189
 
 pub fn main() {
diff --git a/src/test/run-pass/utf8_chars.rs b/src/test/run-pass/utf8_chars.rs
index 93c8111ad2d1..202427079a87 100644
--- a/src/test/run-pass/utf8_chars.rs
+++ b/src/test/run-pass/utf8_chars.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 use std::str;
 
diff --git a/src/test/run-pass/utf8_idents.rs b/src/test/run-pass/utf8_idents.rs
index ee4b2061a5da..f6c4776a11cc 100644
--- a/src/test/run-pass/utf8_idents.rs
+++ b/src/test/run-pass/utf8_idents.rs
@@ -7,6 +7,8 @@
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+//
+// ignore-lexer-test FIXME #15679
 
 
 #![feature(non_ascii_idents)]

From 857bb60fe01272c338c5f89912561bdf147af94a Mon Sep 17 00:00:00 2001
From: Corey Richardson <corey@octayn.net>
Date: Mon, 21 Jul 2014 19:26:20 -0700
Subject: [PATCH 11/12] Don't run lexer tests by default

---
 mk/tests.mk           | 2 +-
 src/grammar/README.md | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/mk/tests.mk b/mk/tests.mk
index 6068af8f7f46..2e500ffeb0a5 100644
--- a/mk/tests.mk
+++ b/mk/tests.mk
@@ -171,7 +171,7 @@ endif
 # Main test targets
 ######################################################################
 
-check: cleantmptestlogs cleantestlibs check-notidy tidy check-syntax
+check: cleantmptestlogs cleantestlibs check-notidy tidy
 
 check-notidy: cleantmptestlogs cleantestlibs all check-stage2
 	$(Q)$(CFG_PYTHON) $(S)src/etc/check-summary.py tmp/*.log
diff --git a/src/grammar/README.md b/src/grammar/README.md
index 69f8ab1e486a..f5b872cdc7f6 100644
--- a/src/grammar/README.md
+++ b/src/grammar/README.md
@@ -1,9 +1,10 @@
 Reference grammar.
 
 Uses [antlr4](http://www.antlr.org/) and a custom Rust tool to compare
-ASTs/token streams generated.
+ASTs/token streams generated. You can use the `check-syntax` make target to
+run all of the available tests.
 
-To use:
+To use manually:
 
 ```
 antlr4 RustLexer.g4

From 95a1ce6f3f3a31d4e64b04637cd252cc02b623dd Mon Sep 17 00:00:00 2001
From: Corey Richardson <corey@octayn.net>
Date: Mon, 21 Jul 2014 22:53:36 -0700
Subject: [PATCH 12/12] Fix pretty test

---
 src/test/pretty/block-comment-wchar.pp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/test/pretty/block-comment-wchar.pp b/src/test/pretty/block-comment-wchar.pp
index 7def090edbb1..ef908fdd016f 100644
--- a/src/test/pretty/block-comment-wchar.pp
+++ b/src/test/pretty/block-comment-wchar.pp
@@ -14,6 +14,7 @@
 // ignore-tidy-cr
 // ignore-tidy-tab
 // pp-exact:block-comment-wchar.pp
+// ignore-lexer-test FIXME #15679
 fn f() {
     fn nested() {
         /*