From f8fd32ef9dd48a216ae5ca44ca65ea8f2205f581 Mon Sep 17 00:00:00 2001
From: Corey Richardson <corey@octayn.net>
Date: Mon, 14 Jul 2014 20:45:39 -0700
Subject: [PATCH] Byte/raw binary literal fixes

---
 src/grammar/.gitignore   |  4 ++++
 src/grammar/RustLexer.g4 |  9 +++++++--
 src/grammar/verify.rs    | 15 +++++++++++++--
 3 files changed, 24 insertions(+), 4 deletions(-)
 create mode 100644 src/grammar/.gitignore

diff --git a/src/grammar/.gitignore b/src/grammar/.gitignore
new file mode 100644
index 000000000000..e77db28967e3
--- /dev/null
+++ b/src/grammar/.gitignore
@@ -0,0 +1,4 @@
+verify
+*.class
+*.java
+*.tokens
diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4
index e4640ccfb164..f2705e5421b9 100644
--- a/src/grammar/RustLexer.g4
+++ b/src/grammar/RustLexer.g4
@@ -44,6 +44,7 @@ SHR     : '>>' ;
 
 BINOP
     : PLUS
+    | SLASH
     | MINUS
     | STAR
     | PERCENT
@@ -95,6 +96,10 @@ LIT_CHAR
   : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] ) '\''
   ;
 
+LIT_BYTE /* byte literal, e.g. b'a', b'\n' or b'\x7f' (assuming HEXIT is one hex digit) */
+  : 'b\'' ( '\\' ( [xX] HEXIT HEXIT | [nrt\\'"0] ) | ~[\\'\n\t\r] ) '\''
+  ;
+
 fragment INT_SUFFIX
   : 'i'
   | 'i8'
@@ -130,7 +135,7 @@ LIT_STR
   ;
 
 LIT_BINARY : 'b' LIT_STR ;
-LIT_BINARY_RAW : 'b' LIT_STR_RAW ;
+LIT_BINARY_RAW : 'rb' LIT_STR_RAW ;
 
 /* this is a bit messy */
 
@@ -159,7 +164,7 @@ OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ;
 LINE_COMMENT      : '//' ~[\r\n]* -> type(COMMENT) ;
 
 DOC_BLOCK_COMMENT
-  : ('/**' | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT)
+  : ('/**' ~[*] | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT)
   ;
 
 BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ;
diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs
index 38dd86e07729..c20aebe65d50 100644
--- a/src/grammar/verify.rs
+++ b/src/grammar/verify.rs
@@ -71,6 +71,7 @@ fn parse_token_list(file: &str) -> HashMap<String, Token> {
             "IDENT" => id(),
             "PLUS" => BINOP(PLUS),
             "LIT_CHAR" => LIT_CHAR(Name(0)),
+            "LIT_BYTE" => LIT_BYTE(Name(0)),
             "EQ" => EQ,
             "RBRACKET" => RBRACKET,
             "COMMENT" => COMMENT,
@@ -124,7 +125,7 @@ fn str_to_binop(s: &str) -> BinOp {
     }
 }
 
-/// Assuming a raw string/binary literal, strip out the leading/trailing
+/// Assuming a string/binary literal, strip out the leading/trailing
 /// hashes and surrounding quotes/raw/binary prefix.
 fn fix(mut lit: &str) -> ast::Name {
     if lit.char_at(0) == 'r' {
@@ -143,6 +144,15 @@ fn fix(mut lit: &str) -> ast::Name {
     parse::token::intern(lit.slice(leading_hashes + 1, lit.len() - leading_hashes - 1))
 }
 
+/// Given a char or byte literal, drop the optional `b` prefix and the
+/// surrounding single quotes, then intern what remains.
+fn fixchar(lit: &str) -> ast::Name {
+    // Skip the opening quote, plus the `b` marker when one is present.
+    let start = if lit.char_at(0) == 'b' { 2 } else { 1 };
+
+    parse::token::intern(lit.slice(start, lit.len() - 1))
+}
+
 fn count(lit: &str) -> uint {
     lit.chars().take_while(|c| *c == '#').count()
 }
@@ -167,7 +177,8 @@ fn parse_antlr_token(s: &str, tokens: &HashMap<String, Token>) -> TokenAndSpan {
         BINOPEQ(..) => BINOPEQ(str_to_binop(content.slice_to(content.len() - 1))),
         LIT_STR(..) => LIT_STR(fix(content)),
         LIT_STR_RAW(..) => LIT_STR_RAW(fix(content), count(content)),
-        LIT_CHAR(..) => LIT_CHAR(nm),
+        LIT_CHAR(..) => LIT_CHAR(fixchar(content)),
+        LIT_BYTE(..) => LIT_BYTE(fixchar(content)),
         DOC_COMMENT(..) => DOC_COMMENT(nm),
         LIT_INTEGER(..) => LIT_INTEGER(nm),
         LIT_FLOAT(..) => LIT_FLOAT(nm),