From f8fd32ef9dd48a216ae5ca44ca65ea8f2205f581 Mon Sep 17 00:00:00 2001 From: Corey Richardson Date: Mon, 14 Jul 2014 20:45:39 -0700 Subject: [PATCH] Byte/raw binary literal fixes --- src/grammar/.gitignore | 4 ++++ src/grammar/RustLexer.g4 | 9 +++++++-- src/grammar/verify.rs | 15 +++++++++++++-- 3 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 src/grammar/.gitignore diff --git a/src/grammar/.gitignore b/src/grammar/.gitignore new file mode 100644 index 000000000000..e77db28967e3 --- /dev/null +++ b/src/grammar/.gitignore @@ -0,0 +1,4 @@ +verify +*.class +*.java +*.tokens diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4 index e4640ccfb164..f2705e5421b9 100644 --- a/src/grammar/RustLexer.g4 +++ b/src/grammar/RustLexer.g4 @@ -44,6 +44,7 @@ SHR : '>>' ; BINOP : PLUS + | SLASH | MINUS | STAR | PERCENT @@ -95,6 +96,10 @@ LIT_CHAR : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] ) '\'' ; +LIT_BYTE + : 'b\'' ( '\\' ( [xX] HEXIT HEXIT | [nrt\\'"0] ) | ~[\\'\n\t\r] ) '\'' + ; + fragment INT_SUFFIX : 'i' | 'i8' @@ -130,7 +135,7 @@ LIT_STR ; LIT_BINARY : 'b' LIT_STR ; -LIT_BINARY_RAW : 'b' LIT_STR_RAW ; +LIT_BINARY_RAW : 'rb' LIT_STR_RAW ; /* this is a bit messy */ @@ -159,7 +164,7 @@ OUTER_DOC_COMMENT : '//!' ~[\r\n]* -> type(DOC_COMMENT) ; LINE_COMMENT : '//' ~[\r\n]* -> type(COMMENT) ; DOC_BLOCK_COMMENT - : ('/**' | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT) + : ('/**' ~[*] | '/*!') (DOC_BLOCK_COMMENT | .)*? '*/' -> type(DOC_COMMENT) ; BLOCK_COMMENT : '/*' (BLOCK_COMMENT | .)*? '*/' -> type(COMMENT) ; diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs index 38dd86e07729..c20aebe65d50 100644 --- a/src/grammar/verify.rs +++ b/src/grammar/verify.rs @@ -71,6 +71,7 @@ fn parse_token_list(file: &str) -> HashMap { "IDENT" => id(), "PLUS" => BINOP(PLUS), "LIT_CHAR" => LIT_CHAR(Name(0)), + "LIT_BYTE" => LIT_BYTE(Name(0)), "EQ" => EQ, "RBRACKET" => RBRACKET, "COMMENT" => COMMENT, @@ -124,7 +125,7 @@ fn str_to_binop(s: &str) -> BinOp { } } -/// Assuming a raw string/binary literal, strip out the leading/trailing +/// Assuming a string/binary literal, strip out the leading/trailing /// hashes and surrounding quotes/raw/binary prefix. fn fix(mut lit: &str) -> ast::Name { if lit.char_at(0) == 'r' { @@ -143,6 +144,15 @@ fn fix(mut lit: &str) -> ast::Name { parse::token::intern(lit.slice(leading_hashes + 1, lit.len() - leading_hashes - 1)) } +/// Assuming a char/byte literal, strip the 'b' prefix and the single quotes. +fn fixchar(mut lit: &str) -> ast::Name { + if lit.char_at(0) == 'b' { + lit = lit.slice_from(1); + } + + parse::token::intern(lit.slice(1, lit.len() - 1)) +} + fn count(lit: &str) -> uint { lit.chars().take_while(|c| *c == '#').count() } @@ -167,7 +177,8 @@ fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { BINOPEQ(..) => BINOPEQ(str_to_binop(content.slice_to(content.len() - 1))), LIT_STR(..) => LIT_STR(fix(content)), LIT_STR_RAW(..) => LIT_STR_RAW(fix(content), count(content)), - LIT_CHAR(..) => LIT_CHAR(nm), + LIT_CHAR(..) => LIT_CHAR(fixchar(content)), + LIT_BYTE(..) => LIT_BYTE(fixchar(content)), DOC_COMMENT(..) => DOC_COMMENT(nm), LIT_INTEGER(..) => LIT_INTEGER(nm), LIT_FLOAT(..) => LIT_FLOAT(nm),