introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a
string, and a second time when we unescape literals. This PR aims to
remove this duplication, by introducing a new `unescape` module as a
single source of truth for character escaping rules.
This commit is contained in:
Aleksey Kladov 2019-04-25 11:48:25 +03:00
parent 9b67bd42b7
commit bfa5f27847
24 changed files with 1046 additions and 768 deletions

View file

@ -1,20 +1,20 @@
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
--> $DIR/ascii-only-character-escape.rs:4:16
--> $DIR/ascii-only-character-escape.rs:4:14
|
LL | let x = "\x80";
| ^^
| ^^^^
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
--> $DIR/ascii-only-character-escape.rs:5:16
--> $DIR/ascii-only-character-escape.rs:5:14
|
LL | let y = "\xff";
| ^^
| ^^^^
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
--> $DIR/ascii-only-character-escape.rs:6:16
--> $DIR/ascii-only-character-escape.rs:6:14
|
LL | let z = "\xe2";
| ^^
| ^^^^
error: aborting due to 3 previous errors

View file

@ -34,11 +34,11 @@ error: byte constant must be ASCII. Use a \xHH escape for a non-ASCII byte
LL | b'é';
| ^
error: unterminated byte constant: b'a
--> $DIR/byte-literals.rs:14:5
error: unterminated byte constant
--> $DIR/byte-literals.rs:14:6
|
LL | b'a
| ^^^
| ^^^^
error: aborting due to 7 previous errors

View file

@ -23,10 +23,10 @@ LL | b"é";
| ^
error: unterminated double quote byte string
--> $DIR/byte-string-literals.rs:9:7
--> $DIR/byte-string-literals.rs:9:6
|
LL | b"a
| _______^
| ______^
LL | | }
| |__^

View file

@ -9,32 +9,27 @@ fn main() {
let _ = b'\u';
//~^ ERROR incorrect unicode escape sequence
//~^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
let _ = b'\x5';
//~^ ERROR numeric character escape is too short
let _ = b'\xxy';
//~^ ERROR invalid character in numeric character escape: x
//~^^ ERROR invalid character in numeric character escape: y
let _ = '\x5';
//~^ ERROR numeric character escape is too short
let _ = '\xxy';
//~^ ERROR invalid character in numeric character escape: x
//~^^ ERROR invalid character in numeric character escape: y
let _ = b"\u{a4a4} \xf \u";
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
//~^^ ERROR invalid character in numeric character escape:
//~^^^ ERROR incorrect unicode escape sequence
//~^^^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
let _ = "\xf \u";
//~^ ERROR invalid character in numeric character escape:
//~^^ ERROR form of character escape may only be used with characters in the range [\x00-\x7f]
//~^^^ ERROR incorrect unicode escape sequence
//~^^ ERROR incorrect unicode escape sequence
let _ = "\u8f";
//~^ ERROR incorrect unicode escape sequence

View file

@ -18,88 +18,58 @@ LL | let _ = b'\u';
|
= help: format of unicode escape sequences is `\u{...}`
error: unicode escape sequences cannot be used as a byte or in a byte string
--> $DIR/issue-23620-invalid-escapes.rs:10:15
|
LL | let _ = b'\u';
| ^^
error: numeric character escape is too short
--> $DIR/issue-23620-invalid-escapes.rs:14:17
--> $DIR/issue-23620-invalid-escapes.rs:13:15
|
LL | let _ = b'\x5';
| ^
| ^^^
error: invalid character in numeric character escape: x
--> $DIR/issue-23620-invalid-escapes.rs:17:17
--> $DIR/issue-23620-invalid-escapes.rs:16:17
|
LL | let _ = b'\xxy';
| ^
error: invalid character in numeric character escape: y
--> $DIR/issue-23620-invalid-escapes.rs:17:18
|
LL | let _ = b'\xxy';
| ^
error: numeric character escape is too short
--> $DIR/issue-23620-invalid-escapes.rs:21:16
--> $DIR/issue-23620-invalid-escapes.rs:19:14
|
LL | let _ = '\x5';
| ^
| ^^^
error: invalid character in numeric character escape: x
--> $DIR/issue-23620-invalid-escapes.rs:24:16
--> $DIR/issue-23620-invalid-escapes.rs:22:16
|
LL | let _ = '\xxy';
| ^
error: invalid character in numeric character escape: y
--> $DIR/issue-23620-invalid-escapes.rs:24:17
|
LL | let _ = '\xxy';
| ^
error: unicode escape sequences cannot be used as a byte or in a byte string
--> $DIR/issue-23620-invalid-escapes.rs:28:15
--> $DIR/issue-23620-invalid-escapes.rs:25:15
|
LL | let _ = b"\u{a4a4} \xf \u";
| ^^^^^^^^
error: invalid character in numeric character escape:
--> $DIR/issue-23620-invalid-escapes.rs:28:27
--> $DIR/issue-23620-invalid-escapes.rs:25:27
|
LL | let _ = b"\u{a4a4} \xf \u";
| ^
error: incorrect unicode escape sequence
--> $DIR/issue-23620-invalid-escapes.rs:28:28
--> $DIR/issue-23620-invalid-escapes.rs:25:28
|
LL | let _ = b"\u{a4a4} \xf \u";
| ^^ incorrect unicode escape sequence
|
= help: format of unicode escape sequences is `\u{...}`
error: unicode escape sequences cannot be used as a byte or in a byte string
--> $DIR/issue-23620-invalid-escapes.rs:28:28
|
LL | let _ = b"\u{a4a4} \xf \u";
| ^^
error: invalid character in numeric character escape:
--> $DIR/issue-23620-invalid-escapes.rs:34:17
--> $DIR/issue-23620-invalid-escapes.rs:30:17
|
LL | let _ = "\xf \u";
| ^
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
--> $DIR/issue-23620-invalid-escapes.rs:34:16
|
LL | let _ = "\xf \u";
| ^^
error: incorrect unicode escape sequence
--> $DIR/issue-23620-invalid-escapes.rs:34:18
--> $DIR/issue-23620-invalid-escapes.rs:30:18
|
LL | let _ = "\xf \u";
| ^^ incorrect unicode escape sequence
@ -107,12 +77,12 @@ LL | let _ = "\xf \u";
= help: format of unicode escape sequences is `\u{...}`
error: incorrect unicode escape sequence
--> $DIR/issue-23620-invalid-escapes.rs:39:14
--> $DIR/issue-23620-invalid-escapes.rs:34:14
|
LL | let _ = "\u8f";
| ^^--
| |
| help: format of unicode escape sequences uses braces: `\u{8f}`
| |
| help: format of unicode escape sequences uses braces: `\u{8f}`
error: aborting due to 18 previous errors
error: aborting due to 13 previous errors

View file

@ -1,14 +1,14 @@
error: numeric character escape is too short
--> $DIR/lex-bad-char-literals-1.rs:3:8
--> $DIR/lex-bad-char-literals-1.rs:3:6
|
LL | '\x1'
| ^
| ^^^
error: numeric character escape is too short
--> $DIR/lex-bad-char-literals-1.rs:7:8
--> $DIR/lex-bad-char-literals-1.rs:7:6
|
LL | "\x1"
| ^
| ^^^
error: unknown character escape: \u{25cf}
--> $DIR/lex-bad-char-literals-1.rs:11:7

View file

@ -3,6 +3,10 @@ error: character literal may only contain one codepoint
|
LL | 'nope'
| ^^^^^^
help: if you meant to write a `str` literal, use double quotes
|
LL | "nope"
| ^^^^^^
error[E0601]: `main` function not found in crate `lex_bad_char_literals_2`
|

View file

@ -1,5 +1,5 @@
//
// This test needs to be the last one appearing in this file, as it kills the parser
static c: char =
' //~ ERROR: character literal may only contain one codepoint
' //~ ERROR: unterminated character literal
;

View file

@ -1,8 +1,8 @@
error: character literal may only contain one codepoint: '●
error: unterminated character literal
--> $DIR/lex-bad-char-literals-4.rs:4:5
|
LL | '●
| ^^
| ^^^^
error: aborting due to previous error

View file

@ -3,18 +3,30 @@ error: character literal may only contain one codepoint
|
LL | let x: &str = 'ab';
| ^^^^
help: if you meant to write a `str` literal, use double quotes
|
LL | let x: &str = "ab";
| ^^^^
error: character literal may only contain one codepoint
--> $DIR/lex-bad-char-literals-6.rs:4:19
|
LL | let y: char = 'cd';
| ^^^^
help: if you meant to write a `str` literal, use double quotes
|
LL | let y: char = "cd";
| ^^^^
error: character literal may only contain one codepoint
--> $DIR/lex-bad-char-literals-6.rs:6:13
|
LL | let z = 'ef';
| ^^^^
help: if you meant to write a `str` literal, use double quotes
|
LL | let z = "ef";
| ^^^^
error[E0277]: can't compare `&str` with `char`
--> $DIR/lex-bad-char-literals-6.rs:9:10

View file

@ -0,0 +1,14 @@
// compile-flags: -Z continue-parse-after-error
fn main() {
let _: char = '';
//~^ ERROR: empty character literal
let _: char = '\u{}';
//~^ ERROR: empty unicode escape (must have at least 1 hex digit)
// Next two are OK, but may befool error recovery
let _ = '/';
let _ = b'/';
let _ = ' hello // here's a comment
//~^ ERROR: unterminated character literal
}

View file

@ -0,0 +1,20 @@
error: empty character literal
--> $DIR/lex-bad-char-literals-7.rs:3:20
|
LL | let _: char = '';
| ^
error: empty unicode escape (must have at least 1 hex digit)
--> $DIR/lex-bad-char-literals-7.rs:5:20
|
LL | let _: char = '\u{}';
| ^^^^
error: unterminated character literal
--> $DIR/lex-bad-char-literals-7.rs:12:13
|
LL | let _ = ' hello // here's a comment
| ^^^^^^^^
error: aborting due to 3 previous errors

View file

@ -0,0 +1,10 @@
macro_rules! black_hole {
($($tt:tt)*) => {}
}
fn main() {
black_hole! { '\u{FFFFFF}' }
//~^ ERROR: invalid unicode character escape
black_hole! { "this is surrogate: \u{DAAA}" }
//~^ ERROR: invalid unicode character escape
}

View file

@ -0,0 +1,18 @@
error: invalid unicode character escape
--> $DIR/literals-are-validated-before-expansion.rs:6:20
|
LL | black_hole! { '\u{FFFFFF}' }
| ^^^^^^^^^^
|
= help: unicode escape must be at most 10FFFF
error: invalid unicode character escape
--> $DIR/literals-are-validated-before-expansion.rs:8:39
|
LL | black_hole! { "this is surrogate: \u{DAAA}" }
| ^^^^^^^^
|
= help: unicode escape must not be a surrogate
error: aborting due to 2 previous errors

View file

@ -1,8 +1,8 @@
error: unterminated unicode escape (needed a `}`)
--> $DIR/new-unicode-escapes-1.rs:2:21
--> $DIR/new-unicode-escapes-1.rs:2:14
|
LL | let s = "\u{2603";
| ^
| ^^^^^^^
error: aborting due to previous error

View file

@ -1,8 +1,8 @@
error: overlong unicode escape (must have at most 6 hex digits)
--> $DIR/new-unicode-escapes-2.rs:2:17
--> $DIR/new-unicode-escapes-2.rs:2:14
|
LL | let s = "\u{260311111111}";
| ^^^^^^^^^^^^
| ^^^^^^^^^^^^^^^^
error: aborting due to previous error

View file

@ -1,16 +1,16 @@
error: invalid unicode character escape
--> $DIR/new-unicode-escapes-3.rs:2:14
--> $DIR/new-unicode-escapes-3.rs:2:15
|
LL | let s1 = "\u{d805}";
| ^^^^^^^^^^
| ^^^^^^^^
|
= help: unicode escape must not be a surrogate
error: invalid unicode character escape
--> $DIR/new-unicode-escapes-3.rs:3:14
--> $DIR/new-unicode-escapes-3.rs:3:15
|
LL | let s2 = "\u{ffffff}";
| ^^^^^^^^^^^^
| ^^^^^^^^^^
|
= help: unicode escape must be at most 10FFFF