introduce unescape module
Currently, we deal with escape sequences twice: once when we lex a string, and a second time when we unescape literals. This PR aims to remove this duplication by introducing a new `unescape` module as a single source of truth for character escaping rules.
This commit is contained in:
parent
9b67bd42b7
commit
bfa5f27847
24 changed files with 1046 additions and 768 deletions
|
|
@ -1,20 +1,20 @@
|
|||
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
|
||||
--> $DIR/ascii-only-character-escape.rs:4:16
|
||||
--> $DIR/ascii-only-character-escape.rs:4:14
|
||||
|
|
||||
LL | let x = "\x80";
|
||||
| ^^
|
||||
| ^^^^
|
||||
|
||||
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
|
||||
--> $DIR/ascii-only-character-escape.rs:5:16
|
||||
--> $DIR/ascii-only-character-escape.rs:5:14
|
||||
|
|
||||
LL | let y = "\xff";
|
||||
| ^^
|
||||
| ^^^^
|
||||
|
||||
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
|
||||
--> $DIR/ascii-only-character-escape.rs:6:16
|
||||
--> $DIR/ascii-only-character-escape.rs:6:14
|
||||
|
|
||||
LL | let z = "\xe2";
|
||||
| ^^
|
||||
| ^^^^
|
||||
|
||||
error: aborting due to 3 previous errors
|
||||
|
||||
|
|
|
|||
|
|
@ -34,11 +34,11 @@ error: byte constant must be ASCII. Use a \xHH escape for a non-ASCII byte
|
|||
LL | b'é';
|
||||
| ^
|
||||
|
||||
error: unterminated byte constant: b'a
|
||||
--> $DIR/byte-literals.rs:14:5
|
||||
error: unterminated byte constant
|
||||
--> $DIR/byte-literals.rs:14:6
|
||||
|
|
||||
LL | b'a
|
||||
| ^^^
|
||||
| ^^^^
|
||||
|
||||
error: aborting due to 7 previous errors
|
||||
|
||||
|
|
|
|||
|
|
@ -23,10 +23,10 @@ LL | b"é";
|
|||
| ^
|
||||
|
||||
error: unterminated double quote byte string
|
||||
--> $DIR/byte-string-literals.rs:9:7
|
||||
--> $DIR/byte-string-literals.rs:9:6
|
||||
|
|
||||
LL | b"a
|
||||
| _______^
|
||||
| ______^
|
||||
LL | | }
|
||||
| |__^
|
||||
|
||||
|
|
|
|||
|
|
@ -9,32 +9,27 @@ fn main() {
|
|||
|
||||
let _ = b'\u';
|
||||
//~^ ERROR incorrect unicode escape sequence
|
||||
//~^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
|
||||
|
||||
let _ = b'\x5';
|
||||
//~^ ERROR numeric character escape is too short
|
||||
|
||||
let _ = b'\xxy';
|
||||
//~^ ERROR invalid character in numeric character escape: x
|
||||
//~^^ ERROR invalid character in numeric character escape: y
|
||||
|
||||
let _ = '\x5';
|
||||
//~^ ERROR numeric character escape is too short
|
||||
|
||||
let _ = '\xxy';
|
||||
//~^ ERROR invalid character in numeric character escape: x
|
||||
//~^^ ERROR invalid character in numeric character escape: y
|
||||
|
||||
let _ = b"\u{a4a4} \xf \u";
|
||||
//~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
|
||||
//~^^ ERROR invalid character in numeric character escape:
|
||||
//~^^^ ERROR incorrect unicode escape sequence
|
||||
//~^^^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
|
||||
|
||||
let _ = "\xf \u";
|
||||
//~^ ERROR invalid character in numeric character escape:
|
||||
//~^^ ERROR form of character escape may only be used with characters in the range [\x00-\x7f]
|
||||
//~^^^ ERROR incorrect unicode escape sequence
|
||||
//~^^ ERROR incorrect unicode escape sequence
|
||||
|
||||
let _ = "\u8f";
|
||||
//~^ ERROR incorrect unicode escape sequence
|
||||
|
|
|
|||
|
|
@ -18,88 +18,58 @@ LL | let _ = b'\u';
|
|||
|
|
||||
= help: format of unicode escape sequences is `\u{...}`
|
||||
|
||||
error: unicode escape sequences cannot be used as a byte or in a byte string
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:10:15
|
||||
|
|
||||
LL | let _ = b'\u';
|
||||
| ^^
|
||||
|
||||
error: numeric character escape is too short
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:14:17
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:13:15
|
||||
|
|
||||
LL | let _ = b'\x5';
|
||||
| ^
|
||||
| ^^^
|
||||
|
||||
error: invalid character in numeric character escape: x
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:17:17
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:16:17
|
||||
|
|
||||
LL | let _ = b'\xxy';
|
||||
| ^
|
||||
|
||||
error: invalid character in numeric character escape: y
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:17:18
|
||||
|
|
||||
LL | let _ = b'\xxy';
|
||||
| ^
|
||||
|
||||
error: numeric character escape is too short
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:21:16
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:19:14
|
||||
|
|
||||
LL | let _ = '\x5';
|
||||
| ^
|
||||
| ^^^
|
||||
|
||||
error: invalid character in numeric character escape: x
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:24:16
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:22:16
|
||||
|
|
||||
LL | let _ = '\xxy';
|
||||
| ^
|
||||
|
||||
error: invalid character in numeric character escape: y
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:24:17
|
||||
|
|
||||
LL | let _ = '\xxy';
|
||||
| ^
|
||||
|
||||
error: unicode escape sequences cannot be used as a byte or in a byte string
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:28:15
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:25:15
|
||||
|
|
||||
LL | let _ = b"\u{a4a4} \xf \u";
|
||||
| ^^^^^^^^
|
||||
|
||||
error: invalid character in numeric character escape:
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:28:27
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:25:27
|
||||
|
|
||||
LL | let _ = b"\u{a4a4} \xf \u";
|
||||
| ^
|
||||
|
||||
error: incorrect unicode escape sequence
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:28:28
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:25:28
|
||||
|
|
||||
LL | let _ = b"\u{a4a4} \xf \u";
|
||||
| ^^ incorrect unicode escape sequence
|
||||
|
|
||||
= help: format of unicode escape sequences is `\u{...}`
|
||||
|
||||
error: unicode escape sequences cannot be used as a byte or in a byte string
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:28:28
|
||||
|
|
||||
LL | let _ = b"\u{a4a4} \xf \u";
|
||||
| ^^
|
||||
|
||||
error: invalid character in numeric character escape:
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:34:17
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:30:17
|
||||
|
|
||||
LL | let _ = "\xf \u";
|
||||
| ^
|
||||
|
||||
error: this form of character escape may only be used with characters in the range [\x00-\x7f]
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:34:16
|
||||
|
|
||||
LL | let _ = "\xf \u";
|
||||
| ^^
|
||||
|
||||
error: incorrect unicode escape sequence
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:34:18
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:30:18
|
||||
|
|
||||
LL | let _ = "\xf \u";
|
||||
| ^^ incorrect unicode escape sequence
|
||||
|
|
@ -107,12 +77,12 @@ LL | let _ = "\xf \u";
|
|||
= help: format of unicode escape sequences is `\u{...}`
|
||||
|
||||
error: incorrect unicode escape sequence
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:39:14
|
||||
--> $DIR/issue-23620-invalid-escapes.rs:34:14
|
||||
|
|
||||
LL | let _ = "\u8f";
|
||||
| ^^--
|
||||
| |
|
||||
| help: format of unicode escape sequences uses braces: `\u{8f}`
|
||||
| |
|
||||
| help: format of unicode escape sequences uses braces: `\u{8f}`
|
||||
|
||||
error: aborting due to 18 previous errors
|
||||
error: aborting due to 13 previous errors
|
||||
|
||||
|
|
|
|||
|
|
@ -1,14 +1,14 @@
|
|||
error: numeric character escape is too short
|
||||
--> $DIR/lex-bad-char-literals-1.rs:3:8
|
||||
--> $DIR/lex-bad-char-literals-1.rs:3:6
|
||||
|
|
||||
LL | '\x1'
|
||||
| ^
|
||||
| ^^^
|
||||
|
||||
error: numeric character escape is too short
|
||||
--> $DIR/lex-bad-char-literals-1.rs:7:8
|
||||
--> $DIR/lex-bad-char-literals-1.rs:7:6
|
||||
|
|
||||
LL | "\x1"
|
||||
| ^
|
||||
| ^^^
|
||||
|
||||
error: unknown character escape: \u{25cf}
|
||||
--> $DIR/lex-bad-char-literals-1.rs:11:7
|
||||
|
|
|
|||
|
|
@ -3,6 +3,10 @@ error: character literal may only contain one codepoint
|
|||
|
|
||||
LL | 'nope'
|
||||
| ^^^^^^
|
||||
help: if you meant to write a `str` literal, use double quotes
|
||||
|
|
||||
LL | "nope"
|
||||
| ^^^^^^
|
||||
|
||||
error[E0601]: `main` function not found in crate `lex_bad_char_literals_2`
|
||||
|
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
//
|
||||
// This test needs to be the last one appearing in this file as it kills the parser
|
||||
static c: char =
|
||||
'● //~ ERROR: character literal may only contain one codepoint
|
||||
'● //~ ERROR: unterminated character literal
|
||||
;
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
error: character literal may only contain one codepoint: '●
|
||||
error: unterminated character literal
|
||||
--> $DIR/lex-bad-char-literals-4.rs:4:5
|
||||
|
|
||||
LL | '●
|
||||
| ^^
|
||||
| ^^^^
|
||||
|
||||
error: aborting due to previous error
|
||||
|
||||
|
|
|
|||
|
|
@ -3,18 +3,30 @@ error: character literal may only contain one codepoint
|
|||
|
|
||||
LL | let x: &str = 'ab';
|
||||
| ^^^^
|
||||
help: if you meant to write a `str` literal, use double quotes
|
||||
|
|
||||
LL | let x: &str = "ab";
|
||||
| ^^^^
|
||||
|
||||
error: character literal may only contain one codepoint
|
||||
--> $DIR/lex-bad-char-literals-6.rs:4:19
|
||||
|
|
||||
LL | let y: char = 'cd';
|
||||
| ^^^^
|
||||
help: if you meant to write a `str` literal, use double quotes
|
||||
|
|
||||
LL | let y: char = "cd";
|
||||
| ^^^^
|
||||
|
||||
error: character literal may only contain one codepoint
|
||||
--> $DIR/lex-bad-char-literals-6.rs:6:13
|
||||
|
|
||||
LL | let z = 'ef';
|
||||
| ^^^^
|
||||
help: if you meant to write a `str` literal, use double quotes
|
||||
|
|
||||
LL | let z = "ef";
|
||||
| ^^^^
|
||||
|
||||
error[E0277]: can't compare `&str` with `char`
|
||||
--> $DIR/lex-bad-char-literals-6.rs:9:10
|
||||
|
|
|
|||
14
src/test/ui/parser/lex-bad-char-literals-7.rs
Normal file
14
src/test/ui/parser/lex-bad-char-literals-7.rs
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
// compile-flags: -Z continue-parse-after-error
|
||||
fn main() {
|
||||
let _: char = '';
|
||||
//~^ ERROR: empty character literal
|
||||
let _: char = '\u{}';
|
||||
//~^ ERROR: empty unicode escape (must have at least 1 hex digit)
|
||||
|
||||
// Next two are OK, but may befool error recovery
|
||||
let _ = '/';
|
||||
let _ = b'/';
|
||||
|
||||
let _ = ' hello // here's a comment
|
||||
//~^ ERROR: unterminated character literal
|
||||
}
|
||||
20
src/test/ui/parser/lex-bad-char-literals-7.stderr
Normal file
20
src/test/ui/parser/lex-bad-char-literals-7.stderr
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
error: empty character literal
|
||||
--> $DIR/lex-bad-char-literals-7.rs:3:20
|
||||
|
|
||||
LL | let _: char = '';
|
||||
| ^
|
||||
|
||||
error: empty unicode escape (must have at least 1 hex digit)
|
||||
--> $DIR/lex-bad-char-literals-7.rs:5:20
|
||||
|
|
||||
LL | let _: char = '\u{}';
|
||||
| ^^^^
|
||||
|
||||
error: unterminated character literal
|
||||
--> $DIR/lex-bad-char-literals-7.rs:12:13
|
||||
|
|
||||
LL | let _ = ' hello // here's a comment
|
||||
| ^^^^^^^^
|
||||
|
||||
error: aborting due to 3 previous errors
|
||||
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
macro_rules! black_hole {
|
||||
($($tt:tt)*) => {}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
black_hole! { '\u{FFFFFF}' }
|
||||
//~^ ERROR: invalid unicode character escape
|
||||
black_hole! { "this is surrogate: \u{DAAA}" }
|
||||
//~^ ERROR: invalid unicode character escape
|
||||
}
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
error: invalid unicode character escape
|
||||
--> $DIR/literals-are-validated-before-expansion.rs:6:20
|
||||
|
|
||||
LL | black_hole! { '\u{FFFFFF}' }
|
||||
| ^^^^^^^^^^
|
||||
|
|
||||
= help: unicode escape must be at most 10FFFF
|
||||
|
||||
error: invalid unicode character escape
|
||||
--> $DIR/literals-are-validated-before-expansion.rs:8:39
|
||||
|
|
||||
LL | black_hole! { "this is surrogate: \u{DAAA}" }
|
||||
| ^^^^^^^^
|
||||
|
|
||||
= help: unicode escape must not be a surrogate
|
||||
|
||||
error: aborting due to 2 previous errors
|
||||
|
||||
|
|
@ -1,8 +1,8 @@
|
|||
error: unterminated unicode escape (needed a `}`)
|
||||
--> $DIR/new-unicode-escapes-1.rs:2:21
|
||||
--> $DIR/new-unicode-escapes-1.rs:2:14
|
||||
|
|
||||
LL | let s = "\u{2603";
|
||||
| ^
|
||||
| ^^^^^^^
|
||||
|
||||
error: aborting due to previous error
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
error: overlong unicode escape (must have at most 6 hex digits)
|
||||
--> $DIR/new-unicode-escapes-2.rs:2:17
|
||||
--> $DIR/new-unicode-escapes-2.rs:2:14
|
||||
|
|
||||
LL | let s = "\u{260311111111}";
|
||||
| ^^^^^^^^^^^^
|
||||
| ^^^^^^^^^^^^^^^^
|
||||
|
||||
error: aborting due to previous error
|
||||
|
||||
|
|
|
|||
|
|
@ -1,16 +1,16 @@
|
|||
error: invalid unicode character escape
|
||||
--> $DIR/new-unicode-escapes-3.rs:2:14
|
||||
--> $DIR/new-unicode-escapes-3.rs:2:15
|
||||
|
|
||||
LL | let s1 = "\u{d805}";
|
||||
| ^^^^^^^^^^
|
||||
| ^^^^^^^^
|
||||
|
|
||||
= help: unicode escape must not be a surrogate
|
||||
|
||||
error: invalid unicode character escape
|
||||
--> $DIR/new-unicode-escapes-3.rs:3:14
|
||||
--> $DIR/new-unicode-escapes-3.rs:3:15
|
||||
|
|
||||
LL | let s2 = "\u{ffffff}";
|
||||
| ^^^^^^^^^^^^
|
||||
| ^^^^^^^^^^
|
||||
|
|
||||
= help: unicode escape must be at most 10FFFF
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue