introduce unescape module

Currently, we deal with escape sequences twice: once when we lex a string, and a second time when we unescape literals. This PR aims to remove this duplication by introducing a new `unescape` module as a single source of truth for character escaping rules.
2019-04-25 11:48:25 +03:00 · 2019-04-25 11:48:25 +03:00 · bfa5f27847
commit bfa5f27847
parent 9b67bd42b7
24 changed files with 1046 additions and 768 deletions
--- a/src/test/ui/parser/ascii-only-character-escape.stderr
+++ b/src/test/ui/parser/ascii-only-character-escape.stderr
@ -1,20 +1,20 @@
 error: this form of character escape may only be used with characters in the range [\x00-\x7f]
-  --> $DIR/ascii-only-character-escape.rs:4:16
+  --> $DIR/ascii-only-character-escape.rs:4:14
   |
 LL |     let x = "\x80";
-   |                ^^
+   |              ^^^^

 error: this form of character escape may only be used with characters in the range [\x00-\x7f]
-  --> $DIR/ascii-only-character-escape.rs:5:16
+  --> $DIR/ascii-only-character-escape.rs:5:14
   |
 LL |     let y = "\xff";
-   |                ^^
+   |              ^^^^

 error: this form of character escape may only be used with characters in the range [\x00-\x7f]
-  --> $DIR/ascii-only-character-escape.rs:6:16
+  --> $DIR/ascii-only-character-escape.rs:6:14
   |
 LL |     let z = "\xe2";
-   |                ^^
+   |              ^^^^

 error: aborting due to 3 previous errors

--- a/src/test/ui/parser/byte-literals.stderr
+++ b/src/test/ui/parser/byte-literals.stderr
@ -34,11 +34,11 @@ error: byte constant must be ASCII. Use a \xHH escape for a non-ASCII byte
 LL |     b'é';
   |       ^

-error: unterminated byte constant: b'a
-  --> $DIR/byte-literals.rs:14:5
+error: unterminated byte constant
+  --> $DIR/byte-literals.rs:14:6
   |
 LL |     b'a
-   |     ^^^
+   |      ^^^^

 error: aborting due to 7 previous errors

--- a/src/test/ui/parser/byte-string-literals.stderr
+++ b/src/test/ui/parser/byte-string-literals.stderr
@ -23,10 +23,10 @@ LL |     b"é";
   |       ^

 error: unterminated double quote byte string
-  --> $DIR/byte-string-literals.rs:9:7
+  --> $DIR/byte-string-literals.rs:9:6
   |
 LL |       b"a
-   |  _______^
+   |  ______^
 LL | | }
   | |__^

--- a/src/test/ui/parser/issue-23620-invalid-escapes.rs
+++ b/src/test/ui/parser/issue-23620-invalid-escapes.rs
@ -9,32 +9,27 @@ fn main() {

    let _ = b'\u';
    //~^ ERROR incorrect unicode escape sequence
-    //~^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string

    let _ = b'\x5';
    //~^ ERROR numeric character escape is too short

    let _ = b'\xxy';
    //~^ ERROR invalid character in numeric character escape: x
-    //~^^ ERROR invalid character in numeric character escape: y

    let _ = '\x5';
    //~^ ERROR numeric character escape is too short

    let _ = '\xxy';
    //~^ ERROR invalid character in numeric character escape: x
-    //~^^ ERROR invalid character in numeric character escape: y

    let _ = b"\u{a4a4} \xf \u";
    //~^ ERROR unicode escape sequences cannot be used as a byte or in a byte string
    //~^^ ERROR invalid character in numeric character escape:
    //~^^^ ERROR incorrect unicode escape sequence
-    //~^^^^ ERROR unicode escape sequences cannot be used as a byte or in a byte string

    let _ = "\xf \u";
    //~^ ERROR invalid character in numeric character escape:
-    //~^^ ERROR form of character escape may only be used with characters in the range [\x00-\x7f]
-    //~^^^ ERROR incorrect unicode escape sequence
+    //~^^ ERROR incorrect unicode escape sequence

    let _ = "\u8f";
    //~^ ERROR incorrect unicode escape sequence
--- a/src/test/ui/parser/issue-23620-invalid-escapes.stderr
+++ b/src/test/ui/parser/issue-23620-invalid-escapes.stderr
@ -18,88 +18,58 @@ LL |     let _ = b'\u';
   |
   = help: format of unicode escape sequences is `\u{...}`

-error: unicode escape sequences cannot be used as a byte or in a byte string
-  --> $DIR/issue-23620-invalid-escapes.rs:10:15
-   |
-LL |     let _ = b'\u';
-   |               ^^
-
 error: numeric character escape is too short
-  --> $DIR/issue-23620-invalid-escapes.rs:14:17
+  --> $DIR/issue-23620-invalid-escapes.rs:13:15
   |
 LL |     let _ = b'\x5';
-   |                 ^
+   |               ^^^

 error: invalid character in numeric character escape: x
-  --> $DIR/issue-23620-invalid-escapes.rs:17:17
+  --> $DIR/issue-23620-invalid-escapes.rs:16:17
   |
 LL |     let _ = b'\xxy';
   |                 ^

-error: invalid character in numeric character escape: y
-  --> $DIR/issue-23620-invalid-escapes.rs:17:18
-   |
-LL |     let _ = b'\xxy';
-   |                  ^
-
 error: numeric character escape is too short
-  --> $DIR/issue-23620-invalid-escapes.rs:21:16
+  --> $DIR/issue-23620-invalid-escapes.rs:19:14
   |
 LL |     let _ = '\x5';
-   |                ^
+   |              ^^^

 error: invalid character in numeric character escape: x
-  --> $DIR/issue-23620-invalid-escapes.rs:24:16
+  --> $DIR/issue-23620-invalid-escapes.rs:22:16
   |
 LL |     let _ = '\xxy';
   |                ^

-error: invalid character in numeric character escape: y
-  --> $DIR/issue-23620-invalid-escapes.rs:24:17
-   |
-LL |     let _ = '\xxy';
-   |                 ^
-
 error: unicode escape sequences cannot be used as a byte or in a byte string
-  --> $DIR/issue-23620-invalid-escapes.rs:28:15
+  --> $DIR/issue-23620-invalid-escapes.rs:25:15
   |
 LL |     let _ = b"\u{a4a4} \xf \u";
   |               ^^^^^^^^

 error: invalid character in numeric character escape:  
-  --> $DIR/issue-23620-invalid-escapes.rs:28:27
+  --> $DIR/issue-23620-invalid-escapes.rs:25:27
   |
 LL |     let _ = b"\u{a4a4} \xf \u";
   |                           ^

 error: incorrect unicode escape sequence
-  --> $DIR/issue-23620-invalid-escapes.rs:28:28
+  --> $DIR/issue-23620-invalid-escapes.rs:25:28
   |
 LL |     let _ = b"\u{a4a4} \xf \u";
   |                            ^^ incorrect unicode escape sequence
   |
   = help: format of unicode escape sequences is `\u{...}`

-error: unicode escape sequences cannot be used as a byte or in a byte string
-  --> $DIR/issue-23620-invalid-escapes.rs:28:28
-   |
-LL |     let _ = b"\u{a4a4} \xf \u";
-   |                            ^^
-
 error: invalid character in numeric character escape:  
-  --> $DIR/issue-23620-invalid-escapes.rs:34:17
+  --> $DIR/issue-23620-invalid-escapes.rs:30:17
   |
 LL |     let _ = "\xf \u";
   |                 ^

-error: this form of character escape may only be used with characters in the range [\x00-\x7f]
-  --> $DIR/issue-23620-invalid-escapes.rs:34:16
-   |
-LL |     let _ = "\xf \u";
-   |                ^^
-
 error: incorrect unicode escape sequence
-  --> $DIR/issue-23620-invalid-escapes.rs:34:18
+  --> $DIR/issue-23620-invalid-escapes.rs:30:18
   |
 LL |     let _ = "\xf \u";
   |                  ^^ incorrect unicode escape sequence
@ -107,12 +77,12 @@ LL |     let _ = "\xf \u";
   = help: format of unicode escape sequences is `\u{...}`

 error: incorrect unicode escape sequence
-  --> $DIR/issue-23620-invalid-escapes.rs:39:14
+  --> $DIR/issue-23620-invalid-escapes.rs:34:14
   |
 LL |     let _ = "\u8f";
   |              ^^--
-   |              |
-   |              help: format of unicode escape sequences uses braces: `\u{8f}`
+   |                |
+   |                help: format of unicode escape sequences uses braces: `\u{8f}`

-error: aborting due to 18 previous errors
+error: aborting due to 13 previous errors

--- a/src/test/ui/parser/lex-bad-char-literals-1.stderr
+++ b/src/test/ui/parser/lex-bad-char-literals-1.stderr
@ -1,14 +1,14 @@
 error: numeric character escape is too short
-  --> $DIR/lex-bad-char-literals-1.rs:3:8
+  --> $DIR/lex-bad-char-literals-1.rs:3:6
   |
 LL |     '\x1'
-   |        ^
+   |      ^^^

 error: numeric character escape is too short
-  --> $DIR/lex-bad-char-literals-1.rs:7:8
+  --> $DIR/lex-bad-char-literals-1.rs:7:6
   |
 LL |     "\x1"
-   |        ^
+   |      ^^^

 error: unknown character escape: \u{25cf}
  --> $DIR/lex-bad-char-literals-1.rs:11:7
--- a/src/test/ui/parser/lex-bad-char-literals-2.stderr
+++ b/src/test/ui/parser/lex-bad-char-literals-2.stderr
@ -3,6 +3,10 @@ error: character literal may only contain one codepoint
   |
 LL |     'nope'
   |     ^^^^^^
+help: if you meant to write a `str` literal, use double quotes
+   |
+LL |     "nope"
+   |     ^^^^^^

 error[E0601]: `main` function not found in crate `lex_bad_char_literals_2`
   |
--- a/src/test/ui/parser/lex-bad-char-literals-4.rs
+++ b/src/test/ui/parser/lex-bad-char-literals-4.rs
@ -1,5 +1,5 @@
 //
 // This test needs to be the last one appearing in this file as it kills the parser
 static c: char =
-    '●  //~ ERROR: character literal may only contain one codepoint
+    '●  //~ ERROR: unterminated character literal
 ;
--- a/src/test/ui/parser/lex-bad-char-literals-4.stderr
+++ b/src/test/ui/parser/lex-bad-char-literals-4.stderr
@ -1,8 +1,8 @@
-error: character literal may only contain one codepoint: '●
+error: unterminated character literal
  --> $DIR/lex-bad-char-literals-4.rs:4:5
   |
 LL |     '●
-   |     ^^
+   |     ^^^^

 error: aborting due to previous error

--- a/src/test/ui/parser/lex-bad-char-literals-6.stderr
+++ b/src/test/ui/parser/lex-bad-char-literals-6.stderr
@ -3,18 +3,30 @@ error: character literal may only contain one codepoint
   |
 LL |     let x: &str = 'ab';
   |                   ^^^^
+help: if you meant to write a `str` literal, use double quotes
+   |
+LL |     let x: &str = "ab";
+   |                   ^^^^

 error: character literal may only contain one codepoint
  --> $DIR/lex-bad-char-literals-6.rs:4:19
   |
 LL |     let y: char = 'cd';
   |                   ^^^^
+help: if you meant to write a `str` literal, use double quotes
+   |
+LL |     let y: char = "cd";
+   |                   ^^^^

 error: character literal may only contain one codepoint
  --> $DIR/lex-bad-char-literals-6.rs:6:13
   |
 LL |     let z = 'ef';
   |             ^^^^
+help: if you meant to write a `str` literal, use double quotes
+   |
+LL |     let z = "ef";
+   |             ^^^^

 error[E0277]: can't compare `&str` with `char`
  --> $DIR/lex-bad-char-literals-6.rs:9:10
--- a/src/test/ui/parser/lex-bad-char-literals-7.rs
+++ b/src/test/ui/parser/lex-bad-char-literals-7.rs
@ -0,0 +1,14 @@
+// compile-flags: -Z continue-parse-after-error
+fn main() {
+    let _: char = '';
+    //~^ ERROR: empty character literal
+    let _: char = '\u{}';
+    //~^ ERROR: empty unicode escape (must have at least 1 hex digit)
+
+    // Next two are OK, but may befool error recovery
+    let _ = '/';
+    let _ = b'/';
+
+    let _ = ' hello // here's a comment
+    //~^ ERROR: unterminated character literal
+}
--- a/src/test/ui/parser/lex-bad-char-literals-7.stderr
+++ b/src/test/ui/parser/lex-bad-char-literals-7.stderr
@ -0,0 +1,20 @@
+error: empty character literal
+  --> $DIR/lex-bad-char-literals-7.rs:3:20
+   |
+LL |     let _: char = '';
+   |                    ^
+
+error: empty unicode escape (must have at least 1 hex digit)
+  --> $DIR/lex-bad-char-literals-7.rs:5:20
+   |
+LL |     let _: char = '\u{}';
+   |                    ^^^^
+
+error: unterminated character literal
+  --> $DIR/lex-bad-char-literals-7.rs:12:13
+   |
+LL |     let _ = ' hello // here's a comment
+   |             ^^^^^^^^
+
+error: aborting due to 3 previous errors
+
--- a/src/test/ui/parser/macro/literals-are-validated-before-expansion.rs
+++ b/src/test/ui/parser/macro/literals-are-validated-before-expansion.rs
@ -0,0 +1,10 @@
+macro_rules! black_hole {
+    ($($tt:tt)*) => {}
+}
+
+fn main() {
+    black_hole! { '\u{FFFFFF}' }
+    //~^ ERROR: invalid unicode character escape
+    black_hole! { "this is surrogate: \u{DAAA}" }
+    //~^ ERROR: invalid unicode character escape
+}
--- a/src/test/ui/parser/macro/literals-are-validated-before-expansion.stderr
+++ b/src/test/ui/parser/macro/literals-are-validated-before-expansion.stderr
@ -0,0 +1,18 @@
+error: invalid unicode character escape
+  --> $DIR/literals-are-validated-before-expansion.rs:6:20
+   |
+LL |     black_hole! { '\u{FFFFFF}' }
+   |                    ^^^^^^^^^^
+   |
+   = help: unicode escape must be at most 10FFFF
+
+error: invalid unicode character escape
+  --> $DIR/literals-are-validated-before-expansion.rs:8:39
+   |
+LL |     black_hole! { "this is surrogate: \u{DAAA}" }
+   |                                       ^^^^^^^^
+   |
+   = help: unicode escape must not be a surrogate
+
+error: aborting due to 2 previous errors
+
--- a/src/test/ui/parser/new-unicode-escapes-1.stderr
+++ b/src/test/ui/parser/new-unicode-escapes-1.stderr
@ -1,8 +1,8 @@
 error: unterminated unicode escape (needed a `}`)
-  --> $DIR/new-unicode-escapes-1.rs:2:21
+  --> $DIR/new-unicode-escapes-1.rs:2:14
   |
 LL |     let s = "\u{2603";
-   |                     ^
+   |              ^^^^^^^

 error: aborting due to previous error

--- a/src/test/ui/parser/new-unicode-escapes-2.stderr
+++ b/src/test/ui/parser/new-unicode-escapes-2.stderr
@ -1,8 +1,8 @@
 error: overlong unicode escape (must have at most 6 hex digits)
-  --> $DIR/new-unicode-escapes-2.rs:2:17
+  --> $DIR/new-unicode-escapes-2.rs:2:14
   |
 LL |     let s = "\u{260311111111}";
-   |                 ^^^^^^^^^^^^
+   |              ^^^^^^^^^^^^^^^^

 error: aborting due to previous error

--- a/src/test/ui/parser/new-unicode-escapes-3.stderr
+++ b/src/test/ui/parser/new-unicode-escapes-3.stderr
@ -1,16 +1,16 @@
 error: invalid unicode character escape
-  --> $DIR/new-unicode-escapes-3.rs:2:14
+  --> $DIR/new-unicode-escapes-3.rs:2:15
   |
 LL |     let s1 = "\u{d805}";
-   |              ^^^^^^^^^^
+   |               ^^^^^^^^
   |
   = help: unicode escape must not be a surrogate

 error: invalid unicode character escape
-  --> $DIR/new-unicode-escapes-3.rs:3:14
+  --> $DIR/new-unicode-escapes-3.rs:3:15
   |
 LL |     let s2 = "\u{ffffff}";
-   |              ^^^^^^^^^^^^
+   |               ^^^^^^^^^^
   |
   = help: unicode escape must be at most 10FFFF