Make non-ASCII errors more consistent.

There are three kinds of "byte" literals: byte literals, byte string
literals, and raw byte string literals. None of them may contain
non-ASCII characters.

Two `EscapeError` variants exist to report violations of that constraint:
- `NonAsciiCharInByte`: used for byte literals and byte string literals.
- `NonAsciiCharInByteString`: used for raw byte string literals.

As a result, the messages for raw byte string literals use different
wording, without good reason. Also, byte string literals are incorrectly
described as "byte constants" in some error messages.

This commit eliminates `NonAsciiCharInByteString` so the three cases are
handled similarly, and described correctly. The `mode` is enough to
distinguish them.

Note: Some existing error messages mention "byte constants" and some
mention "byte literals". I went with the latter here, because it's the
more accurate name and the one used by the Rust Reference.
This commit is contained in:
Nicholas Nethercote 2022-11-03 15:17:37 +11:00
parent 34b32b0dac
commit 7dbf2c0ed8
15 changed files with 62 additions and 74 deletions

View file

@ -7,6 +7,6 @@ pub fn main() {
b'\x0Z'; //~ ERROR invalid character in numeric character escape: `Z`
b' '; //~ ERROR byte constant must be escaped
b'''; //~ ERROR byte constant must be escaped
b'é'; //~ ERROR non-ASCII character in byte constant
b'é'; //~ ERROR non-ASCII character in byte literal
b'a //~ ERROR unterminated byte constant [E0763]
}

View file

@ -32,11 +32,11 @@ error: byte constant must be escaped: `'`
LL | b''';
| ^ help: escape the character: `\'`
error: non-ASCII character in byte constant
error: non-ASCII character in byte literal
--> $DIR/byte-literals.rs:10:7
|
LL | b'é';
| ^ byte constant must be ASCII
| ^ must be ASCII
|
help: if you meant to use the unicode code point for 'é', use a \xHH escape
|

View file

@ -3,7 +3,7 @@ static FOO: &'static [u8] = b"\f"; //~ ERROR unknown byte escape
pub fn main() {
b"\f"; //~ ERROR unknown byte escape
b"\x0Z"; //~ ERROR invalid character in numeric character escape: `Z`
b"é"; //~ ERROR non-ASCII character in byte constant
br##"é"##; //~ ERROR raw byte string must be ASCII
b"é"; //~ ERROR non-ASCII character in byte string literal
br##"é"##; //~ ERROR non-ASCII character in raw byte string literal
b"a //~ ERROR unterminated double quote byte string
}

View file

@ -20,18 +20,18 @@ error: invalid character in numeric character escape: `Z`
LL | b"\x0Z";
| ^ invalid character in numeric character escape
error: non-ASCII character in byte constant
error: non-ASCII character in byte string literal
--> $DIR/byte-string-literals.rs:6:7
|
LL | b"é";
| ^ byte constant must be ASCII
| ^ must be ASCII
|
help: if you meant to use the unicode code point for 'é', use a \xHH escape
|
LL | b"\xE9";
| ~~~~
error: raw byte string must be ASCII
error: non-ASCII character in raw byte string literal
--> $DIR/byte-string-literals.rs:7:10
|
LL | br##"é"##;

View file

@ -2,6 +2,6 @@
pub fn main() {
br"a "; //~ ERROR bare CR not allowed in raw string
br"é"; //~ ERROR raw byte string must be ASCII
br"é"; //~ ERROR non-ASCII character in raw byte string literal
br##~"a"~##; //~ ERROR only `#` is allowed in raw string delimitation
}

View file

@ -4,7 +4,7 @@ error: bare CR not allowed in raw string
LL | br"a ";
| ^
error: raw byte string must be ASCII
error: non-ASCII character in raw byte string literal
--> $DIR/raw-byte-string-literals.rs:5:8
|
LL | br"é";

View file

@ -14,15 +14,15 @@ fn main() {
println!("{:?}", r##"/* } if isAdmin begin admins only "##);
//~^ ERROR unicode codepoint changing visible direction of text present in literal
println!("{:?}", b"/* } if isAdmin begin admins only ");
//~^ ERROR non-ASCII character in byte constant
//~| ERROR non-ASCII character in byte constant
//~| ERROR non-ASCII character in byte constant
//~| ERROR non-ASCII character in byte constant
//~^ ERROR non-ASCII character in byte string literal
//~| ERROR non-ASCII character in byte string literal
//~| ERROR non-ASCII character in byte string literal
//~| ERROR non-ASCII character in byte string literal
println!("{:?}", br##"/* } if isAdmin begin admins only "##);
//~^ ERROR raw byte string must be ASCII
//~| ERROR raw byte string must be ASCII
//~| ERROR raw byte string must be ASCII
//~| ERROR raw byte string must be ASCII
//~^ ERROR non-ASCII character in raw byte string literal
//~| ERROR non-ASCII character in raw byte string literal
//~| ERROR non-ASCII character in raw byte string literal
//~| ERROR non-ASCII character in raw byte string literal
println!("{:?}", '');
//~^ ERROR unicode codepoint changing visible direction of text present in literal
}

View file

@ -14,69 +14,69 @@ LL | println!("{:?}", b"us\u{202B}e\u{202A}r");
|
= help: unicode escape sequences cannot be used as a byte or in a byte string
error: non-ASCII character in byte constant
error: non-ASCII character in byte string literal
--> $DIR/unicode-control-codepoints.rs:16:26
|
LL | println!("{:?}", b"/* } if isAdmin begin admins only ");
| ^ byte constant must be ASCII but is '\u{202e}'
| ^ must be ASCII but is '\u{202e}'
|
help: if you meant to use the UTF-8 encoding of '\u{202e}', use \xHH escapes
|
LL | println!("{:?}", b"/*\xE2\x80\xAE } if isAdmin begin admins only ");
| ~~~~~~~~~~~~
error: non-ASCII character in byte constant
error: non-ASCII character in byte string literal
--> $DIR/unicode-control-codepoints.rs:16:30
|
LL | println!("{:?}", b"/* } if isAdmin begin admins only ");
| ^ byte constant must be ASCII but is '\u{2066}'
| ^ must be ASCII but is '\u{2066}'
|
help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes
|
LL | println!("{:?}", b"/* } \xE2\x81\xA6if isAdmin begin admins only ");
| ~~~~~~~~~~~~
error: non-ASCII character in byte constant
error: non-ASCII character in byte string literal
--> $DIR/unicode-control-codepoints.rs:16:41
|
LL | println!("{:?}", b"/* } if isAdmin begin admins only ");
| ^ byte constant must be ASCII but is '\u{2069}'
| ^ must be ASCII but is '\u{2069}'
|
help: if you meant to use the UTF-8 encoding of '\u{2069}', use \xHH escapes
|
LL | println!("{:?}", b"/* } if isAdmin\xE2\x81\xA9 begin admins only ");
| ~~~~~~~~~~~~
error: non-ASCII character in byte constant
error: non-ASCII character in byte string literal
--> $DIR/unicode-control-codepoints.rs:16:43
|
LL | println!("{:?}", b"/* } if isAdmin begin admins only ");
| ^ byte constant must be ASCII but is '\u{2066}'
| ^ must be ASCII but is '\u{2066}'
|
help: if you meant to use the UTF-8 encoding of '\u{2066}', use \xHH escapes
|
LL | println!("{:?}", b"/* } if isAdmin \xE2\x81\xA6 begin admins only ");
| ~~~~~~~~~~~~
error: raw byte string must be ASCII
error: non-ASCII character in raw byte string literal
--> $DIR/unicode-control-codepoints.rs:21:29
|
LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##);
| ^ must be ASCII but is '\u{202e}'
error: raw byte string must be ASCII
error: non-ASCII character in raw byte string literal
--> $DIR/unicode-control-codepoints.rs:21:33
|
LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##);
| ^ must be ASCII but is '\u{2066}'
error: raw byte string must be ASCII
error: non-ASCII character in raw byte string literal
--> $DIR/unicode-control-codepoints.rs:21:44
|
LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##);
| ^ must be ASCII but is '\u{2069}'
error: raw byte string must be ASCII
error: non-ASCII character in raw byte string literal
--> $DIR/unicode-control-codepoints.rs:21:46
|
LL | println!("{:?}", br##"/* } if isAdmin begin admins only "##);