Rollup merge of #150790 - lexer/help-invisible-character, r=Kivooeo,tgross35

feat: invisible character help string

I was playing around with zero-width spaces in different programming languages and thought that this error message could be more helpful. Hopefully it's a good first contribution! :)
This commit is contained in:
Matthias Krüger 2026-01-12 00:02:53 +01:00 committed by GitHub
commit 66d18446fc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 28 additions and 0 deletions

View file

@ -967,6 +967,7 @@ parse_unknown_start_of_token = unknown start of token: {$escaped}
.sugg_quotes = Unicode characters '“' (Left Double Quotation Mark) and '”' (Right Double Quotation Mark) look like '{$ascii_str}' ({$ascii_name}), but are not
.sugg_other = Unicode character '{$ch}' ({$u_name}) looks like '{$ascii_str}' ({$ascii_name}), but it is not
.help_null = source files must contain UTF-8 encoded text, unexpected null bytes might occur when a different encoding is used
.help_invisible_char = invisible characters like '{$escaped}' are not usually visible in text editors
.note_repeats = character appears {$repeats ->
[one] once more
*[other] {$repeats} more times

View file

@ -2369,6 +2369,8 @@ pub(crate) struct UnknownTokenStart {
pub null: Option<UnknownTokenNull>,
#[subdiagnostic]
pub repeat: Option<UnknownTokenRepeat>,
#[subdiagnostic]
pub invisible: Option<InvisibleCharacter>,
}
#[derive(Subdiagnostic)]
@ -2409,6 +2411,10 @@ pub(crate) struct UnknownTokenRepeat {
pub repeats: usize,
}
/// Help subdiagnostic for an "unknown start of token" error caused by an
/// invisible character.
///
/// Carried in the optional `invisible` field of `UnknownTokenStart`; the lexer
/// sets it to `Some` only when the offending character is one of
/// `INVISIBLE_CHARACTERS`. It has no fields of its own and renders the
/// `parse_help_invisible_char` message as a `help:` line.
#[derive(Subdiagnostic)]
#[help(parse_help_invisible_char)]
pub(crate) struct InvisibleCharacter;
/// Help subdiagnostic for an "unknown start of token" error caused by a NUL
/// byte.
///
/// Carried in the optional `null` field of `UnknownTokenStart`; the lexer sets
/// it to `Some` only when the offending character is `'\x00'`. It renders the
/// `parse_help_null` message as a `help:` line.
#[derive(Subdiagnostic)]
#[help(parse_help_null)]
pub(crate) struct UnknownTokenNull;

View file

@ -36,6 +36,10 @@ use unescape_error_reporting::{emit_unescape_error, escaped_char};
#[cfg(target_pointer_width = "64")]
rustc_data_structures::static_assert_size!(rustc_lexer::Token, 12);
/// Code points that typically render as nothing at all in a text editor.
///
/// The lexer checks an unrecognised character against this list to decide
/// whether the "unknown start of token" error should carry the extra
/// invisible-character help. Entries are kept in ascending code-point order;
/// only membership matters to the lookup.
const INVISIBLE_CHARACTERS: [char; 8] = [
    '\u{00ad}', // SOFT HYPHEN
    '\u{034f}', // COMBINING GRAPHEME JOINER
    '\u{061c}', // ARABIC LETTER MARK
    '\u{200b}', // ZERO WIDTH SPACE
    '\u{200c}', // ZERO WIDTH NON-JOINER
    '\u{2060}', // WORD JOINER
    '\u{2061}', // FUNCTION APPLICATION
    '\u{2062}', // INVISIBLE TIMES
];
#[derive(Clone, Debug)]
pub(crate) struct UnmatchedDelim {
pub found_delim: Option<Delimiter>,
@ -456,6 +460,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> {
escaped: escaped_char(c),
sugg,
null: if c == '\x00' { Some(errors::UnknownTokenNull) } else { None },
invisible: if INVISIBLE_CHARACTERS.contains(&c) { Some(errors::InvisibleCharacter) } else { None },
repeat: if repeats > 0 {
swallow_next_invalid = repeats;
Some(errors::UnknownTokenRepeat { repeats })

View file

@ -0,0 +1,6 @@
// Provide extra help when a user has an invisible character in their code
fn main() {
//~^ ERROR unknown start of token: \u{200b}
//~| HELP invisible characters like '\u{200b}' are not usually visible in text editors
}

View file

@ -0,0 +1,10 @@
error: unknown start of token: \u{200b}
--> $DIR/lex-invisible-characters.rs:3:8
|
LL | fn main() {
| ^
|
= help: invisible characters like '\u{200b}' are not usually visible in text editors
error: aborting due to 1 previous error