diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index e347a76f6a2a..c29ab569f479 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -350,6 +350,21 @@ pub fn is_whitespace(c: char) -> bool { ) } +/// True if `c` is considered horizontal whitespace according to Rust language definition. +pub fn is_horizontal_whitespace(c: char) -> bool { + // This is Pattern_White_Space. + // + // Note that this set is stable (ie, it doesn't change with different + // Unicode versions), so it's ok to just hard-code the values. + + matches!( + c, + // Horizontal space characters + '\u{0009}' // tab (\t) + | '\u{0020}' // space + ) +} + /// True if `c` is valid as a first character of an identifier. /// See [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html) for /// a formal definition of valid identifier name. @@ -536,7 +551,7 @@ impl Cursor<'_> { debug_assert!(length_opening >= 3); // whitespace between the opening and the infostring. - self.eat_while(|ch| ch != '\n' && is_whitespace(ch)); + self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch)); // copied from `eat_identifier`, but allows `-` and `.` in infostring to allow something like // `---Cargo.toml` as a valid opener @@ -545,7 +560,7 @@ impl Cursor<'_> { self.eat_while(|c| is_id_continue(c) || c == '-' || c == '.'); } - self.eat_while(|ch| ch != '\n' && is_whitespace(ch)); + self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch)); let invalid_infostring = self.first() != '\n'; let mut found = false; @@ -586,7 +601,7 @@ impl Cursor<'_> { // on a standalone line. Might be wrong. while let Some(closing) = rest.find("---") { let preceding_chars_start = rest[..closing].rfind("\n").map_or(0, |i| i + 1); - if rest[preceding_chars_start..closing].chars().all(is_whitespace) { + if rest[preceding_chars_start..closing].chars().all(is_horizontal_whitespace) { // candidate found potential_closing = Some(closing); break; diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 9792240a5485..e3bd6a9a3270 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -6,7 +6,7 @@ use rustc_ast::util::unicode::{TEXT_FLOW_CONTROL_CHARS, contains_text_flow_contr use rustc_errors::codes::*; use rustc_errors::{Applicability, Diag, DiagCtxtHandle, StashKey}; use rustc_lexer::{ - Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_whitespace, + Base, Cursor, DocStyle, FrontmatterAllowed, LiteralKind, RawStrError, is_horizontal_whitespace, }; use rustc_literal_escaper::{EscapeError, Mode, check_for_errors}; use rustc_session::lint::BuiltinLintDiag; @@ -597,7 +597,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { let last_line_start = within.rfind('\n').map_or(0, |i| i + 1); let last_line = &within[last_line_start..]; - let last_line_trimmed = last_line.trim_start_matches(is_whitespace); + let last_line_trimmed = last_line.trim_start_matches(is_horizontal_whitespace); let last_line_start_pos = frontmatter_opening_end_pos + BytePos(last_line_start as u32); let frontmatter_span = self.mk_sp(frontmatter_opening_pos, self.pos); @@ -640,7 +640,7 @@ impl<'psess, 'src> Lexer<'psess, 'src> { }); } - if !rest.trim_matches(is_whitespace).is_empty() { + if !rest.trim_matches(is_horizontal_whitespace).is_empty() { let span = self.mk_sp(last_line_start_pos, self.pos); self.dcx().emit_err(errors::FrontmatterExtraCharactersAfterClose { span }); }