Rollup merge of #143708 - epage:pretty, r=compiler-errors

fix: Include frontmatter in -Zunpretty output

In the frontmatter implementation (rust-lang/rust#140035), whether the `-Zunpretty` output should include the frontmatter was left as an open question for
the tracking issue (rust-lang/rust#136889). My assumption is that it should be
carried over into the output.

The test was carried over from rust-lang/rust#137193, which was superseded by rust-lang/rust#140035.

Either way, `-Zunpretty` is unstable, so we can still change this behavior
even if frontmatter itself is stabilized.
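
For illustration (example adapted from the tests added in this PR), given a cargo-script style file:

    ---cargo
    [dependencies]
    clap = "4"
    ---

    fn main() {}

`-Zunpretty=normal` previously omitted the `---cargo ... ---` block from its output; with this change the block is carried over verbatim at the top, ahead of the pretty-printed source.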
commit dbd2f30395
Matthias Krüger 2025-07-11 07:35:21 +02:00, committed by GitHub
14 changed files with 157 additions and 39 deletions

@@ -120,7 +120,7 @@ fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment>
         pos += shebang_len;
     }
 
-    for token in rustc_lexer::tokenize(&text[pos..]) {
+    for token in rustc_lexer::tokenize(&text[pos..], rustc_lexer::FrontmatterAllowed::Yes) {
         let token_text = &text[pos..pos + token.len as usize];
         match token.kind {
             rustc_lexer::TokenKind::Whitespace => {
@@ -171,6 +171,14 @@ fn gather_comments(sm: &SourceMap, path: FileName, src: String) -> Vec<Comment>
                 })
             }
         }
+        rustc_lexer::TokenKind::Frontmatter { .. } => {
+            code_to_the_left = false;
+            comments.push(Comment {
+                style: CommentStyle::Isolated,
+                lines: vec![token_text.to_string()],
+                pos: start_bpos + BytePos(pos as u32),
+            });
+        }
         _ => {
             code_to_the_left = true;
         }
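
In effect, a frontmatter block now reaches the pretty-printer the same way an isolated comment does: the whole `---…---` block is a single token, stored verbatim as one `lines` entry. A rough sketch of the `Comment` value this arm would build for the frontmatter used in the tests below (the `pos` value is illustrative, not computed):

    // For a file beginning with `---cargo … ---`, this arm produces roughly:
    Comment {
        style: CommentStyle::Isolated,
        lines: vec!["---cargo\n[dependencies]\nclap = \"4\"\n---".to_string()],
        pos: start_bpos + BytePos(1), // illustrative offset of the token in the file
    }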

@@ -273,14 +273,15 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
     if let Some(input_tail) = input.strip_prefix("#!") {
         // Ok, this is a shebang but if the next non-whitespace token is `[`,
         // then it may be valid Rust code, so consider it Rust code.
-        let next_non_whitespace_token = tokenize(input_tail).map(|tok| tok.kind).find(|tok| {
-            !matches!(
-                tok,
-                TokenKind::Whitespace
-                    | TokenKind::LineComment { doc_style: None }
-                    | TokenKind::BlockComment { doc_style: None, .. }
-            )
-        });
+        let next_non_whitespace_token =
+            tokenize(input_tail, FrontmatterAllowed::No).map(|tok| tok.kind).find(|tok| {
+                !matches!(
+                    tok,
+                    TokenKind::Whitespace
+                        | TokenKind::LineComment { doc_style: None }
+                        | TokenKind::BlockComment { doc_style: None, .. }
+                )
+            });
         if next_non_whitespace_token != Some(TokenKind::OpenBracket) {
             // No other choice than to consider this a shebang.
             return Some(2 + input_tail.lines().next().unwrap_or_default().len());
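
For context on the disambiguation this code performs, two hedged examples of the documented behavior (not part of this diff):

    // `#!` followed by anything other than `[` starts a shebang line, which is stripped...
    assert!(strip_shebang("#!/usr/bin/env rust\nfn main() {}").is_some());
    // ...while `#![...]` is an inner attribute, i.e. valid Rust code, so nothing is stripped.
    assert!(strip_shebang("#![allow(unused)]\nfn main() {}").is_none());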
@@ -303,8 +304,16 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
 }
 
 /// Creates an iterator that produces tokens from the input string.
-pub fn tokenize(input: &str) -> impl Iterator<Item = Token> {
-    let mut cursor = Cursor::new(input, FrontmatterAllowed::No);
+///
+/// When parsing a full Rust document,
+/// first [`strip_shebang`] and then allow frontmatters with [`FrontmatterAllowed::Yes`].
+///
+/// When tokenizing a slice of a document, be sure to disallow frontmatters with [`FrontmatterAllowed::No`]
+pub fn tokenize(
+    input: &str,
+    frontmatter_allowed: FrontmatterAllowed,
+) -> impl Iterator<Item = Token> {
+    let mut cursor = Cursor::new(input, frontmatter_allowed);
     std::iter::from_fn(move || {
         let token = cursor.advance_token();
         if token.kind != TokenKind::Eof { Some(token) } else { None }
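
As a usage sketch of the new two-argument API, following the call order the doc comment above prescribes (`src` is a hypothetical input):

    use rustc_lexer::{strip_shebang, tokenize, FrontmatterAllowed, TokenKind};

    fn lex_document(src: &str) {
        // Full document: strip any shebang first, then allow frontmatter.
        let start = strip_shebang(src).unwrap_or(0);
        for token in tokenize(&src[start..], FrontmatterAllowed::Yes) {
            if let TokenKind::Frontmatter { .. } = token.kind {
                // The whole `---...---` block arrives as a single token.
            }
        }
        // A slice of a document keeps frontmatter disallowed.
        let _count = tokenize("1 + 2", FrontmatterAllowed::No).count();
    }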

@@ -124,8 +124,9 @@ fn test_valid_shebang() {
     assert_eq!(strip_shebang(input), None);
 }
 
-fn check_lexing(src: &str, expect: Expect) {
-    let actual: String = tokenize(src).map(|token| format!("{:?}\n", token)).collect();
+fn check_lexing(src: &str, frontmatter_allowed: FrontmatterAllowed, expect: Expect) {
+    let actual: String =
+        tokenize(src, frontmatter_allowed).map(|token| format!("{:?}\n", token)).collect();
     expect.assert_eq(&actual)
 }
@@ -133,6 +134,7 @@ fn check_lexing(src: &str, expect: Expect) {
 fn smoke_test() {
     check_lexing(
         "/* my source file */ fn main() { println!(\"zebra\"); }\n",
+        FrontmatterAllowed::No,
         expect![[r#"
             Token { kind: BlockComment { doc_style: None, terminated: true }, len: 20 }
             Token { kind: Whitespace, len: 1 }
@@ -171,6 +173,7 @@ fn comment_flavors() {
 /** outer doc block */
 /*! inner doc block */
 ",
+        FrontmatterAllowed::No,
         expect![[r#"
             Token { kind: Whitespace, len: 1 }
             Token { kind: LineComment { doc_style: None }, len: 7 }
@@ -199,6 +202,7 @@ fn comment_flavors() {
 fn nested_block_comments() {
     check_lexing(
         "/* /* */ */'a'",
+        FrontmatterAllowed::No,
         expect![[r#"
             Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
             Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
@@ -210,6 +214,7 @@ fn nested_block_comments() {
 fn characters() {
     check_lexing(
         "'a' ' ' '\\n'",
+        FrontmatterAllowed::No,
         expect![[r#"
             Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
             Token { kind: Whitespace, len: 1 }
@@ -224,6 +229,7 @@ fn characters() {
 fn lifetime() {
     check_lexing(
         "'abc",
+        FrontmatterAllowed::No,
         expect![[r#"
             Token { kind: Lifetime { starts_with_number: false }, len: 4 }
         "#]],
@@ -234,6 +240,7 @@ fn lifetime() {
 fn raw_string() {
     check_lexing(
         "r###\"\"#a\\b\x00c\"\"###",
+        FrontmatterAllowed::No,
         expect![[r#"
             Token { kind: Literal { kind: RawStr { n_hashes: Some(3) }, suffix_start: 17 }, len: 17 }
         "#]],
@@ -257,6 +264,7 @@ b"a"
 r###"raw"###suffix
 br###"raw"###suffix
 "####,
+        FrontmatterAllowed::No,
         expect![[r#"
             Token { kind: Whitespace, len: 1 }
             Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
@@ -286,3 +294,78 @@ br###"raw"###suffix
         "#]],
     )
 }
+
+#[test]
+fn frontmatter_allowed() {
+    check_lexing(
+        r#"
+---cargo
+[dependencies]
+clap = "4"
+---
+
+fn main() {}
+"#,
+        FrontmatterAllowed::Yes,
+        expect![[r#"
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Frontmatter { has_invalid_preceding_whitespace: false, invalid_infostring: false }, len: 38 }
+            Token { kind: Whitespace, len: 2 }
+            Token { kind: Ident, len: 2 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 4 }
+            Token { kind: OpenParen, len: 1 }
+            Token { kind: CloseParen, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: OpenBrace, len: 1 }
+            Token { kind: CloseBrace, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+        "#]],
+    )
+}
+
+#[test]
+fn frontmatter_disallowed() {
+    check_lexing(
+        r#"
+---cargo
+[dependencies]
+clap = "4"
+---
+
+fn main() {}
+"#,
+        FrontmatterAllowed::No,
+        expect![[r#"
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Ident, len: 5 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: OpenBracket, len: 1 }
+            Token { kind: Ident, len: 12 }
+            Token { kind: CloseBracket, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 4 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Eq, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Minus, len: 1 }
+            Token { kind: Whitespace, len: 2 }
+            Token { kind: Ident, len: 2 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 4 }
+            Token { kind: OpenParen, len: 1 }
+            Token { kind: CloseParen, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: OpenBrace, len: 1 }
+            Token { kind: CloseBrace, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+        "#]],
+    )
+}

@@ -9,7 +9,7 @@ use clippy_utils::visitors::{Descend, for_each_expr};
 use hir::HirId;
 use rustc_hir as hir;
 use rustc_hir::{Block, BlockCheckMode, ItemKind, Node, UnsafeSource};
-use rustc_lexer::{TokenKind, tokenize};
+use rustc_lexer::{FrontmatterAllowed, TokenKind, tokenize};
 use rustc_lint::{LateContext, LateLintPass, LintContext};
 use rustc_session::impl_lint_pass;
 use rustc_span::{BytePos, Pos, RelativeBytePos, Span, SyntaxContext};
@@ -746,7 +746,7 @@ fn text_has_safety_comment(src: &str, line_starts: &[RelativeBytePos], start_pos
     loop {
         if line.starts_with("/*") {
             let src = &src[line_start..line_starts.last().unwrap().to_usize()];
-            let mut tokens = tokenize(src);
+            let mut tokens = tokenize(src, FrontmatterAllowed::No);
             return (src[..tokens.next().unwrap().len as usize]
                 .to_ascii_uppercase()
                 .contains("SAFETY:")

@@ -3,7 +3,7 @@ use clippy_utils::source::SpanRangeExt;
 use itertools::Itertools;
 use rustc_ast::{Crate, Expr, ExprKind, FormatArgs};
 use rustc_data_structures::fx::FxHashMap;
-use rustc_lexer::{TokenKind, tokenize};
+use rustc_lexer::{FrontmatterAllowed, TokenKind, tokenize};
 use rustc_lint::{EarlyContext, EarlyLintPass};
 use rustc_session::impl_lint_pass;
 use rustc_span::{Span, hygiene};
@@ -82,7 +82,7 @@ fn has_span_from_proc_macro(cx: &EarlyContext<'_>, args: &FormatArgs) -> bool {
         .all(|sp| {
             sp.check_source_text(cx, |src| {
                 // text should be either `, name` or `, name =`
-                let mut iter = tokenize(src).filter(|t| {
+                let mut iter = tokenize(src, FrontmatterAllowed::No).filter(|t| {
                     !matches!(
                         t.kind,
                         TokenKind::LineComment { .. } | TokenKind::BlockComment { .. } | TokenKind::Whitespace

@@ -15,7 +15,7 @@ use rustc_hir::def::{DefKind, Res};
 use rustc_hir::{
     BinOpKind, Block, ConstBlock, Expr, ExprKind, HirId, Item, ItemKind, Node, PatExpr, PatExprKind, QPath, UnOp,
 };
-use rustc_lexer::tokenize;
+use rustc_lexer::{FrontmatterAllowed, tokenize};
 use rustc_lint::LateContext;
 use rustc_middle::mir::ConstValue;
 use rustc_middle::mir::interpret::{Scalar, alloc_range};
@@ -304,9 +304,7 @@ pub fn lit_to_mir_constant<'tcx>(lit: &LitKind, ty: Option<Ty<'tcx>>) -> Constan
     match *lit {
         LitKind::Str(ref is, _) => Constant::Str(is.to_string()),
         LitKind::Byte(b) => Constant::Int(u128::from(b)),
-        LitKind::ByteStr(ref s, _) | LitKind::CStr(ref s, _) => {
-            Constant::Binary(s.as_byte_str().to_vec())
-        }
+        LitKind::ByteStr(ref s, _) | LitKind::CStr(ref s, _) => Constant::Binary(s.as_byte_str().to_vec()),
         LitKind::Char(c) => Constant::Char(c),
         LitKind::Int(n, _) => Constant::Int(n.get()),
         LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty {
@@ -568,9 +566,7 @@ impl<'tcx> ConstEvalCtxt<'tcx> {
         } else {
             match &lit.node {
                 LitKind::Str(is, _) => Some(is.is_empty()),
-                LitKind::ByteStr(s, _) | LitKind::CStr(s, _) => {
-                    Some(s.as_byte_str().is_empty())
-                }
+                LitKind::ByteStr(s, _) | LitKind::CStr(s, _) => Some(s.as_byte_str().is_empty()),
                 _ => None,
             }
         }
@@ -715,7 +711,7 @@ impl<'tcx> ConstEvalCtxt<'tcx> {
             && let Some(src) = src.as_str()
         {
             use rustc_lexer::TokenKind::{BlockComment, LineComment, OpenBrace, Semi, Whitespace};
-            if !tokenize(src)
+            if !tokenize(src, FrontmatterAllowed::No)
                 .map(|t| t.kind)
                 .filter(|t| !matches!(t, Whitespace | LineComment { .. } | BlockComment { .. } | Semi))
                 .eq([OpenBrace])

@@ -12,7 +12,7 @@ use rustc_hir::{
     Pat, PatExpr, PatExprKind, PatField, PatKind, Path, PathSegment, PrimTy, QPath, Stmt, StmtKind, StructTailExpr,
     TraitBoundModifiers, Ty, TyKind, TyPat, TyPatKind,
 };
-use rustc_lexer::{TokenKind, tokenize};
+use rustc_lexer::{FrontmatterAllowed, TokenKind, tokenize};
 use rustc_lint::LateContext;
 use rustc_middle::ty::TypeckResults;
 use rustc_span::{BytePos, ExpnKind, MacroKind, Symbol, SyntaxContext, sym};
@@ -686,7 +686,7 @@ fn reduce_exprkind<'hir>(cx: &LateContext<'_>, kind: &'hir ExprKind<'hir>) -> &'
         // `{}` => `()`
         ([], None)
             if block.span.check_source_text(cx, |src| {
-                tokenize(src)
+                tokenize(src, FrontmatterAllowed::No)
                     .map(|t| t.kind)
                     .filter(|t| {
                         !matches!(

@@ -106,7 +106,7 @@ use rustc_hir::{
     Param, Pat, PatExpr, PatExprKind, PatKind, Path, PathSegment, QPath, Stmt, StmtKind, TraitFn, TraitItem,
     TraitItemKind, TraitRef, TyKind, UnOp, def,
 };
-use rustc_lexer::{TokenKind, tokenize};
+use rustc_lexer::{FrontmatterAllowed, TokenKind, tokenize};
 use rustc_lint::{LateContext, Level, Lint, LintContext};
 use rustc_middle::hir::nested_filter;
 use rustc_middle::hir::place::PlaceBase;
@@ -2764,7 +2764,7 @@ pub fn expr_use_ctxt<'tcx>(cx: &LateContext<'tcx>, e: &Expr<'tcx>) -> ExprUseCtx
 /// Tokenizes the input while keeping the text associated with each token.
 pub fn tokenize_with_text(s: &str) -> impl Iterator<Item = (TokenKind, &str, InnerSpan)> {
     let mut pos = 0;
-    tokenize(s).map(move |t| {
+    tokenize(s, FrontmatterAllowed::No).map(move |t| {
         let end = pos + t.len;
         let range = pos as usize..end as usize;
         let inner = InnerSpan::new(range.start, range.end);
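
A hypothetical call, to illustrate the tuple shape this helper yields (token kinds are rustc_lexer's, which does not distinguish keywords from identifiers):

    for (kind, text, span) in tokenize_with_text("let x;") {
        // yields e.g. (Ident, "let", InnerSpan { start: 0, end: 3 }), then
        // (Whitespace, " ", ...), (Ident, "x", ...), (Semi, ";", ...)
    }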
@@ -2779,7 +2779,7 @@ pub fn span_contains_comment(sm: &SourceMap, span: Span) -> bool {
     let Ok(snippet) = sm.span_to_snippet(span) else {
         return false;
     };
-    return tokenize(&snippet).any(|token| {
+    return tokenize(&snippet, FrontmatterAllowed::No).any(|token| {
         matches!(
             token.kind,
             TokenKind::BlockComment { .. } | TokenKind::LineComment { .. }

@@ -7,7 +7,7 @@ use std::sync::Arc;
 use rustc_ast::{LitKind, StrStyle};
 use rustc_errors::Applicability;
 use rustc_hir::{BlockCheckMode, Expr, ExprKind, UnsafeSource};
-use rustc_lexer::{LiteralKind, TokenKind, tokenize};
+use rustc_lexer::{FrontmatterAllowed, LiteralKind, TokenKind, tokenize};
 use rustc_lint::{EarlyContext, LateContext};
 use rustc_middle::ty::TyCtxt;
 use rustc_session::Session;
@@ -277,7 +277,7 @@ fn map_range(
 }
 
 fn ends_with_line_comment_or_broken(text: &str) -> bool {
-    let Some(last) = tokenize(text).last() else {
+    let Some(last) = tokenize(text, FrontmatterAllowed::No).last() else {
         return false;
     };
     match last.kind {
@@ -310,7 +310,8 @@ fn with_leading_whitespace_inner(lines: &[RelativeBytePos], src: &str, range: Ra
         && ends_with_line_comment_or_broken(&start[prev_start..])
         && let next_line = lines.partition_point(|&pos| pos.to_usize() < range.end)
         && let next_start = lines.get(next_line).map_or(src.len(), |&x| x.to_usize())
-        && tokenize(src.get(range.end..next_start)?).any(|t| !matches!(t.kind, TokenKind::Whitespace))
+        && tokenize(src.get(range.end..next_start)?, FrontmatterAllowed::No)
+            .any(|t| !matches!(t.kind, TokenKind::Whitespace))
     {
         Some(range.start)
     } else {

@@ -11,8 +11,8 @@
 use std::ops;
 
 use rustc_literal_escaper::{
-    EscapeError, Mode, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char,
-    unescape_str,
+    unescape_byte, unescape_byte_str, unescape_c_str, unescape_char, unescape_str, EscapeError,
+    Mode,
 };
 
 use crate::{
@@ -44,7 +44,9 @@ impl<'a> LexedStr<'a> {
         // Re-create the tokenizer from scratch every token because `GuardedStrPrefix` is one token in the lexer
         // but we want to split it to two in edition <2024.
-        while let Some(token) = rustc_lexer::tokenize(&text[conv.offset..]).next() {
+        while let Some(token) =
+            rustc_lexer::tokenize(&text[conv.offset..], rustc_lexer::FrontmatterAllowed::No).next()
+        {
             let token_text = &text[conv.offset..][..token.len as usize];
             conv.extend_token(&token.kind, token_text);
@@ -58,7 +60,7 @@ impl<'a> LexedStr<'a> {
             return None;
         }
 
-        let token = rustc_lexer::tokenize(text).next()?;
+        let token = rustc_lexer::tokenize(text, rustc_lexer::FrontmatterAllowed::No).next()?;
         if token.len as usize != text.len() {
             return None;
         }

@@ -121,7 +121,7 @@ pub(super) fn literal_from_str<Span: Copy>(
     use proc_macro::bridge::LitKind;
     use rustc_lexer::{LiteralKind, Token, TokenKind};
 
-    let mut tokens = rustc_lexer::tokenize(s);
+    let mut tokens = rustc_lexer::tokenize(s, rustc_lexer::FrontmatterAllowed::No);
     let minus_or_lit = tokens.next().unwrap_or(Token { kind: TokenKind::Eof, len: 0 });
     let lit = if minus_or_lit.kind == TokenKind::Minus {

@@ -579,7 +579,7 @@ where
 {
     use rustc_lexer::LiteralKind;
 
-    let token = rustc_lexer::tokenize(text).next_tuple();
+    let token = rustc_lexer::tokenize(text, rustc_lexer::FrontmatterAllowed::No).next_tuple();
     let Some((rustc_lexer::Token {
         kind: rustc_lexer::TokenKind::Literal { kind, suffix_start },
         ..

@@ -0,0 +1,10 @@
+---
+---
+
+//@ compile-flags: -Zunpretty=normal
+//@ check-pass
+
+#![feature(frontmatter)]
+
+fn main() {
+}

@@ -0,0 +1,9 @@
+---
+---
+
+//@ compile-flags: -Zunpretty=normal
+//@ check-pass
+
+#![feature(frontmatter)]
+
+fn main() {}