Auto merge of #149517 - WaffleLapkin:alphabet-blessing, r=jdonszelmann

Implement blessing for tidy alphabetical check

r? `@jdonszelmann`
This commit is contained in:
bors 2025-12-07 20:08:33 +00:00
commit ba2142a19c
6 changed files with 421 additions and 109 deletions

View file

@ -5616,6 +5616,7 @@ dependencies = [
"semver",
"serde",
"similar",
"tempfile",
"termcolor",
"toml 0.7.8",
"walkdir",

View file

@ -4,11 +4,11 @@
#![allow(rustc::usage_of_ty_tykind)]
#![allow(rustc::usage_of_type_ir_inherent)]
#![allow(rustc::usage_of_type_ir_traits)]
#![cfg_attr(feature = "nightly", allow(internal_features))]
#![cfg_attr(
feature = "nightly",
feature(associated_type_defaults, never_type, rustc_attrs, negative_impls)
)]
#![cfg_attr(feature = "nightly", allow(internal_features))]
// tidy-alphabetical-end
extern crate self as rustc_type_ir;

View file

@ -18,6 +18,7 @@ rustc-hash = "2.0.0"
fluent-syntax = "0.12"
similar = "2.5.0"
toml = "0.7.8"
tempfile = "3.15.0"
[features]
build-metrics = ["dep:serde"]

View file

@ -9,19 +9,33 @@
//! // tidy-alphabetical-end
//! ```
//!
//! The following lines are ignored:
//! - Empty lines
//! - Lines that are indented with more or less spaces than the first line
//! - Lines starting with `//`, `#` (except those starting with `#!`), `)`, `]`, `}` if the comment
//! has the same indentation as the first line
//! - Lines starting with a closing delimiter (`)`, `[`, `}`) are ignored.
//! Empty lines and lines starting (ignoring spaces) with `//` or `#` (except those starting with
//! `#!`) are considered comments are are sorted together with the next line (but do not affect
//! sorting).
//!
//! If a line ends with an opening delimiter, we effectively join the following line to it before
//! checking it. E.g. `foo(\nbar)` is treated like `foo(bar)`.
//! If the following lines have higher indentation we effectively join them with the current line
//! before comparing it. If the next line with the same indentation starts (ignoring spaces) with
//! a closing delimiter (`)`, `[`, `}`) it is joined as well.
//!
//! E.g.
//!
//! ```rust,ignore ilustrative example for sorting mentioning non-existent functions
//! foo(a,
//! b);
//! bar(
//! a,
//! b
//! );
//! // are treated for sorting purposes as
//! foo(a, b);
//! bar(a, b);
//! ```
use std::cmp::Ordering;
use std::fmt::Display;
use std::fs;
use std::io::{Seek, Write};
use std::iter::Peekable;
use std::ops::{Range, RangeBounds};
use std::path::Path;
use crate::diagnostics::{CheckId, RunningCheck, TidyCtx};
@ -38,94 +52,206 @@ fn is_close_bracket(c: char) -> bool {
matches!(c, ')' | ']' | '}')
}
fn is_empty_or_comment(line: &&str) -> bool {
let trimmed_line = line.trim_start_matches(' ').trim_end_matches('\n');
trimmed_line.is_empty()
|| trimmed_line.starts_with("//")
|| (trimmed_line.starts_with('#') && !trimmed_line.starts_with("#!"))
}
const START_MARKER: &str = "tidy-alphabetical-start";
const END_MARKER: &str = "tidy-alphabetical-end";
fn check_section<'a>(
file: impl Display,
lines: impl Iterator<Item = (usize, &'a str)>,
check: &mut RunningCheck,
) {
let mut prev_line = String::new();
let mut first_indent = None;
let mut in_split_line = None;
for (idx, line) in lines {
if line.is_empty() {
continue;
}
if line.contains(START_MARKER) {
check.error(format!(
"{file}:{} found `{START_MARKER}` expecting `{END_MARKER}`",
idx + 1
));
return;
}
if line.contains(END_MARKER) {
return;
}
let indent = first_indent.unwrap_or_else(|| {
let indent = indentation(line);
first_indent = Some(indent);
indent
});
let line = if let Some(prev_split_line) = in_split_line {
// Join the split lines.
in_split_line = None;
format!("{prev_split_line}{}", line.trim_start())
} else {
line.to_string()
};
if indentation(&line) != indent {
continue;
}
let trimmed_line = line.trim_start_matches(' ');
if trimmed_line.starts_with("//")
|| (trimmed_line.starts_with('#') && !trimmed_line.starts_with("#!"))
|| trimmed_line.starts_with(is_close_bracket)
{
continue;
}
if line.trim_end().ends_with('(') {
in_split_line = Some(line);
continue;
}
let prev_line_trimmed_lowercase = prev_line.trim_start_matches(' ');
if version_sort(trimmed_line, prev_line_trimmed_lowercase).is_lt() {
check.error(format!("{file}:{}: line not in alphabetical order", idx + 1));
}
prev_line = line;
/// Given contents of a section that is enclosed between [`START_MARKER`] and [`END_MARKER`], sorts
/// them according to the rules described at the top of the module.
fn sort_section(section: &str) -> String {
/// A sortable item
struct Item {
/// Full contents including comments and whitespace
full: String,
/// Trimmed contents for sorting
trimmed: String,
}
check.error(format!("{file}: reached end of file expecting `{END_MARKER}`"));
}
let mut items = Vec::new();
let mut lines = section.split_inclusive('\n').peekable();
fn check_lines<'a>(
file: &impl Display,
mut lines: impl Iterator<Item = (usize, &'a str)>,
check: &mut RunningCheck,
) {
while let Some((idx, line)) = lines.next() {
if line.contains(END_MARKER) {
check.error(format!(
"{file}:{} found `{END_MARKER}` expecting `{START_MARKER}`",
idx + 1
));
let end_comments = loop {
let mut full = String::new();
let mut trimmed = String::new();
while let Some(comment) = lines.next_if(is_empty_or_comment) {
full.push_str(comment);
}
if line.contains(START_MARKER) {
check_section(file, &mut lines, check);
let Some(line) = lines.next() else {
// remember comments at the end of a block
break full;
};
let mut push = |line| {
full.push_str(line);
trimmed.push_str(line.trim_start_matches(' ').trim_end_matches('\n'))
};
push(line);
let indent = indentation(line);
let mut multiline = false;
// If the item is split between multiple lines...
while let Some(more_indented) =
lines.next_if(|&line: &&_| indent < indentation(line) || line == "\n")
{
multiline = true;
push(more_indented);
}
if multiline
&& let Some(indented) =
// Only append next indented line if it looks like a closing bracket.
// Otherwise we incorrectly merge code like this (can be seen in
// compiler/rustc_session/src/options.rs):
//
// force_unwind_tables: Option<bool> = (None, parse_opt_bool, [TRACKED],
// "force use of unwind tables"),
// incremental: Option<String> = (None, parse_opt_string, [UNTRACKED],
// "enable incremental compilation"),
lines.next_if(|l| {
indentation(l) == indent
&& l.trim_start_matches(' ').starts_with(is_close_bracket)
})
{
push(indented);
}
items.push(Item { full, trimmed });
};
items.sort_by(|a, b| version_sort(&a.trimmed, &b.trimmed));
items.into_iter().map(|l| l.full).chain([end_comments]).collect()
}
fn check_lines<'a>(path: &Path, content: &'a str, tidy_ctx: &TidyCtx, check: &mut RunningCheck) {
let mut offset = 0;
loop {
let rest = &content[offset..];
let start = rest.find(START_MARKER);
let end = rest.find(END_MARKER);
match (start, end) {
// error handling
// end before start
(Some(start), Some(end)) if end < start => {
check.error(format!(
"{path}:{line_number} found `{END_MARKER}` expecting `{START_MARKER}`",
path = path.display(),
line_number = content[..offset + end].lines().count(),
));
break;
}
// end without a start
(None, Some(end)) => {
check.error(format!(
"{path}:{line_number} found `{END_MARKER}` expecting `{START_MARKER}`",
path = path.display(),
line_number = content[..offset + end].lines().count(),
));
break;
}
// start without an end
(Some(start), None) => {
check.error(format!(
"{path}:{line_number} `{START_MARKER}` without a matching `{END_MARKER}`",
path = path.display(),
line_number = content[..offset + start].lines().count(),
));
break;
}
// a second start in between start/end pair
(Some(start), Some(end))
if rest[start + START_MARKER.len()..end].contains(START_MARKER) =>
{
check.error(format!(
"{path}:{line_number} found `{START_MARKER}` expecting `{END_MARKER}`",
path = path.display(),
line_number = content[..offset
+ sub_find(rest, start + START_MARKER.len()..end, START_MARKER)
.unwrap()
.start]
.lines()
.count()
));
break;
}
// happy happy path :3
(Some(start), Some(end)) => {
assert!(start <= end);
// "...␤// tidy-alphabetical-start␤...␤// tidy-alphabetical-end␤..."
// start_nl_end --^ ^-- end_nl_start ^-- end_nl_end
// Position after the newline after start marker
let start_nl_end = sub_find(rest, start + START_MARKER.len().., "\n").unwrap().end;
// Position before the new line before the end marker
let end_nl_start = rest[..end].rfind('\n').unwrap();
// Position after the newline after end marker
let end_nl_end = sub_find(rest, end + END_MARKER.len().., "\n")
.map(|r| r.end)
.unwrap_or(content.len() - offset);
let section = &rest[start_nl_end..=end_nl_start];
let sorted = sort_section(section);
// oh nyooo :(
if sorted != section {
if !tidy_ctx.is_bless_enabled() {
let base_line_number = content[..offset + start_nl_end].lines().count();
let line_offset = sorted
.lines()
.zip(section.lines())
.enumerate()
.find(|(_, (a, b))| a != b)
.unwrap()
.0;
let line_number = base_line_number + line_offset;
check.error(format!(
"{path}:{line_number}: line not in alphabetical order (tip: use --bless to sort this list)",
path = path.display(),
));
} else {
// Use atomic rename as to not corrupt the file upon crashes/ctrl+c
let mut tempfile =
tempfile::Builder::new().tempfile_in(path.parent().unwrap()).unwrap();
fs::copy(path, tempfile.path()).unwrap();
tempfile
.as_file_mut()
.seek(std::io::SeekFrom::Start((offset + start_nl_end) as u64))
.unwrap();
tempfile.as_file_mut().write_all(sorted.as_bytes()).unwrap();
tempfile.persist(path).unwrap();
}
}
// Start the next search after the end section
offset += end_nl_end;
}
// No more alphabetical lists, yay :3
(None, None) => break,
}
}
}
@ -133,13 +259,14 @@ fn check_lines<'a>(
pub fn check(path: &Path, tidy_ctx: TidyCtx) {
let mut check = tidy_ctx.start_check(CheckId::new("alphabetical").path(path));
let skip =
|path: &_, _is_dir| filter_dirs(path) || path.ends_with("tidy/src/alphabetical/tests.rs");
let skip = |path: &_, _is_dir| {
filter_dirs(path)
|| path.ends_with("tidy/src/alphabetical.rs")
|| path.ends_with("tidy/src/alphabetical/tests.rs")
};
walk(path, skip, &mut |entry, contents| {
let file = &entry.path().display();
let lines = contents.lines().enumerate();
check_lines(file, lines, &mut check)
walk(path, skip, &mut |entry, content| {
check_lines(entry.path(), content, &tidy_ctx, &mut check)
});
}
@ -195,3 +322,17 @@ fn version_sort(a: &str, b: &str) -> Ordering {
it1.next().cmp(&it2.next())
}
/// Finds `pat` in `s[range]` and returns a range such that `s[ret] == pat`.
fn sub_find(s: &str, range: impl RangeBounds<usize>, pat: &str) -> Option<Range<usize>> {
s[(range.start_bound().cloned(), range.end_bound().cloned())]
.find(pat)
.map(|pos| {
pos + match range.start_bound().cloned() {
std::ops::Bound::Included(x) => x,
std::ops::Bound::Excluded(x) => x + 1,
std::ops::Bound::Unbounded => 0,
}
})
.map(|pos| pos..pos + pat.len())
}

View file

@ -7,7 +7,7 @@ use crate::diagnostics::{TidyCtx, TidyFlags};
fn test(lines: &str, name: &str, expected_msg: &str, expected_bad: bool) {
let tidy_ctx = TidyCtx::new(Path::new("/"), false, TidyFlags::default());
let mut check = tidy_ctx.start_check("alphabetical-test");
check_lines(&name, lines.lines().enumerate(), &mut check);
check_lines(Path::new(name), lines, &tidy_ctx, &mut check);
assert_eq!(expected_bad, check.is_bad());
let errors = check.get_errors();
@ -29,6 +29,26 @@ fn bad(lines: &str, expected_msg: &str) {
test(lines, "bad", expected_msg, true);
}
#[track_caller]
fn bless_test(before: &str, after: &str) {
// NB: convert to a temporary *path* (closing the file), so that `check_lines` can then
// atomically replace the file with a blessed version (on windows that requires the file
// to not be open)
let temp_path = tempfile::Builder::new().tempfile().unwrap().into_temp_path();
std::fs::write(&temp_path, before).unwrap();
let tidy_ctx = TidyCtx::new(Path::new("/"), false, TidyFlags::new(&["--bless".to_owned()]));
let mut check = tidy_ctx.start_check("alphabetical-test");
check_lines(&temp_path, before, &tidy_ctx, &mut check);
assert!(!check.is_bad());
let new = std::fs::read_to_string(temp_path).unwrap();
assert_eq!(new, after);
good(&new);
}
#[test]
fn test_no_markers() {
let lines = "\
@ -95,7 +115,7 @@ fn test_rust_bad() {
def
// tidy-alphabetical-end
";
bad(lines, "bad:4: line not in alphabetical order");
bad(lines, "bad:2: line not in alphabetical order (tip: use --bless to sort this list)");
}
#[test]
@ -107,7 +127,7 @@ fn test_toml_bad() {
def
# tidy-alphabetical-end
";
bad(lines, "bad:4: line not in alphabetical order");
bad(lines, "bad:2: line not in alphabetical order (tip: use --bless to sort this list)");
}
#[test]
@ -121,7 +141,7 @@ fn test_features_bad() {
#![feature(def)]
tidy-alphabetical-end
";
bad(lines, "bad:4: line not in alphabetical order");
bad(lines, "bad:2: line not in alphabetical order (tip: use --bless to sort this list)");
}
#[test]
@ -134,7 +154,7 @@ fn test_indent_bad() {
def
$ tidy-alphabetical-end
";
bad(lines, "bad:4: line not in alphabetical order");
bad(lines, "bad:2: line not in alphabetical order (tip: use --bless to sort this list)");
}
#[test]
@ -150,7 +170,7 @@ fn test_split_bad() {
)
&& tidy-alphabetical-end
";
bad(lines, "bad:7: line not in alphabetical order");
bad(lines, "bad:3: line not in alphabetical order (tip: use --bless to sort this list)");
}
#[test]
@ -160,7 +180,7 @@ fn test_double_start() {
abc
tidy-alphabetical-start
";
bad(lines, "bad:3 found `tidy-alphabetical-start` expecting `tidy-alphabetical-end`");
bad(lines, "bad:0 `tidy-alphabetical-start` without a matching `tidy-alphabetical-end`");
}
#[test]
@ -179,7 +199,7 @@ fn test_missing_end() {
tidy-alphabetical-start
abc
";
bad(lines, "bad: reached end of file expecting `tidy-alphabetical-end`");
bad(lines, "bad:0 `tidy-alphabetical-start` without a matching `tidy-alphabetical-end`");
}
#[test]
@ -319,7 +339,7 @@ fn test_numeric_bad() {
item2
# tidy-alphabetical-end
";
bad(lines, "bad:4: line not in alphabetical order");
bad(lines, "bad:2: line not in alphabetical order (tip: use --bless to sort this list)");
let lines = "\
# tidy-alphabetical-start
@ -327,7 +347,7 @@ fn test_numeric_bad() {
zve64d
# tidy-alphabetical-end
";
bad(lines, "bad:3: line not in alphabetical order");
bad(lines, "bad:1: line not in alphabetical order (tip: use --bless to sort this list)");
let lines = "\
# tidy-alphabetical-start
@ -335,5 +355,154 @@ fn test_numeric_bad() {
00
# tidy-alphabetical-end
";
bad(lines, "bad:3: line not in alphabetical order");
bad(lines, "bad:1: line not in alphabetical order (tip: use --bless to sort this list)");
}
#[test]
fn multiline() {
let lines = "\
tidy-alphabetical-start
(b,
a);
(
b,
a
);
tidy-alphabetical-end
";
good(lines);
let lines = "\
tidy-alphabetical-start
(
b,
a
);
(b,
a);
tidy-alphabetical-end
";
good(lines);
let lines = "\
tidy-alphabetical-start
(c,
a);
(
b,
a
);
tidy-alphabetical-end
";
bad(lines, "bad:1: line not in alphabetical order (tip: use --bless to sort this list)");
let lines = "\
tidy-alphabetical-start
(
c,
a
);
(b,
a);
tidy-alphabetical-end
";
bad(lines, "bad:1: line not in alphabetical order (tip: use --bless to sort this list)");
let lines = "\
force_unwind_tables: Option<bool> = (None, parse_opt_bool, [TRACKED],
'force use of unwind tables'),
incremental: Option<String> = (None, parse_opt_string, [UNTRACKED],
'enable incremental compilation'),
";
good(lines);
}
#[test]
fn bless_smoke() {
let before = "\
tidy-alphabetical-start
08
1
11
03
tidy-alphabetical-end
";
let after = "\
tidy-alphabetical-start
1
03
08
11
tidy-alphabetical-end
";
bless_test(before, after);
}
#[test]
fn bless_multiline() {
let before = "\
tidy-alphabetical-start
08 {
z}
08 {
x
}
1
08 {y}
02
11 (
0
)
03
addition
notaddition
tidy-alphabetical-end
";
let after = "\
tidy-alphabetical-start
1
02
03
addition
08 {
x
}
08 {y}
08 {
z}
11 (
0
)
notaddition
tidy-alphabetical-end
";
bless_test(before, after);
}
#[test]
fn bless_funny_numbers() {
// Because `2` is indented it gets merged into one entry with `1` and gets
// interpreted by version sort as `12`, which is greater than `3`.
//
// This is neither a wanted nor an unwanted behavior, this test just checks
// that it hasn't changed.
let before = "\
tidy-alphabetical-start
1
2
3
tidy-alphabetical-end
";
let after = "\
tidy-alphabetical-start
3
1
2
tidy-alphabetical-end
";
bless_test(before, after);
}

View file

@ -6,10 +6,10 @@ use std::sync::{Arc, Mutex};
use termcolor::Color;
/// CLI flags used by tidy.
#[derive(Clone, Default)]
///CLI flags used by tidy.
pub struct TidyFlags {
///Applies style and formatting changes during a tidy run.
/// Applies style and formatting changes during a tidy run.
bless: bool,
}