Auto merge of #149517 - WaffleLapkin:alphabet-blessing, r=jdonszelmann

Implement blessing for tidy alphabetical check r? `@jdonszelmann`
2025-12-07 20:08:33 +00:00 · 2025-12-07 20:08:33 +00:00 · ba2142a19c
commit ba2142a19c
parent fa1f706fbd 3bbd6ea19a
6 changed files with 421 additions and 109 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -5616,6 +5616,7 @@ dependencies = [
 "semver",
 "serde",
 "similar",
+ "tempfile",
 "termcolor",
 "toml 0.7.8",
 "walkdir",
--- a/compiler/rustc_type_ir/src/lib.rs
+++ b/compiler/rustc_type_ir/src/lib.rs
@ -4,11 +4,11 @@
 #![allow(rustc::usage_of_ty_tykind)]
 #![allow(rustc::usage_of_type_ir_inherent)]
 #![allow(rustc::usage_of_type_ir_traits)]
+#![cfg_attr(feature = "nightly", allow(internal_features))]
 #![cfg_attr(
    feature = "nightly",
    feature(associated_type_defaults, never_type, rustc_attrs, negative_impls)
 )]
-#![cfg_attr(feature = "nightly", allow(internal_features))]
 // tidy-alphabetical-end

 extern crate self as rustc_type_ir;
--- a/src/tools/tidy/Cargo.toml
+++ b/src/tools/tidy/Cargo.toml
@ -18,6 +18,7 @@ rustc-hash = "2.0.0"
 fluent-syntax = "0.12"
 similar = "2.5.0"
 toml = "0.7.8"
+tempfile = "3.15.0"

 [features]
 build-metrics = ["dep:serde"]
--- a/src/tools/tidy/src/alphabetical.rs
+++ b/src/tools/tidy/src/alphabetical.rs
@ -9,19 +9,33 @@
 //! // tidy-alphabetical-end
 //! ```
 //!
-//! The following lines are ignored:
-//! - Empty lines
-//! - Lines that are indented with more or less spaces than the first line
-//! - Lines starting with `//`, `#` (except those starting with `#!`), `)`, `]`, `}` if the comment
-//!   has the same indentation as the first line
-//! - Lines starting with a closing delimiter (`)`, `[`, `}`) are ignored.
+//! Empty lines and lines starting (ignoring spaces) with `//` or `#` (except those starting with
+//! `#!`) are considered comments are are sorted together with the next line (but do not affect
+//! sorting).
 //!
-//! If a line ends with an opening delimiter, we effectively join the following line to it before
-//! checking it. E.g. `foo(\nbar)` is treated like `foo(bar)`.
+//! If the following lines have higher indentation we effectively join them with the current line
+//! before comparing it. If the next line with the same indentation starts (ignoring spaces) with
+//! a closing delimiter (`)`, `[`, `}`) it is joined as well.
+//!
+//! E.g.
+//!
+//! ```rust,ignore ilustrative example for sorting mentioning non-existent functions
+//! foo(a,
+//!     b);
+//! bar(
+//!   a,
+//!   b
+//! );
+//! // are treated for sorting purposes as
+//! foo(a, b);
+//! bar(a, b);
+//! ```

 use std::cmp::Ordering;
-use std::fmt::Display;
+use std::fs;
+use std::io::{Seek, Write};
 use std::iter::Peekable;
+use std::ops::{Range, RangeBounds};
 use std::path::Path;

 use crate::diagnostics::{CheckId, RunningCheck, TidyCtx};
@ -38,94 +52,206 @@ fn is_close_bracket(c: char) -> bool {
    matches!(c, ')' | ']' | '}')
 }

+fn is_empty_or_comment(line: &&str) -> bool {
+    let trimmed_line = line.trim_start_matches(' ').trim_end_matches('\n');
+
+    trimmed_line.is_empty()
+        || trimmed_line.starts_with("//")
+        || (trimmed_line.starts_with('#') && !trimmed_line.starts_with("#!"))
+}
+
 const START_MARKER: &str = "tidy-alphabetical-start";
 const END_MARKER: &str = "tidy-alphabetical-end";

-fn check_section<'a>(
-    file: impl Display,
-    lines: impl Iterator<Item = (usize, &'a str)>,
-    check: &mut RunningCheck,
-) {
-    let mut prev_line = String::new();
-    let mut first_indent = None;
-    let mut in_split_line = None;
-
-    for (idx, line) in lines {
-        if line.is_empty() {
-            continue;
-        }
-
-        if line.contains(START_MARKER) {
-            check.error(format!(
-                "{file}:{} found `{START_MARKER}` expecting `{END_MARKER}`",
-                idx + 1
-            ));
-            return;
-        }
-
-        if line.contains(END_MARKER) {
-            return;
-        }
-
-        let indent = first_indent.unwrap_or_else(|| {
-            let indent = indentation(line);
-            first_indent = Some(indent);
-            indent
-        });
-
-        let line = if let Some(prev_split_line) = in_split_line {
-            // Join the split lines.
-            in_split_line = None;
-            format!("{prev_split_line}{}", line.trim_start())
-        } else {
-            line.to_string()
-        };
-
-        if indentation(&line) != indent {
-            continue;
-        }
-
-        let trimmed_line = line.trim_start_matches(' ');
-
-        if trimmed_line.starts_with("//")
-            || (trimmed_line.starts_with('#') && !trimmed_line.starts_with("#!"))
-            || trimmed_line.starts_with(is_close_bracket)
-        {
-            continue;
-        }
-
-        if line.trim_end().ends_with('(') {
-            in_split_line = Some(line);
-            continue;
-        }
-
-        let prev_line_trimmed_lowercase = prev_line.trim_start_matches(' ');
-
-        if version_sort(trimmed_line, prev_line_trimmed_lowercase).is_lt() {
-            check.error(format!("{file}:{}: line not in alphabetical order", idx + 1));
-        }
-
-        prev_line = line;
+/// Given contents of a section that is enclosed between [`START_MARKER`] and [`END_MARKER`], sorts
+/// them according to the rules described at the top of the module.
+fn sort_section(section: &str) -> String {
+    /// A sortable item
+    struct Item {
+        /// Full contents including comments and whitespace
+        full: String,
+        /// Trimmed contents for sorting
+        trimmed: String,
    }

-    check.error(format!("{file}: reached end of file expecting `{END_MARKER}`"));
-}
+    let mut items = Vec::new();
+    let mut lines = section.split_inclusive('\n').peekable();

-fn check_lines<'a>(
-    file: &impl Display,
-    mut lines: impl Iterator<Item = (usize, &'a str)>,
-    check: &mut RunningCheck,
-) {
-    while let Some((idx, line)) = lines.next() {
-        if line.contains(END_MARKER) {
-            check.error(format!(
-                "{file}:{} found `{END_MARKER}` expecting `{START_MARKER}`",
-                idx + 1
-            ));
+    let end_comments = loop {
+        let mut full = String::new();
+        let mut trimmed = String::new();
+
+        while let Some(comment) = lines.next_if(is_empty_or_comment) {
+            full.push_str(comment);
        }

-        if line.contains(START_MARKER) {
-            check_section(file, &mut lines, check);
+        let Some(line) = lines.next() else {
+            // remember comments at the end of a block
+            break full;
+        };
+
+        let mut push = |line| {
+            full.push_str(line);
+            trimmed.push_str(line.trim_start_matches(' ').trim_end_matches('\n'))
+        };
+
+        push(line);
+
+        let indent = indentation(line);
+        let mut multiline = false;
+
+        // If the item is split between multiple lines...
+        while let Some(more_indented) =
+            lines.next_if(|&line: &&_| indent < indentation(line) || line == "\n")
+        {
+            multiline = true;
+            push(more_indented);
+        }
+
+        if multiline
+            && let Some(indented) =
+                // Only append next indented line if it looks like a closing bracket.
+                // Otherwise we incorrectly merge code like this (can be seen in
+                // compiler/rustc_session/src/options.rs):
+                //
+                // force_unwind_tables: Option<bool> = (None, parse_opt_bool, [TRACKED],
+                //     "force use of unwind tables"),
+                // incremental: Option<String> = (None, parse_opt_string, [UNTRACKED],
+                //     "enable incremental compilation"),
+                lines.next_if(|l| {
+                    indentation(l) == indent
+                        && l.trim_start_matches(' ').starts_with(is_close_bracket)
+                })
+        {
+            push(indented);
+        }
+
+        items.push(Item { full, trimmed });
+    };
+
+    items.sort_by(|a, b| version_sort(&a.trimmed, &b.trimmed));
+    items.into_iter().map(|l| l.full).chain([end_comments]).collect()
+}
+
+fn check_lines<'a>(path: &Path, content: &'a str, tidy_ctx: &TidyCtx, check: &mut RunningCheck) {
+    let mut offset = 0;
+
+    loop {
+        let rest = &content[offset..];
+        let start = rest.find(START_MARKER);
+        let end = rest.find(END_MARKER);
+
+        match (start, end) {
+            // error handling
+
+            // end before start
+            (Some(start), Some(end)) if end < start => {
+                check.error(format!(
+                    "{path}:{line_number} found `{END_MARKER}` expecting `{START_MARKER}`",
+                    path = path.display(),
+                    line_number = content[..offset + end].lines().count(),
+                ));
+                break;
+            }
+
+            // end without a start
+            (None, Some(end)) => {
+                check.error(format!(
+                    "{path}:{line_number} found `{END_MARKER}` expecting `{START_MARKER}`",
+                    path = path.display(),
+                    line_number = content[..offset + end].lines().count(),
+                ));
+                break;
+            }
+
+            // start without an end
+            (Some(start), None) => {
+                check.error(format!(
+                    "{path}:{line_number} `{START_MARKER}` without a matching `{END_MARKER}`",
+                    path = path.display(),
+                    line_number = content[..offset + start].lines().count(),
+                ));
+                break;
+            }
+
+            // a second start in between start/end pair
+            (Some(start), Some(end))
+                if rest[start + START_MARKER.len()..end].contains(START_MARKER) =>
+            {
+                check.error(format!(
+                    "{path}:{line_number} found `{START_MARKER}` expecting `{END_MARKER}`",
+                    path = path.display(),
+                    line_number = content[..offset
+                        + sub_find(rest, start + START_MARKER.len()..end, START_MARKER)
+                            .unwrap()
+                            .start]
+                        .lines()
+                        .count()
+                ));
+                break;
+            }
+
+            // happy happy path :3
+            (Some(start), Some(end)) => {
+                assert!(start <= end);
+
+                // "...␤// tidy-alphabetical-start␤...␤// tidy-alphabetical-end␤..."
+                //                  start_nl_end --^  ^-- end_nl_start          ^-- end_nl_end
+
+                // Position after the newline after start marker
+                let start_nl_end = sub_find(rest, start + START_MARKER.len().., "\n").unwrap().end;
+
+                // Position before the new line before the end marker
+                let end_nl_start = rest[..end].rfind('\n').unwrap();
+
+                // Position after the newline after end marker
+                let end_nl_end = sub_find(rest, end + END_MARKER.len().., "\n")
+                    .map(|r| r.end)
+                    .unwrap_or(content.len() - offset);
+
+                let section = &rest[start_nl_end..=end_nl_start];
+                let sorted = sort_section(section);
+
+                // oh nyooo :(
+                if sorted != section {
+                    if !tidy_ctx.is_bless_enabled() {
+                        let base_line_number = content[..offset + start_nl_end].lines().count();
+                        let line_offset = sorted
+                            .lines()
+                            .zip(section.lines())
+                            .enumerate()
+                            .find(|(_, (a, b))| a != b)
+                            .unwrap()
+                            .0;
+                        let line_number = base_line_number + line_offset;
+
+                        check.error(format!(
+                            "{path}:{line_number}: line not in alphabetical order (tip: use --bless to sort this list)",
+                            path = path.display(),
+                        ));
+                    } else {
+                        // Use atomic rename as to not corrupt the file upon crashes/ctrl+c
+                        let mut tempfile =
+                            tempfile::Builder::new().tempfile_in(path.parent().unwrap()).unwrap();
+
+                        fs::copy(path, tempfile.path()).unwrap();
+
+                        tempfile
+                            .as_file_mut()
+                            .seek(std::io::SeekFrom::Start((offset + start_nl_end) as u64))
+                            .unwrap();
+                        tempfile.as_file_mut().write_all(sorted.as_bytes()).unwrap();
+
+                        tempfile.persist(path).unwrap();
+                    }
+                }
+
+                // Start the next search after the end section
+                offset += end_nl_end;
+            }
+
+            // No more alphabetical lists, yay :3
+            (None, None) => break,
        }
    }
 }
@ -133,13 +259,14 @@ fn check_lines<'a>(
 pub fn check(path: &Path, tidy_ctx: TidyCtx) {
    let mut check = tidy_ctx.start_check(CheckId::new("alphabetical").path(path));

-    let skip =
-        |path: &_, _is_dir| filter_dirs(path) || path.ends_with("tidy/src/alphabetical/tests.rs");
+    let skip = |path: &_, _is_dir| {
+        filter_dirs(path)
+            || path.ends_with("tidy/src/alphabetical.rs")
+            || path.ends_with("tidy/src/alphabetical/tests.rs")
+    };

-    walk(path, skip, &mut |entry, contents| {
-        let file = &entry.path().display();
-        let lines = contents.lines().enumerate();
-        check_lines(file, lines, &mut check)
+    walk(path, skip, &mut |entry, content| {
+        check_lines(entry.path(), content, &tidy_ctx, &mut check)
    });
 }

@ -195,3 +322,17 @@ fn version_sort(a: &str, b: &str) -> Ordering {

    it1.next().cmp(&it2.next())
 }
+
+/// Finds `pat` in `s[range]` and returns a range such that `s[ret] == pat`.
+fn sub_find(s: &str, range: impl RangeBounds<usize>, pat: &str) -> Option<Range<usize>> {
+    s[(range.start_bound().cloned(), range.end_bound().cloned())]
+        .find(pat)
+        .map(|pos| {
+            pos + match range.start_bound().cloned() {
+                std::ops::Bound::Included(x) => x,
+                std::ops::Bound::Excluded(x) => x + 1,
+                std::ops::Bound::Unbounded => 0,
+            }
+        })
+        .map(|pos| pos..pos + pat.len())
+}
--- a/src/tools/tidy/src/alphabetical/tests.rs
+++ b/src/tools/tidy/src/alphabetical/tests.rs
@ -7,7 +7,7 @@ use crate::diagnostics::{TidyCtx, TidyFlags};
 fn test(lines: &str, name: &str, expected_msg: &str, expected_bad: bool) {
    let tidy_ctx = TidyCtx::new(Path::new("/"), false, TidyFlags::default());
    let mut check = tidy_ctx.start_check("alphabetical-test");
-    check_lines(&name, lines.lines().enumerate(), &mut check);
+    check_lines(Path::new(name), lines, &tidy_ctx, &mut check);

    assert_eq!(expected_bad, check.is_bad());
    let errors = check.get_errors();
@ -29,6 +29,26 @@ fn bad(lines: &str, expected_msg: &str) {
    test(lines, "bad", expected_msg, true);
 }

+#[track_caller]
+fn bless_test(before: &str, after: &str) {
+    // NB: convert to a temporary *path* (closing the file), so that `check_lines` can then
+    //     atomically replace the file with a blessed version (on windows that requires the file
+    //     to not be open)
+    let temp_path = tempfile::Builder::new().tempfile().unwrap().into_temp_path();
+    std::fs::write(&temp_path, before).unwrap();
+
+    let tidy_ctx = TidyCtx::new(Path::new("/"), false, TidyFlags::new(&["--bless".to_owned()]));
+
+    let mut check = tidy_ctx.start_check("alphabetical-test");
+    check_lines(&temp_path, before, &tidy_ctx, &mut check);
+
+    assert!(!check.is_bad());
+    let new = std::fs::read_to_string(temp_path).unwrap();
+    assert_eq!(new, after);
+
+    good(&new);
+}
+
 #[test]
 fn test_no_markers() {
    let lines = "\
@ -95,7 +115,7 @@ fn test_rust_bad() {
        def
        // tidy-alphabetical-end
    ";
-    bad(lines, "bad:4: line not in alphabetical order");
+    bad(lines, "bad:2: line not in alphabetical order (tip: use --bless to sort this list)");
 }

 #[test]
@ -107,7 +127,7 @@ fn test_toml_bad() {
        def
        # tidy-alphabetical-end
    ";
-    bad(lines, "bad:4: line not in alphabetical order");
+    bad(lines, "bad:2: line not in alphabetical order (tip: use --bless to sort this list)");
 }

 #[test]
@ -121,7 +141,7 @@ fn test_features_bad() {
        #![feature(def)]
        tidy-alphabetical-end
    ";
-    bad(lines, "bad:4: line not in alphabetical order");
+    bad(lines, "bad:2: line not in alphabetical order (tip: use --bless to sort this list)");
 }

 #[test]
@ -134,7 +154,7 @@ fn test_indent_bad() {
            def
        $ tidy-alphabetical-end
    ";
-    bad(lines, "bad:4: line not in alphabetical order");
+    bad(lines, "bad:2: line not in alphabetical order (tip: use --bless to sort this list)");
 }

 #[test]
@ -150,7 +170,7 @@ fn test_split_bad() {
        )
        && tidy-alphabetical-end
    ";
-    bad(lines, "bad:7: line not in alphabetical order");
+    bad(lines, "bad:3: line not in alphabetical order (tip: use --bless to sort this list)");
 }

 #[test]
@ -160,7 +180,7 @@ fn test_double_start() {
        abc
        tidy-alphabetical-start
    ";
-    bad(lines, "bad:3 found `tidy-alphabetical-start` expecting `tidy-alphabetical-end`");
+    bad(lines, "bad:0 `tidy-alphabetical-start` without a matching `tidy-alphabetical-end`");
 }

 #[test]
@ -179,7 +199,7 @@ fn test_missing_end() {
        tidy-alphabetical-start
        abc
    ";
-    bad(lines, "bad: reached end of file expecting `tidy-alphabetical-end`");
+    bad(lines, "bad:0 `tidy-alphabetical-start` without a matching `tidy-alphabetical-end`");
 }

 #[test]
@ -319,7 +339,7 @@ fn test_numeric_bad() {
        item2
        # tidy-alphabetical-end
    ";
-    bad(lines, "bad:4: line not in alphabetical order");
+    bad(lines, "bad:2: line not in alphabetical order (tip: use --bless to sort this list)");

    let lines = "\
        # tidy-alphabetical-start
@ -327,7 +347,7 @@ fn test_numeric_bad() {
        zve64d
        # tidy-alphabetical-end
    ";
-    bad(lines, "bad:3: line not in alphabetical order");
+    bad(lines, "bad:1: line not in alphabetical order (tip: use --bless to sort this list)");

    let lines = "\
        # tidy-alphabetical-start
@ -335,5 +355,154 @@ fn test_numeric_bad() {
        00
        # tidy-alphabetical-end
    ";
-    bad(lines, "bad:3: line not in alphabetical order");
+    bad(lines, "bad:1: line not in alphabetical order (tip: use --bless to sort this list)");
+}
+
+#[test]
+fn multiline() {
+    let lines = "\
+        tidy-alphabetical-start
+        (b,
+         a);
+        (
+          b,
+          a
+        );
+        tidy-alphabetical-end
+    ";
+    good(lines);
+
+    let lines = "\
+        tidy-alphabetical-start
+        (
+          b,
+          a
+        );
+        (b,
+         a);
+        tidy-alphabetical-end
+    ";
+    good(lines);
+
+    let lines = "\
+        tidy-alphabetical-start
+        (c,
+         a);
+        (
+          b,
+          a
+        );
+        tidy-alphabetical-end
+    ";
+    bad(lines, "bad:1: line not in alphabetical order (tip: use --bless to sort this list)");
+
+    let lines = "\
+        tidy-alphabetical-start
+        (
+          c,
+          a
+        );
+        (b,
+         a);
+        tidy-alphabetical-end
+    ";
+    bad(lines, "bad:1: line not in alphabetical order (tip: use --bless to sort this list)");
+
+    let lines = "\
+        force_unwind_tables: Option<bool> = (None, parse_opt_bool, [TRACKED],
+             'force use of unwind tables'),
+        incremental: Option<String> = (None, parse_opt_string, [UNTRACKED],
+            'enable incremental compilation'),
+    ";
+    good(lines);
+}
+
+#[test]
+fn bless_smoke() {
+    let before = "\
+        tidy-alphabetical-start
+        08
+        1
+        11
+        03
+        tidy-alphabetical-end
+    ";
+    let after = "\
+        tidy-alphabetical-start
+        1
+        03
+        08
+        11
+        tidy-alphabetical-end
+    ";
+
+    bless_test(before, after);
+}
+
+#[test]
+fn bless_multiline() {
+    let before = "\
+        tidy-alphabetical-start
+        08 {
+             z}
+        08 {
+           x
+        }
+        1
+        08 {y}
+        02
+        11 (
+          0
+        )
+        03
+            addition
+    notaddition
+        tidy-alphabetical-end
+    ";
+    let after = "\
+        tidy-alphabetical-start
+        1
+        02
+        03
+            addition
+        08 {
+           x
+        }
+        08 {y}
+        08 {
+             z}
+        11 (
+          0
+        )
+    notaddition
+        tidy-alphabetical-end
+    ";
+
+    bless_test(before, after);
+}
+
+#[test]
+fn bless_funny_numbers() {
+    // Because `2` is indented it gets merged into one entry with `1` and gets
+    // interpreted by version sort as `12`, which is greater than `3`.
+    //
+    // This is neither a wanted nor an unwanted behavior, this test just checks
+    // that it hasn't changed.
+
+    let before = "\
+        tidy-alphabetical-start
+        1
+          2
+        3
+        tidy-alphabetical-end
+    ";
+    let after = "\
+        tidy-alphabetical-start
+        3
+        1
+          2
+        tidy-alphabetical-end
+    ";
+
+    bless_test(before, after);
 }
--- a/src/tools/tidy/src/diagnostics.rs
+++ b/src/tools/tidy/src/diagnostics.rs
@ -6,10 +6,10 @@ use std::sync::{Arc, Mutex};

 use termcolor::Color;

+/// CLI flags used by tidy.
 #[derive(Clone, Default)]
-///CLI flags used by tidy.
 pub struct TidyFlags {
-    ///Applies style and formatting changes during a tidy run.
+    /// Applies style and formatting changes during a tidy run.
    bless: bool,
 }