make tidy-alphabetical use a natural sort

This commit is contained in:
Folkert de Vries 2025-05-20 20:23:47 +02:00
parent a17780db7b
commit 1dfc8406dc
No known key found for this signature in database
GPG key ID: 1F17F6FFD112B97C
19 changed files with 261 additions and 59 deletions

View file

@ -1036,18 +1036,18 @@ impl Step for PlainSourceTarball {
let src_files = [
// tidy-alphabetical-start
".gitmodules",
"bootstrap.example.toml",
"Cargo.lock",
"Cargo.toml",
"configure",
"CONTRIBUTING.md",
"COPYRIGHT",
"Cargo.lock",
"Cargo.toml",
"LICENSE-APACHE",
"license-metadata.json",
"LICENSE-MIT",
"README.md",
"RELEASES.md",
"REUSE.toml",
"bootstrap.example.toml",
"configure",
"license-metadata.json",
"x",
"x.ps1",
"x.py",

View file

@ -19,7 +19,9 @@
//! If a line ends with an opening delimiter, we effectively join the following line to it before
//! checking it. E.g. `foo(\nbar)` is treated like `foo(bar)`.
use std::cmp::Ordering;
use std::fmt::Display;
use std::iter::Peekable;
use std::path::Path;
use crate::walk::{filter_dirs, walk};
@ -99,9 +101,9 @@ fn check_section<'a>(
continue;
}
let prev_line_trimmed_lowercase = prev_line.trim_start_matches(' ').to_lowercase();
let prev_line_trimmed_lowercase = prev_line.trim_start_matches(' ');
if trimmed_line.to_lowercase() < prev_line_trimmed_lowercase {
if version_sort(&trimmed_line, &prev_line_trimmed_lowercase).is_lt() {
tidy_error_ext!(err, bad, "{file}:{}: line not in alphabetical order", idx + 1);
}
@ -143,3 +145,56 @@ pub fn check(path: &Path, bad: &mut bool) {
check_lines(file, lines, &mut crate::tidy_error, bad)
});
}
/// Consumes the leading run of ASCII digits from `it` and returns it as a string.
///
/// Stops at (and does not consume) the first character that is not an ASCII digit, so the
/// result is empty when `it` does not start with a digit.
fn consume_numeric_prefix<I: Iterator<Item = char>>(it: &mut Peekable<I>) -> String {
    let mut result = String::new();

    // Only ASCII digits count as part of a number. `char::is_numeric` also accepts characters
    // such as `²` or `½`, which have no decimal value and cannot be ordered as integers.
    while let Some(c) = it.next_if(char::is_ascii_digit) {
        result.push(c);
    }

    result
}

/// Compares two runs of ASCII digits by their numeric value without parsing them, so runs of
/// any length are supported.
///
/// Runs with the same numeric value (differing only in leading zeros, e.g. `0` and `00`) are
/// ordered by plain string comparison.
fn cmp_digit_runs(a: &str, b: &str) -> Ordering {
    let a_significant = a.trim_start_matches('0');
    let b_significant = b.trim_start_matches('0');

    // Without leading zeros, more digits means a larger value; with the same number of digits,
    // lexicographic order is numeric order.
    a_significant
        .len()
        .cmp(&b_significant.len())
        .then_with(|| a_significant.cmp(b_significant))
        .then_with(|| a.cmp(b))
}

// A sorting function that is case-sensitive, and sorts sequences of ASCII digits by their numeric
// value, so that `9` sorts before `12`. All other characters (including non-ASCII numeric
// characters such as `²`) are compared by their `char` ordering.
fn version_sort(a: &str, b: &str) -> Ordering {
    let mut it1 = a.chars().peekable();
    let mut it2 = b.chars().peekable();

    while let (Some(&x), Some(&y)) = (it1.peek(), it2.peek()) {
        match (x.is_ascii_digit(), y.is_ascii_digit()) {
            (true, true) => {
                let num1 = consume_numeric_prefix(&mut it1);
                let num2 = consume_numeric_prefix(&mut it2);

                // Equal runs have been consumed on both sides; keep comparing what follows.
                match cmp_digit_runs(&num1, &num2) {
                    Ordering::Equal => continue,
                    different => return different,
                }
            }
            (false, false) => match x.cmp(&y) {
                Ordering::Equal => {
                    it1.next();
                    it2.next();
                    continue;
                }
                different => return different,
            },
            (false, true) | (true, false) => {
                // A digit against a non-digit: fall back to plain character ordering.
                return x.cmp(&y);
            }
        }
    }

    // At least one side is exhausted: `None < Some(_)`, so a strict prefix sorts first.
    it1.next().cmp(&it2.next())
}

View file

@ -3,6 +3,7 @@ use std::str::from_utf8;
use super::*;
#[track_caller]
fn test(lines: &str, name: &str, expected_msg: &str, expected_bad: bool) {
let mut actual_msg = Vec::new();
let mut actual_bad = false;
@ -15,10 +16,12 @@ fn test(lines: &str, name: &str, expected_msg: &str, expected_bad: bool) {
assert_eq!(expected_bad, actual_bad);
}
/// Asserts that checking `lines` reports no error and emits no message.
///
/// `#[track_caller]` keeps a failing assertion pointing at the call site of `good`.
#[track_caller]
fn good(lines: &str) {
    let (name, expected_msg, expected_bad) = ("good", "", false);
    test(lines, name, expected_msg, expected_bad);
}
/// Asserts that checking `lines` reports an error whose message matches `expected_msg`.
///
/// `#[track_caller]` keeps a failing assertion pointing at the call site of `bad`.
#[track_caller]
fn bad(lines: &str, expected_msg: &str) {
    let (name, expected_bad) = ("bad", true);
    test(lines, name, expected_msg, expected_bad);
}
@ -187,3 +190,147 @@ fn test_double_end() {
";
bad(lines, "bad:5 found `tidy-alphabetical-end` expecting `tidy-alphabetical-start`");
}
/// Sections that are correctly ordered under the case-sensitive natural sort: digit runs are
/// compared by numeric value, everything else by character.
#[test]
fn test_numeric_good() {
// A space sorts before `_`, so `rustc_ast = ...` precedes `rustc_ast_lowering = ...`.
good(
"\
# tidy-alphabetical-start
rustc_ast = { path = \"../rustc_ast\" }
rustc_ast_lowering = { path = \"../rustc_ast_lowering\" }
# tidy-alphabetical-end
",
);
// A digit against a non-digit is compared as plain characters: `-` sorts before `1`.
good(
"\
# tidy-alphabetical-start
fp-armv8
fp16
# tidy-alphabetical-end
",
);
// Digit runs are compared by value: 1 < 2 < 10.
good(
"\
# tidy-alphabetical-start
item1
item2
item10
# tidy-alphabetical-end
",
);
// A strict prefix sorts before the longer line.
good(
"\
# tidy-alphabetical-start
foo
foo_
# tidy-alphabetical-end
",
);
// `-` sorts before `_`.
good(
"\
# tidy-alphabetical-start
foo-bar
foo_bar
# tidy-alphabetical-end
",
);
// `-` sorts before the digit `2`.
good(
"\
# tidy-alphabetical-start
sme-lutv2
sme2
# tidy-alphabetical-end
",
);
// Numbers followed by suffixes: 5 < 6, then `v6` is a prefix of `v6k`, then `k` < `t`.
good(
"\
# tidy-alphabetical-start
v5te
v6
v6k
v6t2
# tidy-alphabetical-end
",
);
// Equal numbers fall through to the characters that follow: `d` < `f`.
good(
"\
# tidy-alphabetical-start
zve64d
zve64f
# tidy-alphabetical-end
",
);
// Case is significant: uppercase letters sort before lowercase ones.
good(
"\
# tidy-alphabetical-start
_ZYXW
_abcd
# tidy-alphabetical-end
",
);
// Digit runs with the same numeric value are ordered by their text, so fewer leading zeros
// come first.
good(
"\
# tidy-alphabetical-start
v0
v00
v000
# tidy-alphabetical-end
",
);
// The first digit runs (`005` and `5`) are numerically equal, so their text decides the order
// before the rest of the line is looked at.
good(
"\
# tidy-alphabetical-start
w005s09t
w5s009t
# tidy-alphabetical-end
",
);
// `0` and `00` are numerically equal; `0` sorts first as text.
good(
"\
# tidy-alphabetical-start
v0s
v00t
# tidy-alphabetical-end
",
);
}
/// Sections that violate the natural sort; the expected message names the first offending line
/// (line 1 is the `tidy-alphabetical-start` marker).
#[test]
fn test_numeric_bad() {
// `item2` (line 4) must come before `item10`, since 2 < 10.
let lines = "\
# tidy-alphabetical-start
item1
item10
item2
# tidy-alphabetical-end
";
bad(lines, "bad:4: line not in alphabetical order");
// Equal numbers, but `d` sorts before `f`, so line 3 is out of order.
let lines = "\
# tidy-alphabetical-start
zve64f
zve64d
# tidy-alphabetical-end
";
bad(lines, "bad:3: line not in alphabetical order");
// Numerically equal digit runs are ordered by their text, so `00` must precede `000`.
let lines = "\
# tidy-alphabetical-start
000
00
# tidy-alphabetical-end
";
bad(lines, "bad:3: line not in alphabetical order");
}