Rollup merge of #151262 - Shunpoco:tidy-clap, r=Zalathar

Introducing clap on tidy ### Context Currently tidy parses paths/flags from args_os manually, and the extraction is spreading multiple files. It may be a breeding ground for bugs. ref: https://rust-lang.zulipchat.com/#narrow/channel/326414-t-infra.2Fbootstrap/topic/How.20--ci.3Dtrue.20interact.20with.20CiEnv.3F/near/543171560 (N.B. We've talked about introducing a ci flag in tidy in that thread, but I don't do it in this PR as I don't want to put multiple changes into a PR. I will introduce the flag in a coming PR.) ### Changes This PR replaces current parsing logic with clap. To confirm the new parser works fine, I introduce an unit test for it. ### Build time We've concerned about how clap increases the build time. In order to confirm the increment is acceptable, I did an experiment on CI: - Run cargo build without cache for tidy 50 times in each environment on CI - Calculate an average and a standard deviation from the result, and plot them Here is the graph: <img width="943" height="530" alt="rust_tidy_build_time" src="https://github.com/user-attachments/assets/c7deee69-9f38-4044-87dc-76d6e7384f76" /> - Clap tends to increase build time ~2s. We think this is not a big problem - Build time differs in each environment - In some cases standard deviation are high, I suppose that busyness of CI instances affect build time
2026-01-31 10:41:52 +01:00 · 2026-01-31 10:41:52 +01:00 · df7d12a24d
commit df7d12a24d
parent 310ae8ca77 1485873233
10 changed files with 306 additions and 52 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -580,9 +580,9 @@ dependencies = [

 [[package]]
 name = "clap"
-version = "4.5.51"
+version = "4.5.54"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c26d721170e0295f191a69bd9a1f93efcdb0aff38684b61ab5750468972e5f5"
+checksum = "c6e6ff9dcd79cff5cd969a17a545d79e84ab086e444102a591e288a8aa3ce394"
 dependencies = [
 "clap_builder",
 "clap_derive",
@ -600,9 +600,9 @@ dependencies = [

 [[package]]
 name = "clap_builder"
-version = "4.5.51"
+version = "4.5.54"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "75835f0c7bf681bfd05abe44e965760fea999a5286c6eb2d59883634fd02011a"
+checksum = "fa42cf4d2b7a41bc8f663a7cab4031ebafa1bf3875705bfaf8466dc60ab52c00"
 dependencies = [
 "anstream",
 "anstyle",
@ -5623,6 +5623,7 @@ version = "0.1.0"
 dependencies = [
 "build_helper",
 "cargo_metadata 0.21.0",
+ "clap",
 "fluent-syntax",
 "globset",
 "ignore",
--- a/src/bootstrap/src/core/build_steps/test.rs
+++ b/src/bootstrap/src/core/build_steps/test.rs
@ -1299,19 +1299,19 @@ impl Step for Tidy {
    /// for the `dev` or `nightly` channels.
    fn run(self, builder: &Builder<'_>) {
        let mut cmd = builder.tool_cmd(Tool::Tidy);
-        cmd.arg(&builder.src);
-        cmd.arg(&builder.initial_cargo);
-        cmd.arg(&builder.out);
+        cmd.arg(format!("--root-path={}", &builder.src.display()));
+        cmd.arg(format!("--cargo-path={}", &builder.initial_cargo.display()));
+        cmd.arg(format!("--output-dir={}", &builder.out.display()));
        // Tidy is heavily IO constrained. Still respect `-j`, but use a higher limit if `jobs` hasn't been configured.
        let jobs = builder.config.jobs.unwrap_or_else(|| {
            8 * std::thread::available_parallelism().map_or(1, std::num::NonZeroUsize::get) as u32
        });
-        cmd.arg(jobs.to_string());
+        cmd.arg(format!("--concurrency={jobs}"));
        // pass the path to the yarn command used for installing js deps.
        if let Some(yarn) = &builder.config.yarn {
-            cmd.arg(yarn);
+            cmd.arg(format!("--npm-path={}", yarn.display()));
        } else {
-            cmd.arg("yarn");
+            cmd.arg("--npm-path=yarn");
        }
        if builder.is_verbose() {
            cmd.arg("--verbose");
--- a/src/tools/tidy/Cargo.toml
+++ b/src/tools/tidy/Cargo.toml
@ -20,6 +20,7 @@ fluent-syntax = "0.12"
 similar = "2.5.0"
 toml = "0.7.8"
 tempfile = "3.15.0"
+clap = "4.5.54"

 [features]
 build-metrics = ["dep:serde"]
--- a/src/tools/tidy/src/alphabetical/tests.rs
+++ b/src/tools/tidy/src/alphabetical/tests.rs
@ -37,7 +37,7 @@ fn bless_test(before: &str, after: &str) {
    let temp_path = tempfile::Builder::new().tempfile().unwrap().into_temp_path();
    std::fs::write(&temp_path, before).unwrap();

-    let tidy_ctx = TidyCtx::new(Path::new("/"), false, TidyFlags::new(&["--bless".to_owned()]));
+    let tidy_ctx = TidyCtx::new(Path::new("/"), false, TidyFlags::new(true));

    let mut check = tidy_ctx.start_check("alphabetical-test");
    check_lines(&temp_path, before, &tidy_ctx, &mut check);
--- a/src/tools/tidy/src/arg_parser.rs
+++ b/src/tools/tidy/src/arg_parser.rs
@ -0,0 +1,102 @@
+use std::num::NonZeroUsize;
+use std::path::PathBuf;
+
+use clap::{Arg, ArgAction, ArgMatches, Command, value_parser};
+
+#[cfg(test)]
+mod tests;
+
+#[derive(Debug, Clone)]
+pub struct TidyArgParser {
+    pub root_path: PathBuf,
+    pub cargo: PathBuf,
+    pub output_directory: PathBuf,
+    pub concurrency: NonZeroUsize,
+    pub npm: PathBuf,
+    pub verbose: bool,
+    pub bless: bool,
+    pub extra_checks: Option<Vec<String>>,
+    pub pos_args: Vec<String>,
+}
+
+impl TidyArgParser {
+    fn command() -> Command {
+        Command::new("rust-tidy")
+            .arg(
+                Arg::new("root_path")
+                    .help("path of the root directory")
+                    .long("root-path")
+                    .required(true)
+                    .value_parser(value_parser!(PathBuf)),
+            )
+            .arg(
+                Arg::new("cargo")
+                    .help("path of cargo")
+                    .long("cargo-path")
+                    .required(true)
+                    .value_parser(value_parser!(PathBuf)),
+            )
+            .arg(
+                Arg::new("output_directory")
+                    .help("path of output directory")
+                    .long("output-dir")
+                    .required(true)
+                    .value_parser(value_parser!(PathBuf)),
+            )
+            .arg(
+                Arg::new("concurrency")
+                    .help("number of threads working concurrently")
+                    .long("concurrency")
+                    .required(true)
+                    .value_parser(value_parser!(NonZeroUsize)),
+            )
+            .arg(
+                Arg::new("npm")
+                    .help("path of npm")
+                    .long("npm-path")
+                    .required(true)
+                    .value_parser(value_parser!(PathBuf)),
+            )
+            .arg(Arg::new("verbose").help("verbose").long("verbose").action(ArgAction::SetTrue))
+            .arg(Arg::new("bless").help("target files are modified").long("bless").action(ArgAction::SetTrue))
+            .arg(
+                Arg::new("extra_checks")
+                    .help("extra checks")
+                    .long("extra-checks")
+                    .value_delimiter(',')
+                    .action(ArgAction::Append),
+            )
+            .arg(Arg::new("pos_args").help("for extra checks. you can specify configs and target files for external check tools").action(ArgAction::Append).last(true))
+    }
+
+    fn build(matches: ArgMatches) -> Self {
+        let mut tidy_flags = Self {
+            root_path: matches.get_one::<PathBuf>("root_path").unwrap().clone(),
+            cargo: matches.get_one::<PathBuf>("cargo").unwrap().clone(),
+            output_directory: matches.get_one::<PathBuf>("output_directory").unwrap().clone(),
+            concurrency: *matches.get_one::<NonZeroUsize>("concurrency").unwrap(),
+            npm: matches.get_one::<PathBuf>("npm").unwrap().clone(),
+            verbose: *matches.get_one::<bool>("verbose").unwrap(),
+            bless: *matches.get_one::<bool>("bless").unwrap(),
+            extra_checks: None,
+            pos_args: vec![],
+        };
+
+        if let Some(extra_checks) = matches.get_many::<String>("extra_checks") {
+            tidy_flags.extra_checks = Some(extra_checks.map(|s| s.to_string()).collect::<Vec<_>>());
+        }
+
+        tidy_flags.pos_args = matches
+            .get_many::<String>("pos_args")
+            .unwrap_or_default()
+            .map(|v| v.to_string())
+            .collect::<Vec<_>>();
+
+        tidy_flags
+    }
+
+    pub fn parse() -> Self {
+        let matches = Self::command().get_matches();
+        Self::build(matches)
+    }
+}
--- a/src/tools/tidy/src/arg_parser/tests.rs
+++ b/src/tools/tidy/src/arg_parser/tests.rs
@ -0,0 +1,168 @@
+use std::path::PathBuf;
+
+use crate::arg_parser::TidyArgParser;
+
+// Test all arguments
+#[test]
+fn test_tidy_parser_full() {
+    let args = vec![
+        "rust-tidy",
+        "--root-path",
+        "/home/user/rust",
+        "--cargo-path",
+        "/home/user/rust/build/x86_64-unknown-linux-gnu/stage0/bin/cargo",
+        "--output-dir",
+        "/home/user/rust/build",
+        "--concurrency",
+        "16",
+        "--npm-path",
+        "yarn",
+        "--verbose",
+        "--bless",
+        "--extra-checks",
+        "if-installed:auto:js,auto:if-installed:py,if-installed:auto:cpp,if-installed:auto:spellcheck",
+        "--", // pos_args
+        "some-file",
+        "some-file2",
+    ];
+    let cmd = TidyArgParser::command();
+    let parsed_args = TidyArgParser::build(cmd.get_matches_from(args));
+
+    assert_eq!(parsed_args.root_path, PathBuf::from("/home/user/rust"));
+    assert_eq!(
+        parsed_args.cargo,
+        PathBuf::from("/home/user/rust/build/x86_64-unknown-linux-gnu/stage0/bin/cargo")
+    );
+    assert_eq!(parsed_args.output_directory, PathBuf::from("/home/user/rust/build"));
+    assert_eq!(parsed_args.concurrency.get(), 16);
+    assert_eq!(parsed_args.npm, PathBuf::from("yarn"));
+    assert!(parsed_args.verbose);
+    assert!(parsed_args.bless);
+    assert_eq!(
+        parsed_args.extra_checks,
+        Some(vec![
+            "if-installed:auto:js".to_string(),
+            "auto:if-installed:py".to_string(),
+            "if-installed:auto:cpp".to_string(),
+            "if-installed:auto:spellcheck".to_string(),
+        ])
+    );
+    assert_eq!(parsed_args.pos_args, vec!["some-file".to_string(), "some-file2".to_string()]);
+}
+
+// The parser can take required args any order
+#[test]
+fn test_tidy_parser_any_order() {
+    let args = vec![
+        "rust-tidy",
+        "--npm-path",
+        "yarn",
+        "--concurrency",
+        "16",
+        "--output-dir",
+        "/home/user/rust/build",
+        "--cargo-path",
+        "/home/user/rust/build/x86_64-unknown-linux-gnu/stage0/bin/cargo",
+        "--root-path",
+        "/home/user/rust",
+    ];
+    let cmd = TidyArgParser::command();
+    let parsed_args = TidyArgParser::build(cmd.get_matches_from(args));
+
+    assert_eq!(parsed_args.root_path, PathBuf::from("/home/user/rust"));
+    assert_eq!(
+        parsed_args.cargo,
+        PathBuf::from("/home/user/rust/build/x86_64-unknown-linux-gnu/stage0/bin/cargo")
+    );
+    assert_eq!(parsed_args.output_directory, PathBuf::from("/home/user/rust/build"));
+    assert_eq!(parsed_args.concurrency.get(), 16);
+    assert_eq!(parsed_args.npm, PathBuf::from("yarn"));
+}
+
+// --root-path is required
+#[test]
+fn test_tidy_parser_missing_root_path() {
+    let args = vec![
+        "rust-tidy",
+        "--npm-path",
+        "yarn",
+        "--concurrency",
+        "16",
+        "--output-dir",
+        "/home/user/rust/build",
+        "--cargo-path",
+        "/home/user/rust/build/x86_64-unknown-linux-gnu/stage0/bin/cargo",
+    ];
+    let cmd = TidyArgParser::command();
+    assert!(cmd.try_get_matches_from(args).is_err());
+}
+
+// --cargo-path is required
+#[test]
+fn test_tidy_parser_missing_cargo_path() {
+    let args = vec![
+        "rust-tidy",
+        "--npm-path",
+        "yarn",
+        "--concurrency",
+        "16",
+        "--output-dir",
+        "/home/user/rust/build",
+        "--root-path",
+        "/home/user/rust",
+    ];
+    let cmd = TidyArgParser::command();
+    assert!(cmd.try_get_matches_from(args).is_err());
+}
+
+// --output-dir is required
+#[test]
+fn test_tidy_parser_missing_output_dir() {
+    let args = vec![
+        "rust-tidy",
+        "--npm-path",
+        "yarn",
+        "--concurrency",
+        "16",
+        "--cargo-path",
+        "/home/user/rust/build/x86_64-unknown-linux-gnu/stage0/bin/cargo",
+        "--root-path",
+        "/home/user/rust",
+    ];
+    let cmd = TidyArgParser::command();
+    assert!(cmd.try_get_matches_from(args).is_err());
+}
+
+// --concurrency is required
+#[test]
+fn test_tidy_parser_missing_concurrency() {
+    let args = vec![
+        "rust-tidy",
+        "--npm-path",
+        "yarn",
+        "--output-dir",
+        "/home/user/rust/build",
+        "--cargo-path",
+        "/home/user/rust/build/x86_64-unknown-linux-gnu/stage0/bin/cargo",
+        "--root-path",
+        "/home/user/rust",
+    ];
+    let cmd = TidyArgParser::command();
+    assert!(cmd.try_get_matches_from(args).is_err());
+}
+
+// --npm-path is required
+#[test]
+fn test_tidy_parser_missing_npm_path() {
+    let args = vec![
+        "rust-tidy",
+        "--concurrency",
+        "16",
+        "--output-dir",
+        "/home/user/rust/build",
+        "--cargo-path",
+        "/home/user/rust/build/x86_64-unknown-linux-gnu/stage0/bin/cargo",
+    ];
+    let cmd = TidyArgParser::command();
+    assert!(cmd.try_get_matches_from(args).is_err());
+}
--- a/src/tools/tidy/src/diagnostics.rs
+++ b/src/tools/tidy/src/diagnostics.rs
@ -14,16 +14,8 @@ pub struct TidyFlags {
 }

 impl TidyFlags {
-    pub fn new(cfg_args: &[String]) -> Self {
-        let mut flags = Self::default();
-
-        for arg in cfg_args {
-            match arg.as_str() {
-                "--bless" => flags.bless = true,
-                _ => continue,
-            }
-        }
-        flags
+    pub fn new(bless: bool) -> Self {
+        Self { bless }
    }
 }

--- a/src/tools/tidy/src/extra_checks/mod.rs
+++ b/src/tools/tidy/src/extra_checks/mod.rs
@ -58,8 +58,8 @@ pub fn check(
    tools_path: &Path,
    npm: &Path,
    cargo: &Path,
-    extra_checks: Option<&str>,
-    pos_args: &[String],
+    extra_checks: Option<Vec<String>>,
+    pos_args: Vec<String>,
    tidy_ctx: TidyCtx,
 ) {
    let mut check = tidy_ctx.start_check("extra_checks");
@ -88,8 +88,8 @@ fn check_impl(
    tools_path: &Path,
    npm: &Path,
    cargo: &Path,
-    extra_checks: Option<&str>,
-    pos_args: &[String],
+    extra_checks: Option<Vec<String>>,
+    pos_args: Vec<String>,
    tidy_ctx: &TidyCtx,
 ) -> Result<(), Error> {
    let show_diff =
@ -99,9 +99,7 @@ fn check_impl(
    // Split comma-separated args up
    let mut lint_args = match extra_checks {
        Some(s) => s
-            .strip_prefix("--extra-checks=")
-            .unwrap()
-            .split(',')
+            .iter()
            .map(|s| {
                if s == "spellcheck:fix" {
                    eprintln!("warning: `spellcheck:fix` is no longer valid, use `--extra-checks=spellcheck --bless`");
--- a/src/tools/tidy/src/lib.rs
+++ b/src/tools/tidy/src/lib.rs
@ -157,6 +157,7 @@ pub fn files_modified(ci_info: &CiInfo, pred: impl Fn(&str) -> bool) -> bool {
 }

 pub mod alphabetical;
+pub mod arg_parser;
 pub mod bins;
 pub mod debug_artifacts;
 pub mod deps;
--- a/src/tools/tidy/src/main.rs
+++ b/src/tools/tidy/src/main.rs
@ -5,12 +5,10 @@
 //! builders. The tidy checks can be executed with `./x.py test tidy`.

 use std::collections::VecDeque;
-use std::num::NonZeroUsize;
-use std::path::PathBuf;
-use std::str::FromStr;
 use std::thread::{self, ScopedJoinHandle, scope};
 use std::{env, process};

+use tidy::arg_parser::TidyArgParser;
 use tidy::diagnostics::{COLOR_ERROR, COLOR_SUCCESS, TidyCtx, TidyFlags, output_message};
 use tidy::*;

@ -22,14 +20,13 @@ fn main() {
        env::set_var("RUSTC_BOOTSTRAP", "1");
    }

-    let root_path: PathBuf = env::args_os().nth(1).expect("need path to root of repo").into();
-    let cargo: PathBuf = env::args_os().nth(2).expect("need path to cargo").into();
-    let output_directory: PathBuf =
-        env::args_os().nth(3).expect("need path to output directory").into();
-    let concurrency: NonZeroUsize =
-        FromStr::from_str(&env::args().nth(4).expect("need concurrency"))
-            .expect("concurrency must be a number");
-    let npm: PathBuf = env::args_os().nth(5).expect("need name/path of npm command").into();
+    let parsed_args = TidyArgParser::parse();
+
+    let root_path = parsed_args.root_path;
+    let cargo = parsed_args.cargo;
+    let output_directory = parsed_args.output_directory;
+    let concurrency = parsed_args.concurrency.get();
+    let npm = parsed_args.npm;

    let root_manifest = root_path.join("Cargo.toml");
    let typos_toml = root_path.join("typos.toml");
@ -41,17 +38,12 @@ fn main() {
    let tools_path = src_path.join("tools");
    let crashes_path = tests_path.join("crashes");

-    let args: Vec<String> = env::args().skip(1).collect();
-    let (cfg_args, pos_args) = match args.iter().position(|arg| arg == "--") {
-        Some(pos) => (&args[..pos], &args[pos + 1..]),
-        None => (&args[..], [].as_slice()),
-    };
-    let verbose = cfg_args.iter().any(|s| *s == "--verbose");
-    let extra_checks =
-        cfg_args.iter().find(|s| s.starts_with("--extra-checks=")).map(String::as_str);
+    let verbose = parsed_args.verbose;
+    let bless = parsed_args.bless;
+    let extra_checks = parsed_args.extra_checks;
+    let pos_args = parsed_args.pos_args;

-    let tidy_flags = TidyFlags::new(cfg_args);
-    let tidy_ctx = TidyCtx::new(&root_path, verbose, tidy_flags);
+    let tidy_ctx = TidyCtx::new(&root_path, verbose, TidyFlags::new(bless));
    let ci_info = CiInfo::new(tidy_ctx.clone());

    let drain_handles = |handles: &mut VecDeque<ScopedJoinHandle<'_, ()>>| {
@ -62,14 +54,13 @@ fn main() {
            }
        }

-        while handles.len() >= concurrency.get() {
+        while handles.len() >= concurrency {
            handles.pop_front().unwrap().join().unwrap();
        }
    };

    scope(|s| {
-        let mut handles: VecDeque<ScopedJoinHandle<'_, ()>> =
-            VecDeque::with_capacity(concurrency.get());
+        let mut handles: VecDeque<ScopedJoinHandle<'_, ()>> = VecDeque::with_capacity(concurrency);

        macro_rules! check {
            ($p:ident) => {