Merge pull request #4104 from RalfJung/bench
Provide a way to compare benchmark results with baseline
This commit is contained in:
commit
bba6f0a6b2
5 changed files with 111 additions and 10 deletions
|
|
@ -212,6 +212,15 @@ Miri comes with a few benchmarks; you can run `./miri bench` to run them with th
|
|||
Miri. Note: this will run `./miri install` as a side-effect. Also requires `hyperfine` to be
|
||||
installed (`cargo install hyperfine`).
|
||||
|
||||
To compare the benchmark results with a baseline, do the following:
|
||||
- Before applying your changes, run `./miri bench --save-baseline=baseline.json`.
|
||||
- Then do your changes.
|
||||
- Then run `./miri bench --load-baseline=baseline.json`; the results will include
|
||||
a comparison with the baseline.
|
||||
|
||||
You can run only some of the benchmarks by listing them, e.g. `./miri bench mse`.
|
||||
The names refer to the folders in `bench-cargo-miri`.
|
||||
|
||||
## Configuring `rust-analyzer`
|
||||
|
||||
To configure `rust-analyzer` and the IDE for working on Miri, copy one of the provided
|
||||
|
|
|
|||
|
|
@ -250,6 +250,8 @@ dependencies = [
|
|||
"itertools",
|
||||
"path_macro",
|
||||
"rustc_version",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
"shell-words",
|
||||
"tempfile",
|
||||
|
|
|
|||
|
|
@ -23,6 +23,8 @@ xshell = "0.2.6"
|
|||
rustc_version = "0.4"
|
||||
dunce = "1.0.4"
|
||||
directories = "5"
|
||||
serde = "1"
|
||||
serde_json = "1"
|
||||
serde_derive = "1"
|
||||
tempfile = "3.13.0"
|
||||
clap = { version = "4.5.21", features = ["derive"] }
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
use std::collections::HashMap;
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::io::Write;
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, BufWriter, Write};
|
||||
use std::ops::{Not, Range};
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
|
|
@ -7,6 +9,8 @@ use std::{env, net, process};
|
|||
|
||||
use anyhow::{Context, Result, anyhow, bail};
|
||||
use path_macro::path;
|
||||
use serde_derive::{Deserialize, Serialize};
|
||||
use tempfile::TempDir;
|
||||
use walkdir::WalkDir;
|
||||
use xshell::{Shell, cmd};
|
||||
|
||||
|
|
@ -179,8 +183,8 @@ impl Command {
|
|||
Command::Doc { flags } => Self::doc(flags),
|
||||
Command::Fmt { flags } => Self::fmt(flags),
|
||||
Command::Clippy { flags } => Self::clippy(flags),
|
||||
Command::Bench { target, no_install, benches } =>
|
||||
Self::bench(target, no_install, benches),
|
||||
Command::Bench { target, no_install, save_baseline, load_baseline, benches } =>
|
||||
Self::bench(target, no_install, save_baseline, load_baseline, benches),
|
||||
Command::Toolchain { flags } => Self::toolchain(flags),
|
||||
Command::RustcPull { commit } => Self::rustc_pull(commit.clone()),
|
||||
Command::RustcPush { github_user, branch } => Self::rustc_push(github_user, branch),
|
||||
|
|
@ -379,7 +383,17 @@ impl Command {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn bench(target: Option<String>, no_install: bool, benches: Vec<String>) -> Result<()> {
|
||||
fn bench(
|
||||
target: Option<String>,
|
||||
no_install: bool,
|
||||
save_baseline: Option<String>,
|
||||
load_baseline: Option<String>,
|
||||
benches: Vec<String>,
|
||||
) -> Result<()> {
|
||||
if save_baseline.is_some() && load_baseline.is_some() {
|
||||
bail!("Only one of `--save-baseline` and `--load-baseline` can be set");
|
||||
}
|
||||
|
||||
// The hyperfine to use
|
||||
let hyperfine = env::var("HYPERFINE");
|
||||
let hyperfine = hyperfine.as_deref().unwrap_or("hyperfine -w 1 -m 5 --shell=none");
|
||||
|
|
@ -387,19 +401,26 @@ impl Command {
|
|||
let Some((program_name, args)) = hyperfine.split_first() else {
|
||||
bail!("expected HYPERFINE environment variable to be non-empty");
|
||||
};
|
||||
|
||||
if !no_install {
|
||||
// Make sure we have an up-to-date Miri installed and selected the right toolchain.
|
||||
Self::install(vec![])?;
|
||||
}
|
||||
let results_json_dir = if save_baseline.is_some() || load_baseline.is_some() {
|
||||
Some(TempDir::new()?)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let miri_dir = miri_dir()?;
|
||||
let sh = Shell::new()?;
|
||||
sh.change_dir(miri_dir()?);
|
||||
sh.change_dir(&miri_dir);
|
||||
let benches_dir = "bench-cargo-miri";
|
||||
let benches: Vec<OsString> = if benches.is_empty() {
|
||||
let benches: Vec<String> = if benches.is_empty() {
|
||||
sh.read_dir(benches_dir)?
|
||||
.into_iter()
|
||||
.filter(|path| path.is_dir())
|
||||
.map(Into::into)
|
||||
.map(|path| path.into_os_string().into_string().unwrap())
|
||||
.collect()
|
||||
} else {
|
||||
benches.into_iter().map(Into::into).collect()
|
||||
|
|
@ -414,16 +435,75 @@ impl Command {
|
|||
let target_flag = &target_flag;
|
||||
let toolchain = active_toolchain()?;
|
||||
// Run the requested benchmarks
|
||||
for bench in benches {
|
||||
for bench in &benches {
|
||||
let current_bench = path!(benches_dir / bench / "Cargo.toml");
|
||||
let mut export_json = None;
|
||||
if let Some(baseline_temp_dir) = &results_json_dir {
|
||||
export_json = Some(format!(
|
||||
"--export-json={}",
|
||||
path!(baseline_temp_dir / format!("{bench}.bench.json")).display()
|
||||
));
|
||||
}
|
||||
// We don't attempt to escape `current_bench`, but we wrap it in quotes.
|
||||
// That seems to make Windows CI happy.
|
||||
cmd!(
|
||||
sh,
|
||||
"{program_name} {args...} 'cargo +'{toolchain}' miri run '{target_flag}' --manifest-path \"'{current_bench}'\"'"
|
||||
"{program_name} {args...} {export_json...} 'cargo +'{toolchain}' miri run '{target_flag}' --manifest-path \"'{current_bench}'\"'"
|
||||
)
|
||||
.run()?;
|
||||
}
|
||||
|
||||
// Gather/load results for baseline saving.
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
struct BenchResult {
|
||||
mean: f64,
|
||||
stddev: f64,
|
||||
}
|
||||
|
||||
let gather_results = || -> Result<HashMap<&str, BenchResult>> {
|
||||
let baseline_temp_dir = results_json_dir.unwrap();
|
||||
let mut results = HashMap::new();
|
||||
for bench in &benches {
|
||||
let result = File::open(path!(baseline_temp_dir / format!("{bench}.bench.json")))?;
|
||||
let mut result: serde_json::Value =
|
||||
serde_json::from_reader(BufReader::new(result))?;
|
||||
let result: BenchResult = serde_json::from_value(result["results"][0].take())?;
|
||||
results.insert(bench as &str, result);
|
||||
}
|
||||
Ok(results)
|
||||
};
|
||||
|
||||
if let Some(baseline_file) = save_baseline {
|
||||
let results = gather_results()?;
|
||||
let baseline = File::create(baseline_file)?;
|
||||
serde_json::to_writer_pretty(BufWriter::new(baseline), &results)?;
|
||||
} else if let Some(baseline_file) = load_baseline {
|
||||
let new_results = gather_results()?;
|
||||
let baseline_results: HashMap<String, BenchResult> = {
|
||||
let f = File::open(baseline_file)?;
|
||||
serde_json::from_reader(BufReader::new(f))?
|
||||
};
|
||||
println!(
|
||||
"Comparison with baseline (relative speed, lower is better for the new results):"
|
||||
);
|
||||
for (bench, new_result) in new_results.iter() {
|
||||
let Some(baseline_result) = baseline_results.get(*bench) else { continue };
|
||||
|
||||
// Compare results (inspired by hyperfine)
|
||||
let ratio = new_result.mean / baseline_result.mean;
|
||||
// https://en.wikipedia.org/wiki/Propagation_of_uncertainty#Example_formulae
|
||||
// Covariance assumed to be 0, i.e. variables are assumed to be independent
|
||||
let ratio_stddev = ratio
|
||||
* f64::sqrt(
|
||||
(new_result.stddev / new_result.mean).powi(2)
|
||||
+ (baseline_result.stddev / baseline_result.mean).powi(2),
|
||||
);
|
||||
|
||||
println!(" {bench}: {ratio:.2} ± {ratio_stddev:.2}");
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#![allow(clippy::needless_question_mark)]
|
||||
#![allow(clippy::needless_question_mark, rustc::internal)]
|
||||
|
||||
mod commands;
|
||||
mod coverage;
|
||||
|
|
@ -117,6 +117,14 @@ pub enum Command {
|
|||
/// When `true`, skip the `./miri install` step.
|
||||
#[arg(long)]
|
||||
no_install: bool,
|
||||
/// Store the benchmark result in the given file, so it can be used
|
||||
/// as the baseline for a future run.
|
||||
#[arg(long)]
|
||||
save_baseline: Option<String>,
|
||||
/// Load previous stored benchmark results as baseline, and print an analysis of how the
|
||||
/// current run compares.
|
||||
#[arg(long)]
|
||||
load_baseline: Option<String>,
|
||||
/// List of benchmarks to run (default: run all benchmarks).
|
||||
benches: Vec<String>,
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue