From 1cfe3585e723f40537130e17f04b5e26cd0c306b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 4 Dec 2025 17:14:53 -0500
Subject: [PATCH] bench: Update the benchmark runner to gungraun 0.17

`iai-callgrind` was renamed to `gungraun` and had a new release. Update
everything to match.

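No changes to observable benchmark behavior are expected. Apart from the
rename itself, the benchmark home directory moves from `iai-home` to
`gungraun-home` (existing baselines are migrated by `ci/bench-icount.sh`)
and the `--callgrind-limits` value is now written as `ir=5.0%`.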
---
 .../.github/workflows/main.yaml               | 10 +++----
 library/compiler-builtins/.gitignore          |  1 +
 library/compiler-builtins/CONTRIBUTING.md     |  6 ++--
 library/compiler-builtins/Cargo.toml          |  2 +-
 .../builtins-test/Cargo.toml                  |  6 ++--
 .../builtins-test/benches/mem_icount.rs       |  4 +--
 library/compiler-builtins/ci/bench-icount.sh  | 30 +++++++++++--------
 library/compiler-builtins/ci/ci-util.py       |  2 +-
 .../compiler-builtins/libm-test/Cargo.toml    |  6 ++--
 .../libm-test/benches/icount.rs               | 12 ++++++--
 10 files changed, 45 insertions(+), 34 deletions(-)

diff --git a/library/compiler-builtins/.github/workflows/main.yaml b/library/compiler-builtins/.github/workflows/main.yaml
index 4ed85ee69c6c..63809fe04b59 100644
--- a/library/compiler-builtins/.github/workflows/main.yaml
+++ b/library/compiler-builtins/.github/workflows/main.yaml
@@ -247,13 +247,13 @@ jobs:
     - name: Set up dependencies
       run: |
         sudo apt-get update
-        sudo apt-get install -y valgrind gdb libc6-dbg # Needed for iai-callgrind
+        sudo apt-get install -y valgrind gdb libc6-dbg # Needed for gungraun
         rustup update "$BENCHMARK_RUSTC" --no-self-update
         rustup default "$BENCHMARK_RUSTC"
-        # Install the version of iai-callgrind-runner that is specified in Cargo.toml
-        iai_version="$(cargo metadata --format-version=1 --features icount |
-           jq -r '.packages[] | select(.name == "iai-callgrind").version')"
-        cargo binstall -y iai-callgrind-runner --version "$iai_version"
+        # Install the version of gungraun-runner that is specified in Cargo.toml
+        gungraun_version="$(cargo metadata --format-version=1 --features icount |
+           jq -r '.packages[] | select(.name == "gungraun").version')"
+        cargo binstall -y gungraun-runner --version "$gungraun_version"
         sudo apt-get install valgrind
     - uses: Swatinem/rust-cache@v2
       with:
diff --git a/library/compiler-builtins/.gitignore b/library/compiler-builtins/.gitignore
index f12b871c2f78..abe346659d4c 100644
--- a/library/compiler-builtins/.gitignore
+++ b/library/compiler-builtins/.gitignore
@@ -9,6 +9,7 @@ compiler-rt
 # Benchmark cache
 baseline-*
 iai-home
+gungraun-home
 
 # Temporary files
 *.bk
diff --git a/library/compiler-builtins/CONTRIBUTING.md b/library/compiler-builtins/CONTRIBUTING.md
index 9ae4f893c60d..f74d3f8ba127 100644
--- a/library/compiler-builtins/CONTRIBUTING.md
+++ b/library/compiler-builtins/CONTRIBUTING.md
@@ -150,8 +150,8 @@ cargo bench --no-default-features \
 ```
 
 There are also benchmarks that check instruction count behind the `icount`
-feature. These require [`iai-callgrind-runner`] (via Cargo) and [Valgrind]
-to be installed, which means these only run on limited platforms.
+feature. These require [`gungraun-runner`] (via Cargo) and [Valgrind] to be
+installed, which means these only run on limited platforms.
 
 Instruction count benchmarks are run as part of CI to flag performance
 regresions.
@@ -163,7 +163,7 @@ cargo bench --no-default-features \
     --bench icount --bench mem_icount
 ```
 
-[`iai-callgrind-runner`]: https://crates.io/crates/iai-callgrind-runner
+[`gungraun-runner`]: https://crates.io/crates/gungraun-runner
 [Valgrind]: https://valgrind.org/
 
 ## Subtree synchronization
diff --git a/library/compiler-builtins/Cargo.toml b/library/compiler-builtins/Cargo.toml
index 956d738f3b1f..6b4e691a1ebe 100644
--- a/library/compiler-builtins/Cargo.toml
+++ b/library/compiler-builtins/Cargo.toml
@@ -51,5 +51,5 @@ codegen-units = 1
 lto = "fat"
 
 [profile.bench]
-# Required for iai-callgrind
+# Required for gungraun
 debug = true
diff --git a/library/compiler-builtins/builtins-test/Cargo.toml b/library/compiler-builtins/builtins-test/Cargo.toml
index 00a9d8579d11..2bcffe349318 100644
--- a/library/compiler-builtins/builtins-test/Cargo.toml
+++ b/library/compiler-builtins/builtins-test/Cargo.toml
@@ -14,7 +14,7 @@ rand_xoshiro = "0.7"
 # To compare float builtins against
 rustc_apfloat = "0.2.3"
 # Really a dev dependency, but dev dependencies can't be optional
-iai-callgrind = { version = "0.15.2", optional = true }
+gungraun = { version = "0.17.0", optional = true }
 
 [dependencies.compiler_builtins]
 path = "../builtins-shim"
@@ -46,8 +46,8 @@ no-sys-f16-f64-convert = []
 # Skip tests that rely on f16 symbols being available on the system
 no-sys-f16 = ["no-sys-f16-f64-convert"]
 
-# Enable icount benchmarks (requires iai-callgrind and valgrind)
-icount = ["dep:iai-callgrind"]
+# Enable icount benchmarks (requires gungraun-runner and valgrind locally)
+icount = ["dep:gungraun"]
 
 # Enable report generation without bringing in more dependencies by default
 benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
diff --git a/library/compiler-builtins/builtins-test/benches/mem_icount.rs b/library/compiler-builtins/builtins-test/benches/mem_icount.rs
index bd88cf80c7de..966ceea86d8d 100644
--- a/library/compiler-builtins/builtins-test/benches/mem_icount.rs
+++ b/library/compiler-builtins/builtins-test/benches/mem_icount.rs
@@ -1,11 +1,11 @@
-//! Benchmarks that use Callgrind (via `iai_callgrind`) to report instruction count metrics. This
+//! Benchmarks that use Callgrind (via `gungraun`) to report instruction count metrics. This
 //! is stable enough to be tested in CI.
 
 use std::hint::black_box;
 use std::{ops, slice};
 
 use compiler_builtins::mem::{memcmp, memcpy, memmove, memset};
-use iai_callgrind::{library_benchmark, library_benchmark_group, main};
+use gungraun::{library_benchmark, library_benchmark_group, main};
 
 const PAGE_SIZE: usize = 0x1000; // 4 kiB
 const MAX_ALIGN: usize = 512; // assume we may use avx512 operations one day
diff --git a/library/compiler-builtins/ci/bench-icount.sh b/library/compiler-builtins/ci/bench-icount.sh
index 64405fdd02dc..56aa1df07e46 100755
--- a/library/compiler-builtins/ci/bench-icount.sh
+++ b/library/compiler-builtins/ci/bench-icount.sh
@@ -10,7 +10,7 @@ if [ -z "$target" ]; then
     target="$host_target"
 fi
 
-iai_home="iai-home"
+gungraun_home="gungraun-home"
 
 # Use the arch as a tag to disambiguate artifacts
 tag="$(echo "$target" | cut -d'-' -f1)"
@@ -18,6 +18,10 @@ tag="$(echo "$target" | cut -d'-' -f1)"
 # Download the baseline from main
 ./ci/ci-util.py locate-baseline --download --extract --tag "$tag"
 
+# FIXME: migrate baselines saved under the old `iai-home` directory name.
+# This can be dropped after the first run with gungraun.
+[ -d "iai-home" ] && mv "iai-home" "$gungraun_home"
+
 # Run benchmarks once
 function run_icount_benchmarks() {
     cargo_args=(
@@ -26,19 +30,19 @@ function run_icount_benchmarks() {
         "--features" "unstable,unstable-float,icount"
     )
 
-    iai_args=(
-        "--home" "$(pwd)/$iai_home"
-        "--callgrind-limits=ir=5.0"
+    gungraun_args=(
+        "--home" "$(pwd)/$gungraun_home"
+        "--callgrind-limits=ir=5.0%"
         "--save-summary"
     )
 
-    # Parse `cargo_arg0 cargo_arg1 -- iai_arg0 iai_arg1` syntax
-    parsing_iai_args=0
+    # Parse `cargo_arg0 cargo_arg1 -- gungraun_arg0 gungraun_arg1` syntax
+    parsing_gungraun_args=0
     while [ "$#" -gt 0 ]; do
-        if [ "$parsing_iai_args" == "1" ]; then
-            iai_args+=("$1")
+        if [ "$parsing_gungraun_args" == "1" ]; then
+            gungraun_args+=("$1")
         elif [ "$1" == "--" ]; then
-            parsing_iai_args=1
+            parsing_gungraun_args=1
         else
             cargo_args+=("$1")
         fi
@@ -46,9 +50,9 @@ function run_icount_benchmarks() {
         shift
     done
 
-    # Run iai-callgrind benchmarks. Do this in a subshell with `&& true` to
-    # capture rather than exit on error.
-    (cargo bench "${cargo_args[@]}" -- "${iai_args[@]}") && true
+    # Run gungraun benchmarks. Do this in a subshell with `&& true` to capture
+    # rather than exit on error.
+    (cargo bench "${cargo_args[@]}" -- "${gungraun_args[@]}") && true
     exit_code="$?"
 
     if [ "$exit_code" -eq 0 ]; then
@@ -68,4 +72,4 @@ run_icount_benchmarks -- --save-baseline=hardfloat
 # Name and tar the new baseline
 name="baseline-icount-$tag-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
 echo "BASELINE_NAME=$name" >>"$GITHUB_ENV"
-tar cJf "$name.tar.xz" "$iai_home"
+tar cJf "$name.tar.xz" "$gungraun_home"
diff --git a/library/compiler-builtins/ci/ci-util.py b/library/compiler-builtins/ci/ci-util.py
index cb582db35194..113820b70215 100755
--- a/library/compiler-builtins/ci/ci-util.py
+++ b/library/compiler-builtins/ci/ci-util.py
@@ -38,7 +38,7 @@ USAGE = cleandoc(
             `--tag` can be specified to look for artifacts with a specific tag, such as
             for a specific architecture.
 
-            Note that `--extract` will overwrite files in `iai-home`.
+            Note that `--extract` will overwrite files in `gungraun-home`.
 
         handle-bench-regressions PR_NUMBER
             Exit with success if the pull request contains a line starting with
diff --git a/library/compiler-builtins/libm-test/Cargo.toml b/library/compiler-builtins/libm-test/Cargo.toml
index 0af6b0c1da5c..adecfc1af6b8 100644
--- a/library/compiler-builtins/libm-test/Cargo.toml
+++ b/library/compiler-builtins/libm-test/Cargo.toml
@@ -21,8 +21,8 @@ build-musl = ["dep:musl-math-sys"]
 # Enable report generation without bringing in more dependencies by default
 benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
 
-# Enable icount benchmarks (requires iai-callgrind and valgrind)
-icount = ["dep:iai-callgrind"]
+# Enable icount benchmarks (requires gungraun-runner and valgrind locally)
+icount = ["dep:gungraun"]
 
 # Run with a reduced set of benchmarks, such as for CI
 short-benchmarks = []
@@ -31,7 +31,7 @@ short-benchmarks = []
 anyhow = "1.0.98"
 # This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`.
 gmp-mpfr-sys = { version = "1.6.5", optional = true, default-features = false }
-iai-callgrind = { version = "0.15.2", optional = true }
+gungraun = { version = "0.17.0", optional = true }
 indicatif = { version = "0.18.0", default-features = false }
 libm = { path = "../libm", features = ["unstable-public-internals"] }
 libm-macros = { path = "../crates/libm-macros" }
diff --git a/library/compiler-builtins/libm-test/benches/icount.rs b/library/compiler-builtins/libm-test/benches/icount.rs
index 0b85771225dd..fb856d9be451 100644
--- a/library/compiler-builtins/libm-test/benches/icount.rs
+++ b/library/compiler-builtins/libm-test/benches/icount.rs
@@ -1,10 +1,10 @@
-//! Benchmarks that use `iai-cachegrind` to be reasonably CI-stable.
+//! Benchmarks that use `gungraun` to be reasonably CI-stable.
 #![feature(f16)]
 #![feature(f128)]
 
 use std::hint::black_box;
 
-use iai_callgrind::{library_benchmark, library_benchmark_group, main};
+use gungraun::{library_benchmark, library_benchmark_group, main};
 use libm::support::{HInt, Hexf, hf16, hf32, hf64, hf128, u256};
 use libm_test::generate::spaced;
 use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op};
@@ -156,7 +156,13 @@ fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) {
 
 library_benchmark_group!(
     name = icount_bench_u128_group;
-    benchmarks = icount_bench_u128_widen_mul, icount_bench_u256_narrowing_div, icount_bench_u256_add, icount_bench_u256_sub, icount_bench_u256_shl, icount_bench_u256_shr
+    benchmarks =
+    icount_bench_u128_widen_mul,
+    icount_bench_u256_narrowing_div,
+    icount_bench_u256_add,
+    icount_bench_u256_sub,
+    icount_bench_u256_shl,
+    icount_bench_u256_shr
 );
 
 #[library_benchmark]