bench: Update the benchmark runner to gungraun 0.17

`iai-callgrind` was renamed to `gungraun` and had a new release. Update everything to match. There shouldn't be any changes to observable behavior here.
2025-12-04 17:14:53 -05:00 · 2025-12-04 17:14:53 -05:00 · 1cfe3585e7
commit 1cfe3585e7
parent 382e9fa4a3
10 changed files with 45 additions and 34 deletions
--- a/library/compiler-builtins/.github/workflows/main.yaml
+++ b/library/compiler-builtins/.github/workflows/main.yaml
@ -247,13 +247,13 @@ jobs:
    - name: Set up dependencies
      run: |
        sudo apt-get update
-        sudo apt-get install -y valgrind gdb libc6-dbg # Needed for iai-callgrind
+        sudo apt-get install -y valgrind gdb libc6-dbg # Needed for gungraun
        rustup update "$BENCHMARK_RUSTC" --no-self-update
        rustup default "$BENCHMARK_RUSTC"
-        # Install the version of iai-callgrind-runner that is specified in Cargo.toml
-        iai_version="$(cargo metadata --format-version=1 --features icount |
-           jq -r '.packages[] | select(.name == "iai-callgrind").version')"
-        cargo binstall -y iai-callgrind-runner --version "$iai_version"
+        # Install the version of gungraun-runner that is specified in Cargo.toml
+        gungraun_version="$(cargo metadata --format-version=1 --features icount |
+           jq -r '.packages[] | select(.name == "gungraun").version')"
+        cargo binstall -y gungraun-runner --version "$gungraun_version"
        sudo apt-get install valgrind
    - uses: Swatinem/rust-cache@v2
      with:
--- a/library/compiler-builtins/.gitignore
+++ b/library/compiler-builtins/.gitignore
@ -9,6 +9,7 @@ compiler-rt
 # Benchmark cache
 baseline-*
 iai-home
+gungraun-home

 # Temporary files
 *.bk
--- a/library/compiler-builtins/CONTRIBUTING.md
+++ b/library/compiler-builtins/CONTRIBUTING.md
@ -150,8 +150,8 @@ cargo bench --no-default-features \
 ```

 There are also benchmarks that check instruction count behind the `icount`
-feature. These require [`iai-callgrind-runner`] (via Cargo) and [Valgrind]
-to be installed, which means these only run on limited platforms.
+feature. These require [`gungraun-runner`] (via Cargo) and [Valgrind] to be
+installed, which means these only run on limited platforms.

 Instruction count benchmarks are run as part of CI to flag performance
 regressions.
@ -163,7 +163,7 @@ cargo bench --no-default-features \
    --bench icount --bench mem_icount
 ```

-[`iai-callgrind-runner`]: https://crates.io/crates/iai-callgrind-runner
+[`gungraun-runner`]: https://crates.io/crates/gungraun-runner
 [Valgrind]: https://valgrind.org/

 ## Subtree synchronization
--- a/library/compiler-builtins/Cargo.toml
+++ b/library/compiler-builtins/Cargo.toml
@ -51,5 +51,5 @@ codegen-units = 1
 lto = "fat"

 [profile.bench]
-# Required for iai-callgrind
+# Required for gungraun
 debug = true
--- a/library/compiler-builtins/builtins-test/Cargo.toml
+++ b/library/compiler-builtins/builtins-test/Cargo.toml
@ -14,7 +14,7 @@ rand_xoshiro = "0.7"
 # To compare float builtins against
 rustc_apfloat = "0.2.3"
 # Really a dev dependency, but dev dependencies can't be optional
-iai-callgrind = { version = "0.15.2", optional = true }
+gungraun = { version = "0.17.0", optional = true }

 [dependencies.compiler_builtins]
 path = "../builtins-shim"
@ -46,8 +46,8 @@ no-sys-f16-f64-convert = []
 # Skip tests that rely on f16 symbols being available on the system
 no-sys-f16 = ["no-sys-f16-f64-convert"]

-# Enable icount benchmarks (requires iai-callgrind and valgrind)
-icount = ["dep:iai-callgrind"]
+# Enable icount benchmarks (requires gungraun-runner and valgrind locally)
+icount = ["dep:gungraun"]

 # Enable report generation without bringing in more dependencies by default
 benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
--- a/library/compiler-builtins/builtins-test/benches/mem_icount.rs
+++ b/library/compiler-builtins/builtins-test/benches/mem_icount.rs
@ -1,11 +1,11 @@
-//! Benchmarks that use Callgrind (via `iai_callgrind`) to report instruction count metrics. This
+//! Benchmarks that use Callgrind (via `gungraun`) to report instruction count metrics. This
 //! is stable enough to be tested in CI.

 use std::hint::black_box;
 use std::{ops, slice};

 use compiler_builtins::mem::{memcmp, memcpy, memmove, memset};
-use iai_callgrind::{library_benchmark, library_benchmark_group, main};
+use gungraun::{library_benchmark, library_benchmark_group, main};

 const PAGE_SIZE: usize = 0x1000; // 4 kiB
 const MAX_ALIGN: usize = 512; // assume we may use avx512 operations one day
--- a/library/compiler-builtins/ci/bench-icount.sh
+++ b/library/compiler-builtins/ci/bench-icount.sh
@ -10,7 +10,7 @@ if [ -z "$target" ]; then
    target="$host_target"
 fi

-iai_home="iai-home"
+gungraun_home="gungraun-home"

 # Use the arch as a tag to disambiguate artifacts
 tag="$(echo "$target" | cut -d'-' -f1)"
@ -18,6 +18,10 @@ tag="$(echo "$target" | cut -d'-' -f1)"
 # Download the baseline from main
 ./ci/ci-util.py locate-baseline --download --extract --tag "$tag"

+# FIXME: migration from iai-named baselines to gungraun, can be dropped
+# after the first run with gungraun.
+[ -d "iai-home" ] && mv "iai-home" "$gungraun_home"
+
 # Run benchmarks once
 function run_icount_benchmarks() {
    cargo_args=(
@ -26,19 +30,19 @@ function run_icount_benchmarks() {
        "--features" "unstable,unstable-float,icount"
    )

-    iai_args=(
-        "--home" "$(pwd)/$iai_home"
-        "--callgrind-limits=ir=5.0"
+    gungraun_args=(
+        "--home" "$(pwd)/$gungraun_home"
+        "--callgrind-limits=ir=5.0%"
        "--save-summary"
    )

-    # Parse `cargo_arg0 cargo_arg1 -- iai_arg0 iai_arg1` syntax
-    parsing_iai_args=0
+    # Parse `cargo_arg0 cargo_arg1 -- gungraun_arg0 gungraun_arg1` syntax
+    parsing_gungraun_args=0
    while [ "$#" -gt 0 ]; do
-        if [ "$parsing_iai_args" == "1" ]; then
-            iai_args+=("$1")
+        if [ "$parsing_gungraun_args" == "1" ]; then
+            gungraun_args+=("$1")
        elif [ "$1" == "--" ]; then
-            parsing_iai_args=1
+            parsing_gungraun_args=1
        else
            cargo_args+=("$1")
        fi
@ -46,9 +50,9 @@ function run_icount_benchmarks() {
        shift
    done

-    # Run iai-callgrind benchmarks. Do this in a subshell with `&& true` to
-    # capture rather than exit on error.
-    (cargo bench "${cargo_args[@]}" -- "${iai_args[@]}") && true
+    # Run gungraun benchmarks. Do this in a subshell with `&& true` to capture
+    # rather than exit on error.
+    (cargo bench "${cargo_args[@]}" -- "${gungraun_args[@]}") && true
    exit_code="$?"

    if [ "$exit_code" -eq 0 ]; then
@ -68,4 +72,4 @@ run_icount_benchmarks -- --save-baseline=hardfloat
 # Name and tar the new baseline
 name="baseline-icount-$tag-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
 echo "BASELINE_NAME=$name" >>"$GITHUB_ENV"
-tar cJf "$name.tar.xz" "$iai_home"
+tar cJf "$name.tar.xz" "$gungraun_home"
--- a/library/compiler-builtins/ci/ci-util.py
+++ b/library/compiler-builtins/ci/ci-util.py
@ -38,7 +38,7 @@ USAGE = cleandoc(
            `--tag` can be specified to look for artifacts with a specific tag, such as
            for a specific architecture.

-            Note that `--extract` will overwrite files in `iai-home`.
+            Note that `--extract` will overwrite files in `gungraun-home`.

        handle-bench-regressions PR_NUMBER
            Exit with success if the pull request contains a line starting with
--- a/library/compiler-builtins/libm-test/Cargo.toml
+++ b/library/compiler-builtins/libm-test/Cargo.toml
@ -21,8 +21,8 @@ build-musl = ["dep:musl-math-sys"]
 # Enable report generation without bringing in more dependencies by default
 benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]

-# Enable icount benchmarks (requires iai-callgrind and valgrind)
-icount = ["dep:iai-callgrind"]
+# Enable icount benchmarks (requires gungraun-runner and valgrind locally)
+icount = ["dep:gungraun"]

 # Run with a reduced set of benchmarks, such as for CI
 short-benchmarks = []
@ -31,7 +31,7 @@ short-benchmarks = []
 anyhow = "1.0.98"
 # This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`.
 gmp-mpfr-sys = { version = "1.6.5", optional = true, default-features = false }
-iai-callgrind = { version = "0.15.2", optional = true }
+gungraun = { version = "0.17.0", optional = true }
 indicatif = { version = "0.18.0", default-features = false }
 libm = { path = "../libm", features = ["unstable-public-internals"] }
 libm-macros = { path = "../crates/libm-macros" }
--- a/library/compiler-builtins/libm-test/benches/icount.rs
+++ b/library/compiler-builtins/libm-test/benches/icount.rs
@ -1,10 +1,10 @@
-//! Benchmarks that use `iai-cachegrind` to be reasonably CI-stable.
+//! Benchmarks that use `gungraun` to be reasonably CI-stable.
 #![feature(f16)]
 #![feature(f128)]

 use std::hint::black_box;

-use iai_callgrind::{library_benchmark, library_benchmark_group, main};
+use gungraun::{library_benchmark, library_benchmark_group, main};
 use libm::support::{HInt, Hexf, hf16, hf32, hf64, hf128, u256};
 use libm_test::generate::spaced;
 use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op};
@ -156,7 +156,13 @@ fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) {

 library_benchmark_group!(
    name = icount_bench_u128_group;
-    benchmarks = icount_bench_u128_widen_mul, icount_bench_u256_narrowing_div, icount_bench_u256_add, icount_bench_u256_sub, icount_bench_u256_shl, icount_bench_u256_shr
+    benchmarks =
+    icount_bench_u128_widen_mul,
+    icount_bench_u256_narrowing_div,
+    icount_bench_u256_add,
+    icount_bench_u256_sub,
+    icount_bench_u256_shl,
+    icount_bench_u256_shr
 );

 #[library_benchmark]