From f56b41dbbdc660f303e0946c50be8989e3657689 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 16 Jan 2025 07:30:51 +0000
Subject: [PATCH 1/5] Provide a way to override iteration count

Benchmarks need a way to limit how many iterations get run. Introduce a
way to inject this information here.
---
 .../libm/crates/libm-test/src/run_cfg.rs      | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)
diff --git a/library/compiler-builtins/libm/crates/libm-test/src/run_cfg.rs b/library/compiler-builtins/libm/crates/libm-test/src/run_cfg.rs
index 6763de8bcc8d..3e91101f6cac 100644
--- a/library/compiler-builtins/libm/crates/libm-test/src/run_cfg.rs
+++ b/library/compiler-builtins/libm/crates/libm-test/src/run_cfg.rs
@@ -40,6 +40,8 @@ pub struct CheckCtx {
     /// Source of truth for tests.
     pub basis: CheckBasis,
     pub gen_kind: GeneratorKind,
+    /// If specified, this value will override the value returned by [`iteration_count`].
+    pub override_iterations: Option<u64>,
 }
 
 impl CheckCtx {
@@ -53,6 +55,7 @@ impl CheckCtx {
             base_name_str: fn_ident.base_name().as_str(),
             basis,
             gen_kind,
+            override_iterations: None,
         };
         ret.ulp = crate::default_ulp(&ret);
         ret
@@ -62,6 +65,10 @@ impl CheckCtx {
     pub fn input_count(&self) -> usize {
         self.fn_ident.math_op().rust_sig.args.len()
     }
+
+    pub fn override_iterations(&mut self, count: u64) {
+        self.override_iterations = Some(count)
+    }
 }
 
 /// Possible items to test against
@@ -71,6 +78,8 @@ pub enum CheckBasis {
     Musl,
     /// Check against infinite precision (MPFR).
     Mpfr,
+    /// Benchmarks or other times when this is not relevant.
+    None,
 }
 
 /// The different kinds of generators that provide test input, which account for input pattern
@@ -216,6 +225,12 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
         total_iterations = 800;
     }
 
+    let mut overridden = false;
+    if let Some(count) = ctx.override_iterations {
+        total_iterations = count;
+        overridden = true;
+    }
+
     // Adjust for the number of inputs
     let ntests = match t_env.input_count {
         1 => total_iterations,
@@ -223,6 +238,7 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
         3 => (total_iterations as f64).cbrt().ceil() as u64,
         _ => panic!("test has more than three arguments"),
     };
+
     let total = ntests.pow(t_env.input_count.try_into().unwrap());
 
     let seed_msg = match ctx.gen_kind {
@@ -235,12 +251,13 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
 
     test_log(&format!(
         "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {ntests} iterations \
-         ({total} total){seed_msg}",
+         ({total} total){seed_msg}{omsg}",
         gen_kind = ctx.gen_kind,
         basis = ctx.basis,
         fn_ident = ctx.fn_ident,
         arg = argnum + 1,
         args = t_env.input_count,
+        omsg = if overridden { " (overridden)" } else { "" }
     ));
 
     ntests

From 490ebbb187ff309db6599eb1616b100114dc834d Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 16 Jan 2025 07:30:38 +0000
Subject: [PATCH 2/5] Add benchmarks using iai-callgrind

Running walltime benchmarks in CI is notoriously unstable. Introduce
benchmarks that instead use instruction count and other more
reproducible metrics, using `iai-callgrind` [1], which we are able to
run in CI with a high degree of reproducibility.

Inputs to this benchmark are a logspace sweep, which gives an
approximation for real-world use, but may fail to indicate outlier
cases.

[1]: https://github.com/iai-callgrind/iai-callgrind
---
 library/compiler-builtins/libm/Cargo.toml     |   4 +
 .../libm/crates/libm-test/Cargo.toml          |   9 +
 .../libm/crates/libm-test/benches/icount.rs   | 175 ++++++++++++++++++
 .../libm/crates/libm-test/src/lib.rs          |   3 +-
 .../libm/crates/libm-test/src/op.rs           |   2 +
 5 files changed, 192 insertions(+), 1 deletion(-)
 create mode 100644 library/compiler-builtins/libm/crates/libm-test/benches/icount.rs

diff --git a/library/compiler-builtins/libm/Cargo.toml b/library/compiler-builtins/libm/Cargo.toml
index f84f3eac6a4d..18d89997dcba 100644
--- a/library/compiler-builtins/libm/Cargo.toml
+++ b/library/compiler-builtins/libm/Cargo.toml
@@ -73,3 +73,7 @@ debug-assertions = true
 inherits = "release"
 lto = "fat"
 overflow-checks = true
+
+[profile.bench]
+# Required for iai-callgrind
+debug = true
diff --git a/library/compiler-builtins/libm/crates/libm-test/Cargo.toml b/library/compiler-builtins/libm/crates/libm-test/Cargo.toml
index d3f18ab3e5bc..3a1ba87962a7 100644
--- a/library/compiler-builtins/libm/crates/libm-test/Cargo.toml
+++ b/library/compiler-builtins/libm/crates/libm-test/Cargo.toml
@@ -20,6 +20,9 @@ build-musl = ["dep:musl-math-sys"]
 # Enable report generation without bringing in more dependencies by default
 benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
 
+# Enable icount benchmarks (requires iai-callgrind and valgrind)
+icount = ["dep:iai-callgrind"]
+
 # Run with a reduced set of benchmarks, such as for CI
 short-benchmarks = []
 
@@ -27,6 +30,7 @@ short-benchmarks = []
 anyhow = "1.0.90"
 az = { version = "1.2.1", optional = true }
 gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false, features = ["mpfr"] }
+iai-callgrind = { version = "0.14.0", optional = true }
 indicatif = { version = "0.17.9", default-features = false }
 libm = { path = "../..", features = ["unstable-public-internals"] }
 libm-macros = { path = "../libm-macros" }
@@ -48,6 +52,11 @@ rand = { version = "0.8.5", optional = true }
 criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
 libtest-mimic = "0.8.1"
 
+[[bench]]
+name = "icount"
+harness = false
+required-features = ["icount"]
+
 [[bench]]
 name = "random"
 harness = false
diff --git a/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs b/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs
new file mode 100644
index 000000000000..3a66249e85dc
--- /dev/null
+++ b/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs
@@ -0,0 +1,175 @@
+//! Benchmarks that use `iai-callgrind` to be reasonably CI-stable.
+
+use std::hint::black_box;
+
+use iai_callgrind::{library_benchmark, library_benchmark_group, main};
+use libm_test::gen::spaced;
+use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op};
+
+const BENCH_ITER_ITEMS: u64 = 500;
+
+macro_rules! icount_benches {
+    (
+        fn_name: $fn_name:ident,
+        attrs: [$($_attr:meta),*],
+    ) => {
+        paste::paste! {
+            // Construct benchmark inputs from the logspace generator.
+            fn [< setup_ $fn_name >]() -> Vec<OpRustArgs<op::$fn_name::Routine>> {
+                type Op = op::$fn_name::Routine;
+                let mut ctx = CheckCtx::new(
+                    Op::IDENTIFIER,
+                    CheckBasis::None,
+                    GeneratorKind::QuickSpaced
+                );
+                ctx.override_iterations(BENCH_ITER_ITEMS);
+                let ret = spaced::get_test_cases::<Op>(&ctx).0.collect::<Vec<_>>();
+                println!("operation {}, {} steps", Op::NAME, ret.len());
+                ret
+            }
+
+            // Run benchmarks with the above inputs.
+            #[library_benchmark]
+            #[bench::logspace([< setup_ $fn_name >]())]
+            fn [< icount_bench_ $fn_name >](cases: Vec<OpRustArgs<op::$fn_name::Routine>>) {
+                type Op = op::$fn_name::Routine;
+                let f = black_box(Op::ROUTINE);
+                for input in cases.iter().copied() {
+                    input.call(f);
+                }
+            }
+
+            library_benchmark_group!(
+                name = [< icount_bench_ $fn_name _group  >];
+                benchmarks = [< icount_bench_ $fn_name >]
+            );
+        }
+    };
+}
+
+libm_macros::for_each_function! {
+    callback: icount_benches,
+}
+
+main!(
+    library_benchmark_groups = icount_bench_acos_group,
+    icount_bench_acosf_group,
+    icount_bench_acosh_group,
+    icount_bench_acoshf_group,
+    icount_bench_asin_group,
+    icount_bench_asinf_group,
+    icount_bench_asinh_group,
+    icount_bench_asinhf_group,
+    icount_bench_atan2_group,
+    icount_bench_atan2f_group,
+    icount_bench_atan_group,
+    icount_bench_atanf_group,
+    icount_bench_atanh_group,
+    icount_bench_atanhf_group,
+    icount_bench_cbrt_group,
+    icount_bench_cbrtf_group,
+    icount_bench_ceil_group,
+    icount_bench_ceilf_group,
+    icount_bench_copysign_group,
+    icount_bench_copysignf128_group,
+    icount_bench_copysignf16_group,
+    icount_bench_copysignf_group,
+    icount_bench_cos_group,
+    icount_bench_cosf_group,
+    icount_bench_cosh_group,
+    icount_bench_coshf_group,
+    icount_bench_erf_group,
+    icount_bench_erfc_group,
+    icount_bench_erfcf_group,
+    icount_bench_erff_group,
+    icount_bench_exp10_group,
+    icount_bench_exp10f_group,
+    icount_bench_exp2_group,
+    icount_bench_exp2f_group,
+    icount_bench_exp_group,
+    icount_bench_expf_group,
+    icount_bench_expm1_group,
+    icount_bench_expm1f_group,
+    icount_bench_fabs_group,
+    icount_bench_fabsf128_group,
+    icount_bench_fabsf16_group,
+    icount_bench_fabsf_group,
+    icount_bench_fdim_group,
+    icount_bench_fdimf128_group,
+    icount_bench_fdimf16_group,
+    icount_bench_fdimf_group,
+    icount_bench_floor_group,
+    icount_bench_floorf_group,
+    icount_bench_fma_group,
+    icount_bench_fmaf_group,
+    icount_bench_fmax_group,
+    icount_bench_fmaxf_group,
+    icount_bench_fmin_group,
+    icount_bench_fminf_group,
+    icount_bench_fmod_group,
+    icount_bench_fmodf_group,
+    icount_bench_frexp_group,
+    icount_bench_frexpf_group,
+    icount_bench_hypot_group,
+    icount_bench_hypotf_group,
+    icount_bench_ilogb_group,
+    icount_bench_ilogbf_group,
+    icount_bench_j0_group,
+    icount_bench_j0f_group,
+    icount_bench_j1_group,
+    icount_bench_j1f_group,
+    icount_bench_jn_group,
+    icount_bench_jnf_group,
+    icount_bench_ldexp_group,
+    icount_bench_ldexpf_group,
+    icount_bench_lgamma_group,
+    icount_bench_lgamma_r_group,
+    icount_bench_lgammaf_group,
+    icount_bench_lgammaf_r_group,
+    icount_bench_log10_group,
+    icount_bench_log10f_group,
+    icount_bench_log1p_group,
+    icount_bench_log1pf_group,
+    icount_bench_log2_group,
+    icount_bench_log2f_group,
+    icount_bench_log_group,
+    icount_bench_logf_group,
+    icount_bench_modf_group,
+    icount_bench_modff_group,
+    icount_bench_nextafter_group,
+    icount_bench_nextafterf_group,
+    icount_bench_pow_group,
+    icount_bench_powf_group,
+    icount_bench_remainder_group,
+    icount_bench_remainderf_group,
+    icount_bench_remquo_group,
+    icount_bench_remquof_group,
+    icount_bench_rint_group,
+    icount_bench_rintf_group,
+    icount_bench_round_group,
+    icount_bench_roundf_group,
+    icount_bench_scalbn_group,
+    icount_bench_scalbnf_group,
+    icount_bench_sin_group,
+    icount_bench_sinf_group,
+    icount_bench_sinh_group,
+    icount_bench_sinhf_group,
+    icount_bench_sqrt_group,
+    icount_bench_sqrtf_group,
+    icount_bench_tan_group,
+    icount_bench_tanf_group,
+    icount_bench_tanh_group,
+    icount_bench_tanhf_group,
+    icount_bench_tgamma_group,
+    icount_bench_tgammaf_group,
+    icount_bench_trunc_group,
+    icount_bench_truncf128_group,
+    icount_bench_truncf16_group,
+    icount_bench_truncf_group,
+    icount_bench_y0_group,
+    icount_bench_y0f_group,
+    icount_bench_y1_group,
+    icount_bench_y1f_group,
+    icount_bench_yn_group,
+    icount_bench_ynf_group,
+);
diff --git a/library/compiler-builtins/libm/crates/libm-test/src/lib.rs b/library/compiler-builtins/libm/crates/libm-test/src/lib.rs
index cb89f1c8ba7f..b90423c1bacc 100644
--- a/library/compiler-builtins/libm/crates/libm-test/src/lib.rs
+++ b/library/compiler-builtins/libm/crates/libm-test/src/lib.rs
@@ -24,7 +24,8 @@ pub use f8_impl::f8;
 pub use libm::support::{Float, Int, IntTy, MinInt};
 pub use num::{FloatExt, linear_ints, logspace};
 pub use op::{
-    BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustFn, OpRustRet, Ty,
+    BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustArgs, OpRustFn, OpRustRet,
+    Ty,
 };
 pub use precision::{MaybeOverride, SpecialCase, default_ulp};
 use run_cfg::EXTENSIVE_MAX_ITERATIONS;
diff --git a/library/compiler-builtins/libm/crates/libm-test/src/op.rs b/library/compiler-builtins/libm/crates/libm-test/src/op.rs
index 8329d3424c32..239c9a3e1fdf 100644
--- a/library/compiler-builtins/libm/crates/libm-test/src/op.rs
+++ b/library/compiler-builtins/libm/crates/libm-test/src/op.rs
@@ -100,6 +100,8 @@ pub type OpCFn<Op> = <Op as MathOp>::CFn;
 pub type OpCRet<Op> = <Op as MathOp>::CRet;
 /// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types).
 pub type OpRustFn<Op> = <Op as MathOp>::RustFn;
+/// Access the associated `RustArgs` type from an op (helper to avoid ambiguous associated types).
+pub type OpRustArgs<Op> = <Op as MathOp>::RustArgs;
 /// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types).
 pub type OpRustRet<Op> = <Op as MathOp>::RustRet;
 

From cdb1e680e03bf1ec9330c0951a8ee328f6a272b6 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 16 Jan 2025 07:31:34 +0000
Subject: [PATCH 3/5] Run iai-callgrind benchmarks in CI

Add support in `ci-util.py` for finding the most recent baseline and
downloading it, which new tests can then be compared against.

Arbitrarily select nightly-2025-01-16 as the rustc version to pin to in
benchmarks.
---
 .../libm/.github/workflows/main.yaml          |  58 ++++++-
 library/compiler-builtins/libm/ci/ci-util.py  | 159 +++++++++++++++++-
 2 files changed, 207 insertions(+), 10 deletions(-)

diff --git a/library/compiler-builtins/libm/.github/workflows/main.yaml b/library/compiler-builtins/libm/.github/workflows/main.yaml
index 99a32a82ec1d..9face93110b1 100644
--- a/library/compiler-builtins/libm/.github/workflows/main.yaml
+++ b/library/compiler-builtins/libm/.github/workflows/main.yaml
@@ -10,6 +10,7 @@ env:
   RUSTDOCFLAGS: -Dwarnings
   RUSTFLAGS: -Dwarnings
   RUST_BACKTRACE: full
+  BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducible results
 
 jobs:
   test:
@@ -147,19 +148,70 @@ jobs:
   benchmarks:
     name: Benchmarks
     runs-on: ubuntu-24.04
+    timeout-minutes: 20
     steps:
     - uses: actions/checkout@master
-    - name: Install Rust
-      run: rustup update nightly --no-self-update && rustup default nightly
+    - uses: taiki-e/install-action@cargo-binstall
+
+    - name: Set up dependencies
+      run: |
+        rustup update "$BENCHMARK_RUSTC" --no-self-update
+        rustup default "$BENCHMARK_RUSTC"
+        # Install the version of iai-callgrind-runner that is specified in Cargo.toml
+        iai_version="$(cargo metadata --format-version=1 --features icount |
+           jq -r '.packages[] | select(.name == "iai-callgrind").version')"
+        cargo binstall -y iai-callgrind-runner --version "$iai_version"
+        sudo apt-get install valgrind
+
     - uses: Swatinem/rust-cache@v2
     - name: Download musl source
       run: ./ci/download-musl.sh
-    - run: |
+
+    - name: Run icount benchmarks
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      run: |
+        set -eux
+        iai_home="iai-home"
+        # Download the baseline from master
+        ./ci/ci-util.py locate-baseline --download --extract
+
+        # Run iai-callgrind benchmarks
+        cargo bench --no-default-features \
+          --features unstable,unstable-float,icount \
+          --bench icount \
+          -- \
+          --save-baseline=default \
+          --home "$(pwd)/$iai_home" \
+          --regression='ir=5.0' \
+          --save-summary
+        # NB: iai-callgrind should exit on error but does not, so we inspect the summary
+        # for errors. See https://github.com/iai-callgrind/iai-callgrind/issues/337
+        ./ci/ci-util.py check-regressions "$iai_home"
+
+        # Name and tar the new baseline
+        name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
+        echo "BASELINE_NAME=$name" >> "$GITHUB_ENV"
+        tar cJf "$name.tar.xz" "$iai_home"
+
+    - name: Upload the benchmark baseline
+      uses: actions/upload-artifact@v4
+      with:
+        name: ${{ env.BASELINE_NAME }}
+        path: ${{ env.BASELINE_NAME }}.tar.xz
+    
+    - name: Run wall time benchmarks
+      run: |
         # Always use the same seed for benchmarks. Ideally we should switch to a
         # non-random generator.
         export LIBM_SEED=benchesbenchesbenchesbencheswoo!
         cargo bench --all --features libm-test/short-benchmarks,libm-test/build-musl
 
+    - name: Print test logs if available
+      if: always()
+      run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
+      shell: bash
+
   msrv:
     name: Check MSRV
     runs-on: ubuntu-24.04
diff --git a/library/compiler-builtins/libm/ci/ci-util.py b/library/compiler-builtins/libm/ci/ci-util.py
index 733ec26fa33c..1ec69b002aed 100755
--- a/library/compiler-builtins/libm/ci/ci-util.py
+++ b/library/compiler-builtins/libm/ci/ci-util.py
@@ -9,6 +9,7 @@ import json
 import subprocess as sp
 import sys
 from dataclasses import dataclass
+from glob import glob, iglob
 from inspect import cleandoc
 from os import getenv
 from pathlib import Path
@@ -18,16 +19,33 @@ USAGE = cleandoc(
     """
     usage:
 
-    ./ci/ci-util.py <SUBCOMMAND>
+    ./ci/ci-util.py <COMMAND> [flags]
 
-    SUBCOMMAND:
-        generate-matrix    Calculate a matrix of which functions had source change,
-                           print that as JSON object.
+    COMMAND:
+        generate-matrix
+            Calculate a matrix of which functions had source change, print that as
+             a JSON object.
+
+        locate-baseline [--download] [--extract]
+            Locate the most recent benchmark baseline available in CI and, if flags
+            specify, download and extract it. Never exits with nonzero status if
+            downloading fails.
+
+            Note that `--extract` will overwrite files in `iai-home`.
+
+        check-regressions [iai-home]
+            Check `iai-home` (or `iai-home` if unspecified) for `summary.json`
+            files and see if there are any regressions. This is used as a workaround
+            for `iai-callgrind` not exiting with error status; see
+            <https://github.com/iai-callgrind/iai-callgrind/issues/337>.
     """
 )
 
 REPO_ROOT = Path(__file__).parent.parent
 GIT = ["git", "-C", REPO_ROOT]
+DEFAULT_BRANCH = "master"
+WORKFLOW_NAME = "CI"  # Workflow that generates the benchmark artifacts
+ARTIFACT_GLOB = "baseline-icount*"
 
 # Don't run exhaustive tests if these files change, even if they contaiin a function
 # definition.
@@ -40,6 +58,11 @@ IGNORE_FILES = [
 TYPES = ["f16", "f32", "f64", "f128"]
 
 
+def eprint(*args, **kwargs):
+    """Print to stderr."""
+    print(*args, file=sys.stderr, **kwargs)
+
+
 class FunctionDef(TypedDict):
     """Type for an entry in `function-definitions.json`"""
 
@@ -145,9 +168,125 @@ class Context:
         return output
 
 
-def eprint(*args, **kwargs):
-    """Print to stderr."""
-    print(*args, file=sys.stderr, **kwargs)
+def locate_baseline(flags: list[str]) -> None:
+    """Find the most recent baseline from CI, download it if specified.
+
+    This returns rather than erroring, even if the `gh` commands fail. This is to avoid
+    erroring in CI if the baseline is unavailable (artifact time limit exceeded, first
+    run on the branch, etc).
+    """
+
+    download = False
+    extract = False
+
+    while len(flags) > 0:
+        match flags[0]:
+            case "--download":
+                download = True
+            case "--extract":
+                extract = True
+            case _:
+                eprint(USAGE)
+                exit(1)
+        flags = flags[1:]
+
+    if extract and not download:
+        eprint("cannot extract without downloading")
+        exit(1)
+
+    try:
+        # Locate the most recent job to complete with success on our branch
+        latest_job = sp.check_output(
+            [
+                "gh",
+                "run",
+                "list",
+                "--limit=1",
+                "--status=success",
+                f"--branch={DEFAULT_BRANCH}",
+                "--json=databaseId,url,headSha,conclusion,createdAt,"
+                "status,workflowDatabaseId,workflowName",
+                f'--jq=select(.[].workflowName == "{WORKFLOW_NAME}")',
+            ],
+            text=True,
+        )
+        eprint(f"latest: '{latest_job}'")
+    except sp.CalledProcessError as e:
+        eprint(f"failed to run github command: {e}")
+        return
+
+    try:
+        latest = json.loads(latest_job)[0]
+        eprint("latest job: ", json.dumps(latest, indent=4))
+    except json.JSONDecodeError as e:
+        eprint(f"failed to decode json '{latest_job}', {e}")
+        return
+
+    if not download:
+        eprint("--download not specified, returning")
+        return
+
+    job_id = latest.get("databaseId")
+    if job_id is None:
+        eprint("skipping download step")
+        return
+
+    sp.run(
+        ["gh", "run", "download", str(job_id), f"--pattern={ARTIFACT_GLOB}"],
+        check=False,
+    )
+
+    if not extract:
+        eprint("skipping extraction step")
+        return
+
+    # Find the baseline with the most recent timestamp. GH downloads the files to e.g.
+    # `some-dirname/some-dirname.tar.xz`, so just glob the whole thing together.
+    candidate_baselines = glob(f"{ARTIFACT_GLOB}/{ARTIFACT_GLOB}")
+    if len(candidate_baselines) == 0:
+        eprint("no possible baseline directories found")
+        return
+
+    candidate_baselines.sort(reverse=True)
+    baseline_archive = candidate_baselines[0]
+    eprint(f"extracting {baseline_archive}")
+    sp.run(["tar", "xJvf", baseline_archive], check=True)
+    eprint("baseline extracted successfully")
+
+
+def check_iai_regressions(iai_home: str | None | Path):
+    """Find regressions in iai summary.json files, exit with failure if any are
+    found.
+    """
+    if iai_home is None:
+        iai_home = "iai-home"
+    iai_home = Path(iai_home)
+
+    found_summaries = False
+    regressions = []
+    for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True):
+        found_summaries = True
+        with open(iai_home / summary_path, "r") as f:
+            summary = json.load(f)
+
+        summary_regs = []
+        run = summary["callgrind_summary"]["callgrind_run"]
+        name_entry = {"name": f"{summary["function_name"]}.{summary["id"]}"}
+
+        for segment in run["segments"]:
+            summary_regs.extend(segment["regressions"])
+
+        summary_regs.extend(run["total"]["regressions"])
+
+        regressions.extend(name_entry | reg for reg in summary_regs)
+
+    if not found_summaries:
+        eprint(f"did not find any summary.json files within {iai_home}")
+        exit(1)
+
+    if len(regressions) > 0:
+        eprint("Found regressions:", json.dumps(regressions, indent=4))
+        exit(1)
 
 
 def main():
@@ -156,6 +295,12 @@ def main():
             ctx = Context()
             output = ctx.make_workflow_output()
             print(f"matrix={output}")
+        case ["locate-baseline", *flags]:
+            locate_baseline(flags)
+        case ["check-regressions"]:
+            check_iai_regressions(None)
+        case ["check-regressions", iai_home]:
+            check_iai_regressions(iai_home)
         case ["--help" | "-h"]:
             print(USAGE)
             exit()

From 5139ba6f46923dc673a7449c09d396cba2a6eba6 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 16 Jan 2025 07:31:07 +0000
Subject: [PATCH 4/5] Reduce the warm up and measurement time for
 `short-benchmarks`

The icount benchmarks are what we will be relying on in CI more than the
existing benchmarks. There isn't much reason to keep these around, but
there isn't much point in dropping them either. So, just reduce the
runtime.
---
 .../compiler-builtins/libm/crates/libm-test/benches/random.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/library/compiler-builtins/libm/crates/libm-test/benches/random.rs b/library/compiler-builtins/libm/crates/libm-test/benches/random.rs
index dcc7c1acac36..888161265100 100644
--- a/library/compiler-builtins/libm/crates/libm-test/benches/random.rs
+++ b/library/compiler-builtins/libm/crates/libm-test/benches/random.rs
@@ -151,8 +151,8 @@ pub fn musl_random() {
     // about the same time as other tests.
     if cfg!(feature = "short-benchmarks") {
         criterion = criterion
-            .warm_up_time(Duration::from_millis(500))
-            .measurement_time(Duration::from_millis(1000));
+            .warm_up_time(Duration::from_millis(200))
+            .measurement_time(Duration::from_millis(600));
     }
 
     criterion = criterion.configure_from_args();

From 3986206ce0f601d747953333bc82b836820443e9 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Thu, 16 Jan 2025 09:47:00 +0000
Subject: [PATCH 5/5] Add an xfail for recent ynf failures

This failed a couple of times recently in CI, once on i686 and once on
aarch64-apple:

    thread 'main' panicked at crates/libm-test/benches/random.rs:76:65:
    called `Result::unwrap()` on an `Err` value: ynf

    Caused by:
        0:
               input:    (681, 509.90924) (0x000002a9, 0x43fef462)
               expected: -3.2161271e38          0xff71f45b
               actual:   -inf                   0xff800000
        1: mismatched infinities

    thread 'main' panicked at crates/libm-test/benches/random.rs:76:65:
    called `Result::unwrap()` on an `Err` value: ynf

    Caused by:
        0:
               input:    (132, 50.46604) (0x00000084, 0x4249dd3a)
               expected: -3.3364996e38          0xff7b02a5
               actual:   -inf                   0xff800000
        1: mismatched infinities

Add a new override to account for this.
---
 .../libm/crates/libm-test/src/precision.rs    | 22 +++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/library/compiler-builtins/libm/crates/libm-test/src/precision.rs b/library/compiler-builtins/libm/crates/libm-test/src/precision.rs
index 3cb5e420f260..9d17ab8cccfd 100644
--- a/library/compiler-builtins/libm/crates/libm-test/src/precision.rs
+++ b/library/compiler-builtins/libm/crates/libm-test/src/precision.rs
@@ -4,6 +4,7 @@
 use core::f32;
 
 use CheckBasis::{Mpfr, Musl};
+use libm::support::CastFrom;
 use {BaseName as Bn, Identifier as Id};
 
 use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult};
@@ -524,7 +525,7 @@ impl MaybeOverride<(i32, f32)> for SpecialCase {
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
         match (&ctx.basis, ctx.base_name) {
-            (Musl, _) => bessel_prec_dropoff(input, ulp, ctx),
+            (Musl, _) => bessel_prec_dropoff(input, actual, expected, ulp, ctx),
 
             // We return +0.0, MPFR returns -0.0
             (Mpfr, BaseName::Jn | BaseName::Yn)
@@ -554,7 +555,7 @@ impl MaybeOverride<(i32, f64)> for SpecialCase {
         ctx: &CheckCtx,
     ) -> Option<TestResult> {
         match (&ctx.basis, ctx.base_name) {
-            (Musl, _) => bessel_prec_dropoff(input, ulp, ctx),
+            (Musl, _) => bessel_prec_dropoff(input, actual, expected, ulp, ctx),
 
             // We return +0.0, MPFR returns -0.0
             (Mpfr, BaseName::Jn | BaseName::Yn)
@@ -569,8 +570,10 @@ impl MaybeOverride<(i32, f64)> for SpecialCase {
 }
 
 /// Our bessel functions blow up with large N values
-fn bessel_prec_dropoff<F: Float>(
-    input: (i32, F),
+fn bessel_prec_dropoff<F1: Float, F2: Float>(
+    input: (i32, F1),
+    actual: F2,
+    expected: F2,
     ulp: &mut u32,
     ctx: &CheckCtx,
 ) -> Option<TestResult> {
@@ -585,6 +588,17 @@ fn bessel_prec_dropoff<F: Float>(
         }
     }
 
+    // Values near infinity sometimes get cut off for us. `ynf(681, 509.90924) = -inf` but should
+    // be -3.2161271e38.
+    if ctx.fn_ident == Identifier::Ynf
+        && !expected.is_infinite()
+        && actual.is_infinite()
+        && (expected.abs().to_bits().abs_diff(actual.abs().to_bits())
+            < F2::Int::cast_from(1_000_000u32))
+    {
+        return XFAIL;
+    }
+
     None
 }