From ba0cfe58dd1fc460cbbe5d61f2aa3650d05ec16b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 21 Jan 2025 07:44:13 +0000
Subject: [PATCH 1/4] Run icount benchmarks once with softfloat and once with
 hardfloat

These benchmarks are fast to run, so the time cost here is pretty
minimal. Running softfloat benchmarks just ensures that we don't e.g.
test the performance of `_mm_sqrt_ss` rather than our implementation,
and running without softfloat gives us a way to see the effect of arch
intrinsics.
---
 .../libm/.github/workflows/main.yaml          | 24 +--------
 .../compiler-builtins/libm/ci/bench-icount.sh | 53 +++++++++++++++++++
 2 files changed, 54 insertions(+), 23 deletions(-)
 create mode 100755 library/compiler-builtins/libm/ci/bench-icount.sh

diff --git a/library/compiler-builtins/libm/.github/workflows/main.yaml b/library/compiler-builtins/libm/.github/workflows/main.yaml
index 8c0ff237d2fe..f9d3a5a159cb 100644
--- a/library/compiler-builtins/libm/.github/workflows/main.yaml
+++ b/library/compiler-builtins/libm/.github/workflows/main.yaml
@@ -170,29 +170,7 @@ jobs:
     - name: Run icount benchmarks
       env:
         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-      run: |
-        set -eux
-        iai_home="iai-home"
-        # Download the baseline from master
-        ./ci/ci-util.py locate-baseline --download --extract
-
-        # Run iai-callgrind benchmarks
-        cargo bench --no-default-features \
-          --features unstable,unstable-float,icount \
-          --bench icount \
-          -- \
-          --save-baseline=default \
-          --home "$(pwd)/$iai_home" \
-          --regression='ir=5.0' \
-          --save-summary
-        # NB: iai-callgrind should exit on error but does not, so we inspect the sumary
-        # for errors. See  https://github.com/iai-callgrind/iai-callgrind/issues/337
-        ./ci/ci-util.py check-regressions "$iai_home"
-
-        # Name and tar the new baseline
-        name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
-        echo "BASELINE_NAME=$name" >> "$GITHUB_ENV"
-        tar cJf "$name.tar.xz" "$iai_home"
+      run: ./ci/bench-icount.sh
 
     - name: Upload the benchmark baseline
       uses: actions/upload-artifact@v4
diff --git a/library/compiler-builtins/libm/ci/bench-icount.sh b/library/compiler-builtins/libm/ci/bench-icount.sh
new file mode 100755
index 000000000000..40b3ac95c7bd
--- /dev/null
+++ b/library/compiler-builtins/libm/ci/bench-icount.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+set -eux
+
+iai_home="iai-home"
+
+# Download the baseline from master
+./ci/ci-util.py locate-baseline --download --extract
+
+# Run benchmarks once. Arguments before `--` go to cargo, arguments after it to iai-callgrind.
+function run_icount_benchmarks() {
+    cargo_args=(
+        "--bench" "icount"
+        "--no-default-features"
+        "--features" "unstable,unstable-float,icount"
+    )
+
+    iai_args=(
+        "--home" "$(pwd)/$iai_home"
+        "--regression=ir=5.0"
+        "--save-summary"
+    )
+
+    # Parse `cargo_arg0 cargo_arg1 -- iai_arg0 iai_arg1` syntax
+    parsing_iai_args=0
+    while [ "$#" -gt 0 ]; do
+        if [ "$parsing_iai_args" == "1" ]; then
+            iai_args+=("$1")
+        elif [ "$1" == "--" ]; then
+            parsing_iai_args=1
+        else
+            cargo_args+=("$1")
+        fi
+
+        shift
+    done
+
+    # Run iai-callgrind benchmarks
+    cargo bench "${cargo_args[@]}" -- "${iai_args[@]}"
+
+    # NB: iai-callgrind should exit on error but does not, so we inspect the sumary
+    # for errors. See  https://github.com/iai-callgrind/iai-callgrind/issues/337
+    ./ci/ci-util.py check-regressions --home "$iai_home" || true
+}
+
+# Run once with softfloats, once with arch instructions enabled
+run_icount_benchmarks --features force-soft-floats -- --save-baseline=softfloat
+run_icount_benchmarks -- --save-baseline=hardfloat
+
+# Name and tar the new baseline
+name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
+echo "BASELINE_NAME=$name" >>"$GITHUB_ENV"
+tar cJf "$name.tar.xz" "$iai_home"

From c5dc1b8ca027d04245c2b68427f7f42e11c36e33 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 21 Jan 2025 07:45:44 +0000
Subject: [PATCH 2/4] Run wall time benchmarks with `--features
 force-soft-floats`

Similar to changes for `icount` benchmarks, this ensures we aren't
testing the throughput of architecture instructions.
---
 library/compiler-builtins/libm/.github/workflows/main.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/compiler-builtins/libm/.github/workflows/main.yaml b/library/compiler-builtins/libm/.github/workflows/main.yaml
index f9d3a5a159cb..f019c73f8340 100644
--- a/library/compiler-builtins/libm/.github/workflows/main.yaml
+++ b/library/compiler-builtins/libm/.github/workflows/main.yaml
@@ -183,7 +183,7 @@ jobs:
         # Always use the same seed for benchmarks. Ideally we should switch to a
         # non-random generator.
         export LIBM_SEED=benchesbenchesbenchesbencheswoo!
-        cargo bench --all --features libm-test/short-benchmarks,libm-test/build-musl
+        cargo bench --all --features short-benchmarks,build-musl,force-soft-floats
 
     - name: Print test logs if available
       if: always()

From d3328a0dab30f5be9d896f38c87ac9824a52245b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 21 Jan 2025 07:47:41 +0000
Subject: [PATCH 3/4] Add a way to ignore benchmark regression checks

Introduce a way to ignore the results of icount regression tests, by
starting a line of the pull request body with `ci: allow-regressions`.
This should apply to both pull requests and the merges based on them,
since `gh pr view` automatically handles both.
---
 .../libm/.github/workflows/main.yaml          |  1 +
 .../compiler-builtins/libm/ci/bench-icount.sh |  7 ++-
 library/compiler-builtins/libm/ci/ci-util.py  | 63 +++++++++++++++----
 3 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/library/compiler-builtins/libm/.github/workflows/main.yaml b/library/compiler-builtins/libm/.github/workflows/main.yaml
index f019c73f8340..7693de6559a1 100644
--- a/library/compiler-builtins/libm/.github/workflows/main.yaml
+++ b/library/compiler-builtins/libm/.github/workflows/main.yaml
@@ -170,6 +170,7 @@ jobs:
     - name: Run icount benchmarks
       env:
         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        PR_NUMBER: ${{ github.event.pull_request.number }}
       run: ./ci/bench-icount.sh
 
     - name: Upload the benchmark baseline
diff --git a/library/compiler-builtins/libm/ci/bench-icount.sh b/library/compiler-builtins/libm/ci/bench-icount.sh
index 40b3ac95c7bd..3a2155f50113 100755
--- a/library/compiler-builtins/libm/ci/bench-icount.sh
+++ b/library/compiler-builtins/libm/ci/bench-icount.sh
@@ -40,7 +40,12 @@ function run_icount_benchmarks() {
 
     # NB: iai-callgrind should exit on error but does not, so we inspect the sumary
     # for errors. See  https://github.com/iai-callgrind/iai-callgrind/issues/337
-    ./ci/ci-util.py check-regressions --home "$iai_home" || true
+    if [ -n "${PR_NUMBER:-}" ]; then
+        # If this is for a pull request, ignore regressions if specified.
+        ./ci/ci-util.py check-regressions --home "$iai_home" --allow-pr-override "$PR_NUMBER"
+    else
+        ./ci/ci-util.py check-regressions --home "$iai_home" || true
+    fi
 }
 
 # Run once with softfloats, once with arch instructions enabled
diff --git a/library/compiler-builtins/libm/ci/ci-util.py b/library/compiler-builtins/libm/ci/ci-util.py
index 7a9f1bd2bbec..7464fd42595d 100755
--- a/library/compiler-builtins/libm/ci/ci-util.py
+++ b/library/compiler-builtins/libm/ci/ci-util.py
@@ -33,11 +33,14 @@ USAGE = cleandoc(
 
             Note that `--extract` will overwrite files in `iai-home`.
 
-        check-regressions [iai-home]
+        check-regressions [--home iai-home] [--allow-pr-override pr_number]
             Check `iai-home` (or `iai-home` if unspecified) for `summary.json`
             files and see if there are any regressions. This is used as a workaround
             for `iai-callgrind` not exiting with error status; see
             <https://github.com/iai-callgrind/iai-callgrind/issues/337>.
+
+            If `--allow-pr-override` is specified, the regression check will not exit
+            with failure if any line in the PR body starts with `ci: allow-regressions`.
     """
 )
 
@@ -46,6 +49,8 @@ GIT = ["git", "-C", REPO_ROOT]
 DEFAULT_BRANCH = "master"
 WORKFLOW_NAME = "CI"  # Workflow that generates the benchmark artifacts
 ARTIFACT_GLOB = "baseline-icount*"
+# Place this in a PR body to skip regression checks (must be at the start of a line).
+REGRESSION_DIRECTIVE = "ci: allow-regressions"
 
 # Don't run exhaustive tests if these files change, even if they contaiin a function
 # definition.
@@ -256,12 +261,26 @@ def locate_baseline(flags: list[str]) -> None:
     eprint("baseline extracted successfully")
 
 
-def check_iai_regressions(iai_home: str | None | Path):
+def check_iai_regressions(args: list[str]):
     """Find regressions in iai summary.json files, exit with failure if any are
     found.
     """
-    if iai_home is None:
-        iai_home = "iai-home"
+
+    iai_home = "iai-home"
+    pr_number = None  # stays None unless `--allow-pr-override` supplies a PR number
+
+    while len(args) > 0:
+        match args:
+            case ["--home", home, *rest]:
+                iai_home = home
+                args = rest
+            case ["--allow-pr-override", pr_num, *rest]:
+                pr_number = pr_num
+                args = rest
+            case _:
+                eprint(USAGE)
+                exit(1)
+
     iai_home = Path(iai_home)
 
     found_summaries = False
@@ -286,9 +305,33 @@ def check_iai_regressions(iai_home: str | None | Path):
         eprint(f"did not find any summary.json files within {iai_home}")
         exit(1)
 
-    if len(regressions) > 0:
-        eprint("Found regressions:", json.dumps(regressions, indent=4))
-        exit(1)
+    if len(regressions) == 0:
+        eprint("No regressions found")
+        return
+
+    eprint("Found regressions:", json.dumps(regressions, indent=4))
+
+    if pr_number is not None:
+        pr_info = sp.check_output(
+            [
+                "gh",
+                "pr",
+                "view",
+                str(pr_number),
+                "--json=number,commits,body,createdAt",
+                "--jq=.commits |= map(.oid)",
+            ],
+            text=True,
+        )
+        pr = json.loads(pr_info)
+        eprint("PR info:", json.dumps(pr, indent=4))
+
+        lines = pr["body"].splitlines()
+        if any(line.startswith(REGRESSION_DIRECTIVE) for line in lines):
+            eprint("PR allows regressions, returning")
+            return
+
+    exit(1)
 
 
 def main():
@@ -299,10 +342,8 @@ def main():
             print(f"matrix={output}")
         case ["locate-baseline", *flags]:
             locate_baseline(flags)
-        case ["check-regressions"]:
-            check_iai_regressions(None)
-        case ["check-regressions", iai_home]:
-            check_iai_regressions(iai_home)
+        case ["check-regressions", *args]:
+            check_iai_regressions(args)
         case ["--help" | "-h"]:
             print(USAGE)
             exit()

From e21618c73e9c1a05aa4d5c75a3d144960d43158b Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross@umich.edu>
Date: Tue, 21 Jan 2025 07:52:26 +0000
Subject: [PATCH 4/4] Ignore files relevant to benchmarking

---
 library/compiler-builtins/libm/.gitignore | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/library/compiler-builtins/libm/.gitignore b/library/compiler-builtins/libm/.gitignore
index 4e9c9c03ddf3..a447c34cd0f6 100644
--- a/library/compiler-builtins/libm/.gitignore
+++ b/library/compiler-builtins/libm/.gitignore
@@ -6,3 +6,7 @@ target
 Cargo.lock
 musl/
 **.tar.gz
+
+# Benchmark cache
+iai-home
+baseline-*