From cdb1e680e03bf1ec9330c0951a8ee328f6a272b6 Mon Sep 17 00:00:00 2001 From: Trevor Gross <tmgross@umich.edu> Date: Thu, 16 Jan 2025 07:31:34 +0000 Subject: [PATCH] Run iai-callgrind benchmarks in CI Add support in `ci-util.py` for finding the most recent baseline and downloading it, which new tests can then be compared against. Arbitrarily select nightly-2025-01-16 as the rustc version to pin to in benchmarks. --- .../libm/.github/workflows/main.yaml | 58 ++++++- library/compiler-builtins/libm/ci/ci-util.py | 159 +++++++++++++++++- 2 files changed, 207 insertions(+), 10 deletions(-) diff --git a/library/compiler-builtins/libm/.github/workflows/main.yaml b/library/compiler-builtins/libm/.github/workflows/main.yaml index 99a32a82ec1d..9face93110b1 100644 --- a/library/compiler-builtins/libm/.github/workflows/main.yaml +++ b/library/compiler-builtins/libm/.github/workflows/main.yaml @@ -10,6 +10,7 @@ env: RUSTDOCFLAGS: -Dwarnings RUSTFLAGS: -Dwarnings RUST_BACKTRACE: full + BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducible results jobs: test: @@ -147,19 +148,70 @@ jobs: benchmarks: name: Benchmarks runs-on: ubuntu-24.04 + timeout-minutes: 20 steps: - uses: actions/checkout@master - - name: Install Rust - run: rustup update nightly --no-self-update && rustup default nightly + - uses: taiki-e/install-action@cargo-binstall + + - name: Set up dependencies + run: | + rustup update "$BENCHMARK_RUSTC" --no-self-update + rustup default "$BENCHMARK_RUSTC" + # Install the version of iai-callgrind-runner that is specified in Cargo.toml + iai_version="$(cargo metadata --format-version=1 --features icount | + jq -r '.packages[] | select(.name == "iai-callgrind").version')" + cargo binstall -y iai-callgrind-runner --version "$iai_version" + sudo apt-get install valgrind + - uses: Swatinem/rust-cache@v2 - name: Download musl source run: ./ci/download-musl.sh - - run: | + + - name: Run icount benchmarks + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -eux + 
iai_home="iai-home" + # Download the baseline from master + ./ci/ci-util.py locate-baseline --download --extract + + # Run iai-callgrind benchmarks + cargo bench --no-default-features \ + --features unstable,unstable-float,icount \ + --bench icount \ + -- \ + --save-baseline=default \ + --home "$(pwd)/$iai_home" \ + --regression='ir=5.0' \ + --save-summary + # NB: iai-callgrind should exit on error but does not, so we inspect the summary + # for errors. See https://github.com/iai-callgrind/iai-callgrind/issues/337 + ./ci/ci-util.py check-regressions "$iai_home" + + # Name and tar the new baseline + name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}" + echo "BASELINE_NAME=$name" >> "$GITHUB_ENV" + tar cJf "$name.tar.xz" "$iai_home" + + - name: Upload the benchmark baseline + uses: actions/upload-artifact@v4 + with: + name: ${{ env.BASELINE_NAME }} + path: ${{ env.BASELINE_NAME }}.tar.xz + + - name: Run wall time benchmarks + run: | # Always use the same seed for benchmarks. Ideally we should switch to a # non-random generator. export LIBM_SEED=benchesbenchesbenchesbencheswoo! 
cargo bench --all --features libm-test/short-benchmarks,libm-test/build-musl + - name: Print test logs if available + if: always() + run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi + shell: bash + msrv: name: Check MSRV runs-on: ubuntu-24.04 diff --git a/library/compiler-builtins/libm/ci/ci-util.py b/library/compiler-builtins/libm/ci/ci-util.py index 733ec26fa33c..1ec69b002aed 100755 --- a/library/compiler-builtins/libm/ci/ci-util.py +++ b/library/compiler-builtins/libm/ci/ci-util.py @@ -9,6 +9,7 @@ import json import subprocess as sp import sys from dataclasses import dataclass +from glob import glob, iglob from inspect import cleandoc from os import getenv from pathlib import Path @@ -18,16 +19,33 @@ USAGE = cleandoc( """ usage: - ./ci/ci-util.py <SUBCOMMAND> + ./ci/ci-util.py <COMMAND> [flags] - SUBCOMMAND: - generate-matrix Calculate a matrix of which functions had source change, - print that as JSON object. + COMMAND: + generate-matrix + Calculate a matrix of which functions had source change, print that as + a JSON object. + + locate-baseline [--download] [--extract] + Locate the most recent benchmark baseline available in CI and, if flags + specify, download and extract it. Never exits with nonzero status if + downloading fails. + + Note that `--extract` will overwrite files in `iai-home`. + + check-regressions [iai-home] + Check `iai_home` (or `iai-home` if unspecified) for `summary.json` + files and see if there are any regressions. This is used as a workaround + for `iai-callgrind` not exiting with error status; see + <https://github.com/iai-callgrind/iai-callgrind/issues/337>. """ ) REPO_ROOT = Path(__file__).parent.parent GIT = ["git", "-C", REPO_ROOT] +DEFAULT_BRANCH = "master" +WORKFLOW_NAME = "CI" # Workflow that generates the benchmark artifacts +ARTIFACT_GLOB = "baseline-icount*" # Don't run exhaustive tests if these files change, even if they contain a function # definition. 
@@ -40,6 +58,11 @@ IGNORE_FILES = [ TYPES = ["f16", "f32", "f64", "f128"] +def eprint(*args, **kwargs): + """Print to stderr.""" + print(*args, file=sys.stderr, **kwargs) + + class FunctionDef(TypedDict): """Type for an entry in `function-definitions.json`""" @@ -145,9 +168,125 @@ class Context: return output -def eprint(*args, **kwargs): - """Print to stderr.""" - print(*args, file=sys.stderr, **kwargs) +def locate_baseline(flags: list[str]) -> None: + """Find the most recent baseline from CI, download it if specified. + + This returns rather than erroring, even if the `gh` commands fail. This is to avoid + erroring in CI if the baseline is unavailable (artifact time limit exceeded, first + run on the branch, etc). + """ + + download = False + extract = False + + while len(flags) > 0: + match flags[0]: + case "--download": + download = True + case "--extract": + extract = True + case _: + eprint(USAGE) + exit(1) + flags = flags[1:] + + if extract and not download: + eprint("cannot extract without downloading") + exit(1) + + try: + # Locate the most recent job to complete with success on our branch + latest_job = sp.check_output( + [ + "gh", + "run", + "list", + "--limit=1", + "--status=success", + f"--branch={DEFAULT_BRANCH}", + "--json=databaseId,url,headSha,conclusion,createdAt," + "status,workflowDatabaseId,workflowName", + f'--jq=select(.[].workflowName == "{WORKFLOW_NAME}")', + ], + text=True, + ) + eprint(f"latest: '{latest_job}'") + except sp.CalledProcessError as e: + eprint(f"failed to run github command: {e}") + return + + try: + latest = json.loads(latest_job)[0] + eprint("latest job: ", json.dumps(latest, indent=4)) + except json.JSONDecodeError as e: + eprint(f"failed to decode json '{latest_job}', {e}") + return + + if not download: + eprint("--download not specified, returning") + return + + job_id = latest.get("databaseId") + if job_id is None: + eprint("skipping download step") + return + + sp.run( + ["gh", "run", "download", str(job_id), 
f"--pattern={ARTIFACT_GLOB}"], + check=False, + ) + + if not extract: + eprint("skipping extraction step") + return + + # Find the baseline with the most recent timestamp. GH downloads the files to e.g. + # `some-dirname/some-dirname.tar.xz`, so just glob the whole thing together. + candidate_baselines = glob(f"{ARTIFACT_GLOB}/{ARTIFACT_GLOB}") + if len(candidate_baselines) == 0: + eprint("no possible baseline directories found") + return + + candidate_baselines.sort(reverse=True) + baseline_archive = candidate_baselines[0] + eprint(f"extracting {baseline_archive}") + sp.run(["tar", "xJvf", baseline_archive], check=True) + eprint("baseline extracted successfully") + + +def check_iai_regressions(iai_home: str | None | Path): + """Find regressions in iai summary.json files, exit with failure if any are + found. + """ + if iai_home is None: + iai_home = "iai-home" + iai_home = Path(iai_home) + + found_summaries = False + regressions = [] + for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True): + found_summaries = True + with open(iai_home / summary_path, "r") as f: + summary = json.load(f) + + summary_regs = [] + run = summary["callgrind_summary"]["callgrind_run"] + name_entry = {"name": f"{summary['function_name']}.{summary['id']}"} + + for segment in run["segments"]: + summary_regs.extend(segment["regressions"]) + + summary_regs.extend(run["total"]["regressions"]) + + regressions.extend(name_entry | reg for reg in summary_regs) + + if not found_summaries: + eprint(f"did not find any summary.json files within {iai_home}") + exit(1) + + if len(regressions) > 0: + eprint("Found regressions:", json.dumps(regressions, indent=4)) + exit(1) def main(): @@ -156,6 +295,12 @@ ctx = Context() output = ctx.make_workflow_output() print(f"matrix={output}") + case ["locate-baseline", *flags]: + locate_baseline(flags) + case ["check-regressions"]: + check_iai_regressions(None) + case ["check-regressions", iai_home]: + 
check_iai_regressions(iai_home) case ["--help" | "-h"]: print(USAGE) exit()