Run iai-callgrind benchmarks in CI

Add support in `ci-util.py` for finding the most recent baseline and
downloading it, which new tests can then be compared against.

Arbitrarily select nightly-2025-01-16 as the rustc version to pin to in
benchmarks.
This commit is contained in:
Trevor Gross 2025-01-16 07:31:34 +00:00
parent 490ebbb187
commit cdb1e680e0
2 changed files with 207 additions and 10 deletions

View file

@ -10,6 +10,7 @@ env:
RUSTDOCFLAGS: -Dwarnings
RUSTFLAGS: -Dwarnings
RUST_BACKTRACE: full
BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducible results
jobs:
test:
@ -147,19 +148,70 @@ jobs:
benchmarks:
name: Benchmarks
runs-on: ubuntu-24.04
timeout-minutes: 20
steps:
- uses: actions/checkout@master
- name: Install Rust
run: rustup update nightly --no-self-update && rustup default nightly
- uses: taiki-e/install-action@cargo-binstall
- name: Set up dependencies
run: |
rustup update "$BENCHMARK_RUSTC" --no-self-update
rustup default "$BENCHMARK_RUSTC"
# Install the version of iai-callgrind-runner that is specified in Cargo.toml
iai_version="$(cargo metadata --format-version=1 --features icount |
jq -r '.packages[] | select(.name == "iai-callgrind").version')"
cargo binstall -y iai-callgrind-runner --version "$iai_version"
sudo apt-get install valgrind
- uses: Swatinem/rust-cache@v2
- name: Download musl source
run: ./ci/download-musl.sh
- run: |
- name: Run icount benchmarks
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
set -eux
iai_home="iai-home"
# Download the baseline from master
./ci/ci-util.py locate-baseline --download --extract
# Run iai-callgrind benchmarks
cargo bench --no-default-features \
--features unstable,unstable-float,icount \
--bench icount \
-- \
--save-baseline=default \
--home "$(pwd)/$iai_home" \
--regression='ir=5.0' \
--save-summary
# NB: iai-callgrind should exit on error but does not, so we inspect the summary
# for errors. See https://github.com/iai-callgrind/iai-callgrind/issues/337
./ci/ci-util.py check-regressions "$iai_home"
# Name and tar the new baseline
name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
echo "BASELINE_NAME=$name" >> "$GITHUB_ENV"
tar cJf "$name.tar.xz" "$iai_home"
- name: Upload the benchmark baseline
uses: actions/upload-artifact@v4
with:
name: ${{ env.BASELINE_NAME }}
path: ${{ env.BASELINE_NAME }}.tar.xz
- name: Run wall time benchmarks
run: |
# Always use the same seed for benchmarks. Ideally we should switch to a
# non-random generator.
export LIBM_SEED=benchesbenchesbenchesbencheswoo!
cargo bench --all --features libm-test/short-benchmarks,libm-test/build-musl
- name: Print test logs if available
if: always()
run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
shell: bash
msrv:
name: Check MSRV
runs-on: ubuntu-24.04

View file

@ -9,6 +9,7 @@ import json
import subprocess as sp
import sys
from dataclasses import dataclass
from glob import glob, iglob
from inspect import cleandoc
from os import getenv
from pathlib import Path
@ -18,16 +19,33 @@ USAGE = cleandoc(
"""
usage:
./ci/ci-util.py <SUBCOMMAND>
./ci/ci-util.py <COMMAND> [flags]
SUBCOMMAND:
generate-matrix Calculate a matrix of which functions had source change,
print that as JSON object.
COMMAND:
generate-matrix
Calculate a matrix of which functions had source change, print that as
a JSON object.
locate-baseline [--download] [--extract]
Locate the most recent benchmark baseline available in CI and, if flags
specify, download and extract it. Never exits with nonzero status if
downloading fails.
Note that `--extract` will overwrite files in `iai-home`.
check-regressions [iai-home]
Check `iai-home` (or `iai-home` if unspecified) for `summary.json`
files and see if there are any regressions. This is used as a workaround
for `iai-callgrind` not exiting with error status; see
<https://github.com/iai-callgrind/iai-callgrind/issues/337>.
"""
)
REPO_ROOT = Path(__file__).parent.parent
GIT = ["git", "-C", REPO_ROOT]
DEFAULT_BRANCH = "master"
WORKFLOW_NAME = "CI" # Workflow that generates the benchmark artifacts
ARTIFACT_GLOB = "baseline-icount*"
# Don't run exhaustive tests if these files change, even if they contain a function
# definition.
@ -40,6 +58,11 @@ IGNORE_FILES = [
TYPES = ["f16", "f32", "f64", "f128"]
def eprint(*args, **kwargs):
    """Mirror the builtin print(), but send output to stderr instead of stdout."""
    print(*args, **kwargs, file=sys.stderr)
class FunctionDef(TypedDict):
"""Type for an entry in `function-definitions.json`"""
@ -145,9 +168,125 @@ class Context:
return output
# NOTE(review): this is a duplicate of the `eprint` defined earlier in this view —
# it appears to be diff-rendering residue of the definition being moved up the file.
# Confirm that only one definition exists in the committed source.
def eprint(*args, **kwargs):
    """Print to stderr."""
    print(*args, file=sys.stderr, **kwargs)
def locate_baseline(flags: list[str]) -> None:
    """Find the most recent baseline from CI, download it if specified.

    This returns rather than erroring, even if the `gh` commands fail. This is to avoid
    erroring in CI if the baseline is unavailable (artifact time limit exceeded, first
    run on the branch, etc).
    """
    download = False
    extract = False

    # Guard-clause flag parsing: anything unrecognized prints usage and aborts.
    for flag in flags:
        if flag == "--download":
            download = True
        elif flag == "--extract":
            extract = True
        else:
            eprint(USAGE)
            exit(1)

    if extract and not download:
        eprint("cannot extract without downloading")
        exit(1)

    # Locate the most recent job to complete with success on our branch
    try:
        gh_output = sp.check_output(
            [
                "gh",
                "run",
                "list",
                "--limit=1",
                "--status=success",
                f"--branch={DEFAULT_BRANCH}",
                "--json=databaseId,url,headSha,conclusion,createdAt,"
                "status,workflowDatabaseId,workflowName",
                f'--jq=select(.[].workflowName == "{WORKFLOW_NAME}")',
            ],
            text=True,
        )
        eprint(f"latest: '{gh_output}'")
    except sp.CalledProcessError as e:
        # Best effort only: a failed `gh` invocation must not fail CI.
        eprint(f"failed to run github command: {e}")
        return

    try:
        newest_run = json.loads(gh_output)[0]
        eprint("latest job: ", json.dumps(newest_run, indent=4))
    except json.JSONDecodeError as e:
        eprint(f"failed to decode json '{gh_output}', {e}")
        return

    if not download:
        eprint("--download not specified, returning")
        return

    run_id = newest_run.get("databaseId")
    if run_id is None:
        eprint("skipping download step")
        return

    # Fetch only the baseline artifacts; a failure here is tolerated (check=False).
    sp.run(
        ["gh", "run", "download", str(run_id), f"--pattern={ARTIFACT_GLOB}"],
        check=False,
    )

    if not extract:
        eprint("skipping extraction step")
        return

    # Find the baseline with the most recent timestamp. GH downloads the files to e.g.
    # `some-dirname/some-dirname.tar.xz`, so just glob the whole thing together.
    candidates = glob(f"{ARTIFACT_GLOB}/{ARTIFACT_GLOB}")
    if not candidates:
        eprint("no possible baseline directories found")
        return

    # Names embed a sortable UTC timestamp, so lexicographic max is the newest.
    candidates.sort(reverse=True)
    archive = candidates[0]

    eprint(f"extracting {archive}")
    sp.run(["tar", "xJvf", archive], check=True)
    eprint("baseline extracted successfully")
def check_iai_regressions(iai_home: str | None | Path):
"""Find regressions in iai summary.json files, exit with failure if any are
found.
"""
if iai_home is None:
iai_home = "iai-home"
iai_home = Path(iai_home)
found_summaries = False
regressions = []
for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True):
found_summaries = True
with open(iai_home / summary_path, "r") as f:
summary = json.load(f)
summary_regs = []
run = summary["callgrind_summary"]["callgrind_run"]
name_entry = {"name": f"{summary["function_name"]}.{summary["id"]}"}
for segment in run["segments"]:
summary_regs.extend(segment["regressions"])
summary_regs.extend(run["total"]["regressions"])
regressions.extend(name_entry | reg for reg in summary_regs)
if not found_summaries:
eprint(f"did not find any summary.json files within {iai_home}")
exit(1)
if len(regressions) > 0:
eprint("Found regressions:", json.dumps(regressions, indent=4))
exit(1)
def main():
@ -156,6 +295,12 @@ def main():
ctx = Context()
output = ctx.make_workflow_output()
print(f"matrix={output}")
case ["locate-baseline", *flags]:
locate_baseline(flags)
case ["check-regressions"]:
check_iai_regressions(None)
case ["check-regressions", iai_home]:
check_iai_regressions(iai_home)
case ["--help" | "-h"]:
print(USAGE)
exit()