Merge pull request rust-lang/libm#447 from tgross35/icount-benchmarks

Add benchmarks using iai-callgrind
This commit is contained in:
Trevor Gross 2025-01-16 04:14:56 -06:00 committed by GitHub
commit 753af94f1f
10 changed files with 437 additions and 18 deletions

View file

@ -10,6 +10,7 @@ env:
RUSTDOCFLAGS: -Dwarnings
RUSTFLAGS: -Dwarnings
RUST_BACKTRACE: full
BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducible results
jobs:
test:
@ -147,19 +148,70 @@ jobs:
benchmarks:
name: Benchmarks
runs-on: ubuntu-24.04
timeout-minutes: 20
steps:
- uses: actions/checkout@master
- name: Install Rust
run: rustup update nightly --no-self-update && rustup default nightly
- uses: taiki-e/install-action@cargo-binstall
- name: Set up dependencies
run: |
rustup update "$BENCHMARK_RUSTC" --no-self-update
rustup default "$BENCHMARK_RUSTC"
# Install the version of iai-callgrind-runner that is specified in Cargo.toml
iai_version="$(cargo metadata --format-version=1 --features icount |
jq -r '.packages[] | select(.name == "iai-callgrind").version')"
cargo binstall -y iai-callgrind-runner --version "$iai_version"
sudo apt-get install valgrind
- uses: Swatinem/rust-cache@v2
- name: Download musl source
run: ./ci/download-musl.sh
- run: |
- name: Run icount benchmarks
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
set -eux
iai_home="iai-home"
# Download the baseline from master
./ci/ci-util.py locate-baseline --download --extract
# Run iai-callgrind benchmarks
cargo bench --no-default-features \
--features unstable,unstable-float,icount \
--bench icount \
-- \
--save-baseline=default \
--home "$(pwd)/$iai_home" \
--regression='ir=5.0' \
--save-summary
# NB: iai-callgrind should exit on error but does not, so we inspect the summary
# for errors. See https://github.com/iai-callgrind/iai-callgrind/issues/337
./ci/ci-util.py check-regressions "$iai_home"
# Name and tar the new baseline
name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
echo "BASELINE_NAME=$name" >> "$GITHUB_ENV"
tar cJf "$name.tar.xz" "$iai_home"
- name: Upload the benchmark baseline
uses: actions/upload-artifact@v4
with:
name: ${{ env.BASELINE_NAME }}
path: ${{ env.BASELINE_NAME }}.tar.xz
- name: Run wall time benchmarks
run: |
# Always use the same seed for benchmarks. Ideally we should switch to a
# non-random generator.
export LIBM_SEED=benchesbenchesbenchesbencheswoo!
cargo bench --all --features libm-test/short-benchmarks,libm-test/build-musl
- name: Print test logs if available
if: always()
run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
shell: bash
msrv:
name: Check MSRV
runs-on: ubuntu-24.04

View file

@ -73,3 +73,7 @@ debug-assertions = true
inherits = "release"
lto = "fat"
overflow-checks = true
[profile.bench]
# Required for iai-callgrind
debug = true

View file

@ -9,6 +9,7 @@ import json
import subprocess as sp
import sys
from dataclasses import dataclass
from glob import glob, iglob
from inspect import cleandoc
from os import getenv
from pathlib import Path
@ -18,16 +19,33 @@ USAGE = cleandoc(
"""
usage:
./ci/ci-util.py <SUBCOMMAND>
./ci/ci-util.py <COMMAND> [flags]
SUBCOMMAND:
generate-matrix Calculate a matrix of which functions had source change,
print that as JSON object.
COMMAND:
generate-matrix
Calculate a matrix of which functions had source change, print that as
a JSON object.
locate-baseline [--download] [--extract]
Locate the most recent benchmark baseline available in CI and, if flags
specify, download and extract it. Never exits with nonzero status if
downloading fails.
Note that `--extract` will overwrite files in `iai-home`.
check-regressions [iai-home]
Check the given directory (or `iai-home` if unspecified) for `summary.json`
files and see if there are any regressions. This is used as a workaround
for `iai-callgrind` not exiting with error status; see
<https://github.com/iai-callgrind/iai-callgrind/issues/337>.
"""
)
REPO_ROOT = Path(__file__).parent.parent
GIT = ["git", "-C", REPO_ROOT]
DEFAULT_BRANCH = "master"
WORKFLOW_NAME = "CI" # Workflow that generates the benchmark artifacts
ARTIFACT_GLOB = "baseline-icount*"
# Don't run exhaustive tests if these files change, even if they contain a function
# definition.
@ -40,6 +58,11 @@ IGNORE_FILES = [
TYPES = ["f16", "f32", "f64", "f128"]
def eprint(*args, **kwargs):
    """Like the builtin `print`, but write to stderr rather than stdout."""
    print(*args, **kwargs, file=sys.stderr)
class FunctionDef(TypedDict):
"""Type for an entry in `function-definitions.json`"""
@ -145,9 +168,125 @@ class Context:
return output
def eprint(*args, **kwargs):
    """Print to stderr.

    Accepts the same arguments as the builtin `print`, but writes to
    `sys.stderr` so diagnostics don't pollute machine-readable stdout
    (e.g. the JSON matrix emitted by `generate-matrix`).
    """
    print(*args, file=sys.stderr, **kwargs)
def locate_baseline(flags: list[str]) -> None:
"""Find the most recent baseline from CI, download it if specified.
This returns rather than erroring, even if the `gh` commands fail. This is to avoid
erroring in CI if the baseline is unavailable (artifact time limit exceeded, first
run on the branch, etc).
"""
download = False
extract = False
while len(flags) > 0:
match flags[0]:
case "--download":
download = True
case "--extract":
extract = True
case _:
eprint(USAGE)
exit(1)
flags = flags[1:]
if extract and not download:
eprint("cannot extract without downloading")
exit(1)
try:
# Locate the most recent job to complete with success on our branch
latest_job = sp.check_output(
[
"gh",
"run",
"list",
"--limit=1",
"--status=success",
f"--branch={DEFAULT_BRANCH}",
"--json=databaseId,url,headSha,conclusion,createdAt,"
"status,workflowDatabaseId,workflowName",
f'--jq=select(.[].workflowName == "{WORKFLOW_NAME}")',
],
text=True,
)
eprint(f"latest: '{latest_job}'")
except sp.CalledProcessError as e:
eprint(f"failed to run github command: {e}")
return
try:
latest = json.loads(latest_job)[0]
eprint("latest job: ", json.dumps(latest, indent=4))
except json.JSONDecodeError as e:
eprint(f"failed to decode json '{latest_job}', {e}")
return
if not download:
eprint("--download not specified, returning")
return
job_id = latest.get("databaseId")
if job_id is None:
eprint("skipping download step")
return
sp.run(
["gh", "run", "download", str(job_id), f"--pattern={ARTIFACT_GLOB}"],
check=False,
)
if not extract:
eprint("skipping extraction step")
return
# Find the baseline with the most recent timestamp. GH downloads the files to e.g.
# `some-dirname/some-dirname.tar.xz`, so just glob the whole thing together.
candidate_baselines = glob(f"{ARTIFACT_GLOB}/{ARTIFACT_GLOB}")
if len(candidate_baselines) == 0:
eprint("no possible baseline directories found")
return
candidate_baselines.sort(reverse=True)
baseline_archive = candidate_baselines[0]
eprint(f"extracting {baseline_archive}")
sp.run(["tar", "xJvf", baseline_archive], check=True)
eprint("baseline extracted successfully")
def check_iai_regressions(iai_home: str | None | Path):
"""Find regressions in iai summary.json files, exit with failure if any are
found.
"""
if iai_home is None:
iai_home = "iai-home"
iai_home = Path(iai_home)
found_summaries = False
regressions = []
for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True):
found_summaries = True
with open(iai_home / summary_path, "r") as f:
summary = json.load(f)
summary_regs = []
run = summary["callgrind_summary"]["callgrind_run"]
name_entry = {"name": f"{summary["function_name"]}.{summary["id"]}"}
for segment in run["segments"]:
summary_regs.extend(segment["regressions"])
summary_regs.extend(run["total"]["regressions"])
regressions.extend(name_entry | reg for reg in summary_regs)
if not found_summaries:
eprint(f"did not find any summary.json files within {iai_home}")
exit(1)
if len(regressions) > 0:
eprint("Found regressions:", json.dumps(regressions, indent=4))
exit(1)
def main():
@ -156,6 +295,12 @@ def main():
ctx = Context()
output = ctx.make_workflow_output()
print(f"matrix={output}")
case ["locate-baseline", *flags]:
locate_baseline(flags)
case ["check-regressions"]:
check_iai_regressions(None)
case ["check-regressions", iai_home]:
check_iai_regressions(iai_home)
case ["--help" | "-h"]:
print(USAGE)
exit()

View file

@ -20,6 +20,9 @@ build-musl = ["dep:musl-math-sys"]
# Enable report generation without bringing in more dependencies by default
benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
# Enable icount benchmarks (requires iai-callgrind and valgrind)
icount = ["dep:iai-callgrind"]
# Run with a reduced set of benchmarks, such as for CI
short-benchmarks = []
@ -27,6 +30,7 @@ short-benchmarks = []
anyhow = "1.0.90"
az = { version = "1.2.1", optional = true }
gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false, features = ["mpfr"] }
iai-callgrind = { version = "0.14.0", optional = true }
indicatif = { version = "0.17.9", default-features = false }
libm = { path = "../..", features = ["unstable-public-internals"] }
libm-macros = { path = "../libm-macros" }
@ -48,6 +52,11 @@ rand = { version = "0.8.5", optional = true }
criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
libtest-mimic = "0.8.1"
[[bench]]
name = "icount"
harness = false
required-features = ["icount"]
[[bench]]
name = "random"
harness = false

View file

@ -0,0 +1,175 @@
//! Benchmarks that use `iai-callgrind` to be reasonably CI-stable.
use std::hint::black_box;
use iai_callgrind::{library_benchmark, library_benchmark_group, main};
use libm_test::gen::spaced;
use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op};
const BENCH_ITER_ITEMS: u64 = 500;
/// For one libm function, expand to a setup function, an iai-callgrind
/// benchmark that consumes its output, and a benchmark group wrapping it.
/// Invoked once per function by `libm_macros::for_each_function!` below.
macro_rules! icount_benches {
    (
        fn_name: $fn_name:ident,
        attrs: [$($_attr:meta),*],
    ) => {
        paste::paste! {
            // Construct benchmark inputs from the logspace generator.
            fn [< setup_ $fn_name >]() -> Vec<OpRustArgs<op::$fn_name::Routine>> {
                type Op = op::$fn_name::Routine;
                let mut ctx = CheckCtx::new(
                    Op::IDENTIFIER,
                    CheckBasis::None,
                    GeneratorKind::QuickSpaced
                );
                // Fix the workload size so instruction counts are comparable
                // across runs rather than scaled by the test-suite heuristic.
                ctx.override_iterations(BENCH_ITER_ITEMS);
                let ret = spaced::get_test_cases::<Op>(&ctx).0.collect::<Vec<_>>();
                println!("operation {}, {} steps", Op::NAME, ret.len());
                ret
            }

            // Run benchmarks with the above inputs.
            #[library_benchmark]
            #[bench::logspace([< setup_ $fn_name >]())]
            fn [< icount_bench_ $fn_name >](cases: Vec<OpRustArgs<op::$fn_name::Routine>>) {
                type Op = op::$fn_name::Routine;
                // `black_box` keeps the routine from being const-folded away.
                let f = black_box(Op::ROUTINE);
                for input in cases.iter().copied() {
                    input.call(f);
                }
            }

            library_benchmark_group!(
                name = [< icount_bench_ $fn_name _group >];
                benchmarks = [< icount_bench_ $fn_name >]
            );
        }
    };
}
// Instantiate `icount_benches!` (setup fn, benchmark, and group) for every
// function libm provides.
libm_macros::for_each_function! {
    callback: icount_benches,
}
// Register every generated benchmark group with the iai-callgrind harness.
// NOTE(review): this list is maintained by hand and must stay in sync with the
// functions expanded by `for_each_function!` above — a missing entry silently
// drops that benchmark.
main!(
    library_benchmark_groups = icount_bench_acos_group,
    icount_bench_acosf_group,
    icount_bench_acosh_group,
    icount_bench_acoshf_group,
    icount_bench_asin_group,
    icount_bench_asinf_group,
    icount_bench_asinh_group,
    icount_bench_asinhf_group,
    icount_bench_atan2_group,
    icount_bench_atan2f_group,
    icount_bench_atan_group,
    icount_bench_atanf_group,
    icount_bench_atanh_group,
    icount_bench_atanhf_group,
    icount_bench_cbrt_group,
    icount_bench_cbrtf_group,
    icount_bench_ceil_group,
    icount_bench_ceilf_group,
    icount_bench_copysign_group,
    icount_bench_copysignf128_group,
    icount_bench_copysignf16_group,
    icount_bench_copysignf_group,
    icount_bench_cos_group,
    icount_bench_cosf_group,
    icount_bench_cosh_group,
    icount_bench_coshf_group,
    icount_bench_erf_group,
    icount_bench_erfc_group,
    icount_bench_erfcf_group,
    icount_bench_erff_group,
    icount_bench_exp10_group,
    icount_bench_exp10f_group,
    icount_bench_exp2_group,
    icount_bench_exp2f_group,
    icount_bench_exp_group,
    icount_bench_expf_group,
    icount_bench_expm1_group,
    icount_bench_expm1f_group,
    icount_bench_fabs_group,
    icount_bench_fabsf128_group,
    icount_bench_fabsf16_group,
    icount_bench_fabsf_group,
    icount_bench_fdim_group,
    icount_bench_fdimf128_group,
    icount_bench_fdimf16_group,
    icount_bench_fdimf_group,
    icount_bench_floor_group,
    icount_bench_floorf_group,
    icount_bench_fma_group,
    icount_bench_fmaf_group,
    icount_bench_fmax_group,
    icount_bench_fmaxf_group,
    icount_bench_fmin_group,
    icount_bench_fminf_group,
    icount_bench_fmod_group,
    icount_bench_fmodf_group,
    icount_bench_frexp_group,
    icount_bench_frexpf_group,
    icount_bench_hypot_group,
    icount_bench_hypotf_group,
    icount_bench_ilogb_group,
    icount_bench_ilogbf_group,
    icount_bench_j0_group,
    icount_bench_j0f_group,
    icount_bench_j1_group,
    icount_bench_j1f_group,
    icount_bench_jn_group,
    icount_bench_jnf_group,
    icount_bench_ldexp_group,
    icount_bench_ldexpf_group,
    icount_bench_lgamma_group,
    icount_bench_lgamma_r_group,
    icount_bench_lgammaf_group,
    icount_bench_lgammaf_r_group,
    icount_bench_log10_group,
    icount_bench_log10f_group,
    icount_bench_log1p_group,
    icount_bench_log1pf_group,
    icount_bench_log2_group,
    icount_bench_log2f_group,
    icount_bench_log_group,
    icount_bench_logf_group,
    icount_bench_modf_group,
    icount_bench_modff_group,
    icount_bench_nextafter_group,
    icount_bench_nextafterf_group,
    icount_bench_pow_group,
    icount_bench_powf_group,
    icount_bench_remainder_group,
    icount_bench_remainderf_group,
    icount_bench_remquo_group,
    icount_bench_remquof_group,
    icount_bench_rint_group,
    icount_bench_rintf_group,
    icount_bench_round_group,
    icount_bench_roundf_group,
    icount_bench_scalbn_group,
    icount_bench_scalbnf_group,
    icount_bench_sin_group,
    icount_bench_sinf_group,
    icount_bench_sinh_group,
    icount_bench_sinhf_group,
    icount_bench_sqrt_group,
    icount_bench_sqrtf_group,
    icount_bench_tan_group,
    icount_bench_tanf_group,
    icount_bench_tanh_group,
    icount_bench_tanhf_group,
    icount_bench_tgamma_group,
    icount_bench_tgammaf_group,
    icount_bench_trunc_group,
    icount_bench_truncf128_group,
    icount_bench_truncf16_group,
    icount_bench_truncf_group,
    icount_bench_y0_group,
    icount_bench_y0f_group,
    icount_bench_y1_group,
    icount_bench_y1f_group,
    icount_bench_yn_group,
    icount_bench_ynf_group,
);

View file

@ -151,8 +151,8 @@ pub fn musl_random() {
// about the same time as other tests.
if cfg!(feature = "short-benchmarks") {
criterion = criterion
.warm_up_time(Duration::from_millis(500))
.measurement_time(Duration::from_millis(1000));
.warm_up_time(Duration::from_millis(200))
.measurement_time(Duration::from_millis(600));
}
criterion = criterion.configure_from_args();

View file

@ -24,7 +24,8 @@ pub use f8_impl::f8;
pub use libm::support::{Float, Int, IntTy, MinInt};
pub use num::{FloatExt, linear_ints, logspace};
pub use op::{
BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustFn, OpRustRet, Ty,
BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustArgs, OpRustFn, OpRustRet,
Ty,
};
pub use precision::{MaybeOverride, SpecialCase, default_ulp};
use run_cfg::EXTENSIVE_MAX_ITERATIONS;

View file

@ -100,6 +100,8 @@ pub type OpCFn<Op> = <Op as MathOp>::CFn;
pub type OpCRet<Op> = <Op as MathOp>::CRet;
/// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types).
pub type OpRustFn<Op> = <Op as MathOp>::RustFn;
/// Access the associated `RustArgs` type from an op (helper to avoid ambiguous associated types).
pub type OpRustArgs<Op> = <Op as MathOp>::RustArgs;
/// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types).
pub type OpRustRet<Op> = <Op as MathOp>::RustRet;

View file

@ -4,6 +4,7 @@
use core::f32;
use CheckBasis::{Mpfr, Musl};
use libm::support::CastFrom;
use {BaseName as Bn, Identifier as Id};
use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult};
@ -524,7 +525,7 @@ impl MaybeOverride<(i32, f32)> for SpecialCase {
ctx: &CheckCtx,
) -> Option<TestResult> {
match (&ctx.basis, ctx.base_name) {
(Musl, _) => bessel_prec_dropoff(input, ulp, ctx),
(Musl, _) => bessel_prec_dropoff(input, actual, expected, ulp, ctx),
// We return +0.0, MPFR returns -0.0
(Mpfr, BaseName::Jn | BaseName::Yn)
@ -554,7 +555,7 @@ impl MaybeOverride<(i32, f64)> for SpecialCase {
ctx: &CheckCtx,
) -> Option<TestResult> {
match (&ctx.basis, ctx.base_name) {
(Musl, _) => bessel_prec_dropoff(input, ulp, ctx),
(Musl, _) => bessel_prec_dropoff(input, actual, expected, ulp, ctx),
// We return +0.0, MPFR returns -0.0
(Mpfr, BaseName::Jn | BaseName::Yn)
@ -569,8 +570,10 @@ impl MaybeOverride<(i32, f64)> for SpecialCase {
}
/// Our bessel functions blow up with large N values
fn bessel_prec_dropoff<F: Float>(
input: (i32, F),
fn bessel_prec_dropoff<F1: Float, F2: Float>(
input: (i32, F1),
actual: F2,
expected: F2,
ulp: &mut u32,
ctx: &CheckCtx,
) -> Option<TestResult> {
@ -585,6 +588,17 @@ fn bessel_prec_dropoff<F: Float>(
}
}
// Values near infinity sometimes get cut off for us. `ynf(681, 509.90924) = -inf` but should
// be -3.2161271e38.
if ctx.fn_ident == Identifier::Ynf
&& !expected.is_infinite()
&& actual.is_infinite()
&& (expected.abs().to_bits().abs_diff(actual.abs().to_bits())
< F2::Int::cast_from(1_000_000u32))
{
return XFAIL;
}
None
}

View file

@ -40,6 +40,8 @@ pub struct CheckCtx {
/// Source of truth for tests.
pub basis: CheckBasis,
pub gen_kind: GeneratorKind,
/// If specified, this value will override the value returned by [`iteration_count`].
pub override_iterations: Option<u64>,
}
impl CheckCtx {
@ -53,6 +55,7 @@ impl CheckCtx {
base_name_str: fn_ident.base_name().as_str(),
basis,
gen_kind,
override_iterations: None,
};
ret.ulp = crate::default_ulp(&ret);
ret
@ -62,6 +65,10 @@ impl CheckCtx {
pub fn input_count(&self) -> usize {
self.fn_ident.math_op().rust_sig.args.len()
}
/// Fix the iteration count for this context, overriding the value that
/// [`iteration_count`] would otherwise compute (used by benchmarks that
/// need a deterministic workload size).
pub fn override_iterations(&mut self, count: u64) {
    self.override_iterations = Some(count)
}
}
/// Possible items to test against
@ -71,6 +78,8 @@ pub enum CheckBasis {
Musl,
/// Check against infinite precision (MPFR).
Mpfr,
/// Benchmarks or other times when this is not relevant.
None,
}
/// The different kinds of generators that provide test input, which account for input pattern
@ -216,6 +225,12 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
total_iterations = 800;
}
let mut overridden = false;
if let Some(count) = ctx.override_iterations {
total_iterations = count;
overridden = true;
}
// Adjust for the number of inputs
let ntests = match t_env.input_count {
1 => total_iterations,
@ -223,6 +238,7 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
3 => (total_iterations as f64).cbrt().ceil() as u64,
_ => panic!("test has more than three arguments"),
};
let total = ntests.pow(t_env.input_count.try_into().unwrap());
let seed_msg = match ctx.gen_kind {
@ -235,12 +251,13 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
test_log(&format!(
"{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {ntests} iterations \
({total} total){seed_msg}",
({total} total){seed_msg}{omsg}",
gen_kind = ctx.gen_kind,
basis = ctx.basis,
fn_ident = ctx.fn_ident,
arg = argnum + 1,
args = t_env.input_count,
omsg = if overridden { " (overridden)" } else { "" }
));
ntests