diff --git a/library/compiler-builtins/libm/.github/workflows/main.yaml b/library/compiler-builtins/libm/.github/workflows/main.yaml index 99a32a82ec1d..9face93110b1 100644 --- a/library/compiler-builtins/libm/.github/workflows/main.yaml +++ b/library/compiler-builtins/libm/.github/workflows/main.yaml @@ -10,6 +10,7 @@ env: RUSTDOCFLAGS: -Dwarnings RUSTFLAGS: -Dwarnings RUST_BACKTRACE: full + BENCHMARK_RUSTC: nightly-2025-01-16 # Pin the toolchain for reproducible results jobs: test: @@ -147,19 +148,70 @@ jobs: benchmarks: name: Benchmarks runs-on: ubuntu-24.04 + timeout-minutes: 20 steps: - uses: actions/checkout@master - - name: Install Rust - run: rustup update nightly --no-self-update && rustup default nightly + - uses: taiki-e/install-action@cargo-binstall + + - name: Set up dependencies + run: | + rustup update "$BENCHMARK_RUSTC" --no-self-update + rustup default "$BENCHMARK_RUSTC" + # Install the version of iai-callgrind-runner that is specified in Cargo.toml + iai_version="$(cargo metadata --format-version=1 --features icount | + jq -r '.packages[] | select(.name == "iai-callgrind").version')" + cargo binstall -y iai-callgrind-runner --version "$iai_version" + sudo apt-get install valgrind + - uses: Swatinem/rust-cache@v2 - name: Download musl source run: ./ci/download-musl.sh - - run: | + + - name: Run icount benchmarks + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + set -eux + iai_home="iai-home" + # Download the baseline from master + ./ci/ci-util.py locate-baseline --download --extract + + # Run iai-callgrind benchmarks + cargo bench --no-default-features \ + --features unstable,unstable-float,icount \ + --bench icount \ + -- \ + --save-baseline=default \ + --home "$(pwd)/$iai_home" \ + --regression='ir=5.0' \ + --save-summary + # NB: iai-callgrind should exit on error but does not, so we inspect the summary + # for errors. 
See https://github.com/iai-callgrind/iai-callgrind/issues/337 + ./ci/ci-util.py check-regressions "$iai_home" + + # Name and tar the new baseline + name="baseline-icount-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}" + echo "BASELINE_NAME=$name" >> "$GITHUB_ENV" + tar cJf "$name.tar.xz" "$iai_home" + + - name: Upload the benchmark baseline + uses: actions/upload-artifact@v4 + with: + name: ${{ env.BASELINE_NAME }} + path: ${{ env.BASELINE_NAME }}.tar.xz + + - name: Run wall time benchmarks + run: | # Always use the same seed for benchmarks. Ideally we should switch to a # non-random generator. export LIBM_SEED=benchesbenchesbenchesbencheswoo! cargo bench --all --features libm-test/short-benchmarks,libm-test/build-musl + - name: Print test logs if available + if: always() + run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi + shell: bash + msrv: name: Check MSRV runs-on: ubuntu-24.04 diff --git a/library/compiler-builtins/libm/Cargo.toml b/library/compiler-builtins/libm/Cargo.toml index f84f3eac6a4d..18d89997dcba 100644 --- a/library/compiler-builtins/libm/Cargo.toml +++ b/library/compiler-builtins/libm/Cargo.toml @@ -73,3 +73,7 @@ debug-assertions = true inherits = "release" lto = "fat" overflow-checks = true + +[profile.bench] +# Required for iai-callgrind +debug = true diff --git a/library/compiler-builtins/libm/ci/ci-util.py b/library/compiler-builtins/libm/ci/ci-util.py index 733ec26fa33c..1ec69b002aed 100755 --- a/library/compiler-builtins/libm/ci/ci-util.py +++ b/library/compiler-builtins/libm/ci/ci-util.py @@ -9,6 +9,7 @@ import json import subprocess as sp import sys from dataclasses import dataclass +from glob import glob, iglob from inspect import cleandoc from os import getenv from pathlib import Path @@ -18,16 +19,33 @@ USAGE = cleandoc( """ usage: - ./ci/ci-util.py + ./ci/ci-util.py [flags] - SUBCOMMAND: - generate-matrix Calculate a matrix of which functions had source change, - print that as JSON object. 
+ COMMAND: + generate-matrix + Calculate a matrix of which functions had source change, print that as + a JSON object. + + locate-baseline [--download] [--extract] + Locate the most recent benchmark baseline available in CI and, if flags + specify, download and extract it. Never exits with nonzero status if + downloading fails. + + Note that `--extract` will overwrite files in `iai-home`. + + check-regressions [iai-home] + Check `iai-home` (or `iai-home` if unspecified) for `summary.json` + files and see if there are any regressions. This is used as a workaround + for `iai-callgrind` not exiting with error status; see + <https://github.com/iai-callgrind/iai-callgrind/issues/337>. """ ) REPO_ROOT = Path(__file__).parent.parent GIT = ["git", "-C", REPO_ROOT] +DEFAULT_BRANCH = "master" +WORKFLOW_NAME = "CI" # Workflow that generates the benchmark artifacts +ARTIFACT_GLOB = "baseline-icount*" # Don't run exhaustive tests if these files change, even if they contaiin a function # definition. @@ -40,6 +58,11 @@ IGNORE_FILES = [ TYPES = ["f16", "f32", "f64", "f128"] +def eprint(*args, **kwargs): + """Print to stderr.""" + print(*args, file=sys.stderr, **kwargs) + + class FunctionDef(TypedDict): """Type for an entry in `function-definitions.json`""" @@ -145,9 +168,125 @@ class Context: return output -def eprint(*args, **kwargs): - """Print to stderr.""" - print(*args, file=sys.stderr, **kwargs) +def locate_baseline(flags: list[str]) -> None: + """Find the most recent baseline from CI, download it if specified. + + This returns rather than erroring, even if the `gh` commands fail. This is to avoid + erroring in CI if the baseline is unavailable (artifact time limit exceeded, first + run on the branch, etc). 
+ """ + + download = False + extract = False + + while len(flags) > 0: + match flags[0]: + case "--download": + download = True + case "--extract": + extract = True + case _: + eprint(USAGE) + exit(1) + flags = flags[1:] + + if extract and not download: + eprint("cannot extract without downloading") + exit(1) + + try: + # Locate the most recent job to complete with success on our branch + latest_job = sp.check_output( + [ + "gh", + "run", + "list", + "--limit=1", + "--status=success", + f"--branch={DEFAULT_BRANCH}", + "--json=databaseId,url,headSha,conclusion,createdAt," + "status,workflowDatabaseId,workflowName", + f'--jq=select(.[].workflowName == "{WORKFLOW_NAME}")', + ], + text=True, + ) + eprint(f"latest: '{latest_job}'") + except (sp.CalledProcessError, OSError) as e: # OSError covers a missing `gh` binary + eprint(f"failed to run github command: {e}") + return + + try: + latest = json.loads(latest_job)[0] + eprint("latest job: ", json.dumps(latest, indent=4)) + except json.JSONDecodeError as e: + eprint(f"failed to decode json '{latest_job}', {e}") + return + + if not download: + eprint("--download not specified, returning") + return + + job_id = latest.get("databaseId") + if job_id is None: + eprint("skipping download step") + return + + sp.run( + ["gh", "run", "download", str(job_id), f"--pattern={ARTIFACT_GLOB}"], + check=False, + ) + + if not extract: + eprint("skipping extraction step") + return + + # Find the baseline with the most recent timestamp. GH downloads the files to e.g. 
+ `some-dirname/some-dirname.tar.xz`, so just glob the whole thing together. + candidate_baselines = glob(f"{ARTIFACT_GLOB}/{ARTIFACT_GLOB}") + if len(candidate_baselines) == 0: + eprint("no possible baseline directories found") + return + + candidate_baselines.sort(reverse=True) + baseline_archive = candidate_baselines[0] + eprint(f"extracting {baseline_archive}") + sp.run(["tar", "xJvf", baseline_archive], check=True) + eprint("baseline extracted successfully") + + +def check_iai_regressions(iai_home: str | None | Path): + """Find regressions in iai summary.json files, exit with failure if any are + found. + """ + if iai_home is None: + iai_home = "iai-home" + iai_home = Path(iai_home) + + found_summaries = False + regressions = [] + for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True): + found_summaries = True + with open(iai_home / summary_path, "r") as f: + summary = json.load(f) + + summary_regs = [] + run = summary["callgrind_summary"]["callgrind_run"] + name_entry = {"name": f"{summary['function_name']}.{summary['id']}"} + + for segment in run["segments"]: + summary_regs.extend(segment["regressions"]) + + summary_regs.extend(run["total"]["regressions"]) + + regressions.extend(name_entry | reg for reg in summary_regs) + + if not found_summaries: + eprint(f"did not find any summary.json files within {iai_home}") + exit(1) + + if len(regressions) > 0: + eprint("Found regressions:", json.dumps(regressions, indent=4)) + exit(1) def main(): @@ -156,6 +295,12 @@ def main(): ctx = Context() output = ctx.make_workflow_output() print(f"matrix={output}") + case ["locate-baseline", *flags]: + locate_baseline(flags) + case ["check-regressions"]: + check_iai_regressions(None) + case ["check-regressions", iai_home]: + check_iai_regressions(iai_home) case ["--help" | "-h"]: print(USAGE) exit() diff --git a/library/compiler-builtins/libm/crates/libm-test/Cargo.toml b/library/compiler-builtins/libm/crates/libm-test/Cargo.toml index d3f18ab3e5bc..3a1ba87962a7 100644 --- 
a/library/compiler-builtins/libm/crates/libm-test/Cargo.toml +++ b/library/compiler-builtins/libm/crates/libm-test/Cargo.toml @@ -20,6 +20,9 @@ build-musl = ["dep:musl-math-sys"] # Enable report generation without bringing in more dependencies by default benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] +# Enable icount benchmarks (requires iai-callgrind and valgrind) +icount = ["dep:iai-callgrind"] + # Run with a reduced set of benchmarks, such as for CI short-benchmarks = [] @@ -27,6 +30,7 @@ short-benchmarks = [] anyhow = "1.0.90" az = { version = "1.2.1", optional = true } gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false, features = ["mpfr"] } +iai-callgrind = { version = "0.14.0", optional = true } indicatif = { version = "0.17.9", default-features = false } libm = { path = "../..", features = ["unstable-public-internals"] } libm-macros = { path = "../libm-macros" } @@ -48,6 +52,11 @@ rand = { version = "0.8.5", optional = true } criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] } libtest-mimic = "0.8.1" +[[bench]] +name = "icount" +harness = false +required-features = ["icount"] + [[bench]] name = "random" harness = false diff --git a/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs b/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs new file mode 100644 index 000000000000..3a66249e85dc --- /dev/null +++ b/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs @@ -0,0 +1,175 @@ +//! Benchmarks that use `iai-callgrind` to be reasonably CI-stable. + +use std::hint::black_box; + +use iai_callgrind::{library_benchmark, library_benchmark_group, main}; +use libm_test::gen::spaced; +use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op}; + +const BENCH_ITER_ITEMS: u64 = 500; + +macro_rules! icount_benches { + ( + fn_name: $fn_name:ident, + attrs: [$($_attr:meta),*], + ) => { + paste::paste! 
{ + // Construct benchmark inputs from the logspace generator. + fn [< setup_ $fn_name >]() -> Vec> { + type Op = op::$fn_name::Routine; + let mut ctx = CheckCtx::new( + Op::IDENTIFIER, + CheckBasis::None, + GeneratorKind::QuickSpaced + ); + ctx.override_iterations(BENCH_ITER_ITEMS); + let ret = spaced::get_test_cases::(&ctx).0.collect::>(); + println!("operation {}, {} steps", Op::NAME, ret.len()); + ret + } + + // Run benchmarks with the above inputs. + #[library_benchmark] + #[bench::logspace([< setup_ $fn_name >]())] + fn [< icount_bench_ $fn_name >](cases: Vec>) { + type Op = op::$fn_name::Routine; + let f = black_box(Op::ROUTINE); + for input in cases.iter().copied() { + input.call(f); + } + } + + library_benchmark_group!( + name = [< icount_bench_ $fn_name _group >]; + benchmarks = [< icount_bench_ $fn_name >] + ); + } + }; +} + +libm_macros::for_each_function! { + callback: icount_benches, +} + +main!( + library_benchmark_groups = icount_bench_acos_group, + icount_bench_acosf_group, + icount_bench_acosh_group, + icount_bench_acoshf_group, + icount_bench_asin_group, + icount_bench_asinf_group, + icount_bench_asinh_group, + icount_bench_asinhf_group, + icount_bench_atan2_group, + icount_bench_atan2f_group, + icount_bench_atan_group, + icount_bench_atanf_group, + icount_bench_atanh_group, + icount_bench_atanhf_group, + icount_bench_cbrt_group, + icount_bench_cbrtf_group, + icount_bench_ceil_group, + icount_bench_ceilf_group, + icount_bench_copysign_group, + icount_bench_copysignf128_group, + icount_bench_copysignf16_group, + icount_bench_copysignf_group, + icount_bench_cos_group, + icount_bench_cosf_group, + icount_bench_cosh_group, + icount_bench_coshf_group, + icount_bench_erf_group, + icount_bench_erfc_group, + icount_bench_erfcf_group, + icount_bench_erff_group, + icount_bench_exp10_group, + icount_bench_exp10f_group, + icount_bench_exp2_group, + icount_bench_exp2f_group, + icount_bench_exp_group, + icount_bench_expf_group, + icount_bench_expm1_group, + 
icount_bench_expm1f_group, + icount_bench_fabs_group, + icount_bench_fabsf128_group, + icount_bench_fabsf16_group, + icount_bench_fabsf_group, + icount_bench_fdim_group, + icount_bench_fdimf128_group, + icount_bench_fdimf16_group, + icount_bench_fdimf_group, + icount_bench_floor_group, + icount_bench_floorf_group, + icount_bench_fma_group, + icount_bench_fmaf_group, + icount_bench_fmax_group, + icount_bench_fmaxf_group, + icount_bench_fmin_group, + icount_bench_fminf_group, + icount_bench_fmod_group, + icount_bench_fmodf_group, + icount_bench_frexp_group, + icount_bench_frexpf_group, + icount_bench_hypot_group, + icount_bench_hypotf_group, + icount_bench_ilogb_group, + icount_bench_ilogbf_group, + icount_bench_j0_group, + icount_bench_j0f_group, + icount_bench_j1_group, + icount_bench_j1f_group, + icount_bench_jn_group, + icount_bench_jnf_group, + icount_bench_ldexp_group, + icount_bench_ldexpf_group, + icount_bench_lgamma_group, + icount_bench_lgamma_r_group, + icount_bench_lgammaf_group, + icount_bench_lgammaf_r_group, + icount_bench_log10_group, + icount_bench_log10f_group, + icount_bench_log1p_group, + icount_bench_log1pf_group, + icount_bench_log2_group, + icount_bench_log2f_group, + icount_bench_log_group, + icount_bench_logf_group, + icount_bench_modf_group, + icount_bench_modff_group, + icount_bench_nextafter_group, + icount_bench_nextafterf_group, + icount_bench_pow_group, + icount_bench_powf_group, + icount_bench_remainder_group, + icount_bench_remainderf_group, + icount_bench_remquo_group, + icount_bench_remquof_group, + icount_bench_rint_group, + icount_bench_rintf_group, + icount_bench_round_group, + icount_bench_roundf_group, + icount_bench_scalbn_group, + icount_bench_scalbnf_group, + icount_bench_sin_group, + icount_bench_sinf_group, + icount_bench_sinh_group, + icount_bench_sinhf_group, + icount_bench_sqrt_group, + icount_bench_sqrtf_group, + icount_bench_tan_group, + icount_bench_tanf_group, + icount_bench_tanh_group, + icount_bench_tanhf_group, + 
icount_bench_tgamma_group, + icount_bench_tgammaf_group, + icount_bench_trunc_group, + icount_bench_truncf128_group, + icount_bench_truncf16_group, + icount_bench_truncf_group, + icount_bench_y0_group, + icount_bench_y0f_group, + icount_bench_y1_group, + icount_bench_y1f_group, + icount_bench_yn_group, + icount_bench_ynf_group, +); diff --git a/library/compiler-builtins/libm/crates/libm-test/benches/random.rs b/library/compiler-builtins/libm/crates/libm-test/benches/random.rs index dcc7c1acac36..888161265100 100644 --- a/library/compiler-builtins/libm/crates/libm-test/benches/random.rs +++ b/library/compiler-builtins/libm/crates/libm-test/benches/random.rs @@ -151,8 +151,8 @@ pub fn musl_random() { // about the same time as other tests. if cfg!(feature = "short-benchmarks") { criterion = criterion - .warm_up_time(Duration::from_millis(500)) - .measurement_time(Duration::from_millis(1000)); + .warm_up_time(Duration::from_millis(200)) + .measurement_time(Duration::from_millis(600)); } criterion = criterion.configure_from_args(); diff --git a/library/compiler-builtins/libm/crates/libm-test/src/lib.rs b/library/compiler-builtins/libm/crates/libm-test/src/lib.rs index cb89f1c8ba7f..b90423c1bacc 100644 --- a/library/compiler-builtins/libm/crates/libm-test/src/lib.rs +++ b/library/compiler-builtins/libm/crates/libm-test/src/lib.rs @@ -24,7 +24,8 @@ pub use f8_impl::f8; pub use libm::support::{Float, Int, IntTy, MinInt}; pub use num::{FloatExt, linear_ints, logspace}; pub use op::{ - BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustFn, OpRustRet, Ty, + BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustArgs, OpRustFn, OpRustRet, + Ty, }; pub use precision::{MaybeOverride, SpecialCase, default_ulp}; use run_cfg::EXTENSIVE_MAX_ITERATIONS; diff --git a/library/compiler-builtins/libm/crates/libm-test/src/op.rs b/library/compiler-builtins/libm/crates/libm-test/src/op.rs index 8329d3424c32..239c9a3e1fdf 100644 --- 
a/library/compiler-builtins/libm/crates/libm-test/src/op.rs +++ b/library/compiler-builtins/libm/crates/libm-test/src/op.rs @@ -100,6 +100,8 @@ pub type OpCFn = ::CFn; pub type OpCRet = ::CRet; /// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types). pub type OpRustFn = ::RustFn; +/// Access the associated `RustArgs` type from an op (helper to avoid ambiguous associated types). +pub type OpRustArgs = ::RustArgs; /// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types). pub type OpRustRet = ::RustRet; diff --git a/library/compiler-builtins/libm/crates/libm-test/src/precision.rs b/library/compiler-builtins/libm/crates/libm-test/src/precision.rs index 3cb5e420f260..9d17ab8cccfd 100644 --- a/library/compiler-builtins/libm/crates/libm-test/src/precision.rs +++ b/library/compiler-builtins/libm/crates/libm-test/src/precision.rs @@ -4,6 +4,7 @@ use core::f32; use CheckBasis::{Mpfr, Musl}; +use libm::support::CastFrom; use {BaseName as Bn, Identifier as Id}; use crate::{BaseName, CheckBasis, CheckCtx, Float, Identifier, Int, TestResult}; @@ -524,7 +525,7 @@ impl MaybeOverride<(i32, f32)> for SpecialCase { ctx: &CheckCtx, ) -> Option { match (&ctx.basis, ctx.base_name) { - (Musl, _) => bessel_prec_dropoff(input, ulp, ctx), + (Musl, _) => bessel_prec_dropoff(input, actual, expected, ulp, ctx), // We return +0.0, MPFR returns -0.0 (Mpfr, BaseName::Jn | BaseName::Yn) @@ -554,7 +555,7 @@ impl MaybeOverride<(i32, f64)> for SpecialCase { ctx: &CheckCtx, ) -> Option { match (&ctx.basis, ctx.base_name) { - (Musl, _) => bessel_prec_dropoff(input, ulp, ctx), + (Musl, _) => bessel_prec_dropoff(input, actual, expected, ulp, ctx), // We return +0.0, MPFR returns -0.0 (Mpfr, BaseName::Jn | BaseName::Yn) @@ -569,8 +570,10 @@ impl MaybeOverride<(i32, f64)> for SpecialCase { } /// Our bessel functions blow up with large N values -fn bessel_prec_dropoff( - input: (i32, F), +fn bessel_prec_dropoff( + input: (i32, 
F1), + actual: F2, + expected: F2, ulp: &mut u32, ctx: &CheckCtx, ) -> Option { @@ -585,6 +588,17 @@ fn bessel_prec_dropoff( } } + // Values near infinity sometimes get cut off for us. `ynf(681, 509.90924) = -inf` but should + // be -3.2161271e38. + if ctx.fn_ident == Identifier::Ynf + && !expected.is_infinite() + && actual.is_infinite() + && (expected.abs().to_bits().abs_diff(actual.abs().to_bits()) + < F2::Int::cast_from(1_000_000u32)) + { + return XFAIL; + } + None } diff --git a/library/compiler-builtins/libm/crates/libm-test/src/run_cfg.rs b/library/compiler-builtins/libm/crates/libm-test/src/run_cfg.rs index 6763de8bcc8d..3e91101f6cac 100644 --- a/library/compiler-builtins/libm/crates/libm-test/src/run_cfg.rs +++ b/library/compiler-builtins/libm/crates/libm-test/src/run_cfg.rs @@ -40,6 +40,8 @@ pub struct CheckCtx { /// Source of truth for tests. pub basis: CheckBasis, pub gen_kind: GeneratorKind, + /// If specified, this value will override the value returned by [`iteration_count`]. + pub override_iterations: Option, } impl CheckCtx { @@ -53,6 +55,7 @@ impl CheckCtx { base_name_str: fn_ident.base_name().as_str(), basis, gen_kind, + override_iterations: None, }; ret.ulp = crate::default_ulp(&ret); ret @@ -62,6 +65,10 @@ impl CheckCtx { pub fn input_count(&self) -> usize { self.fn_ident.math_op().rust_sig.args.len() } + + pub fn override_iterations(&mut self, count: u64) { + self.override_iterations = Some(count) + } } /// Possible items to test against @@ -71,6 +78,8 @@ pub enum CheckBasis { Musl, /// Check against infinite precision (MPFR). Mpfr, + /// Benchmarks or other times when this is not relevant. 
+ None, } /// The different kinds of generators that provide test input, which account for input pattern @@ -216,6 +225,12 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { total_iterations = 800; } + let mut overridden = false; + if let Some(count) = ctx.override_iterations { + total_iterations = count; + overridden = true; + } + // Adjust for the number of inputs let ntests = match t_env.input_count { 1 => total_iterations, @@ -223,6 +238,7 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { 3 => (total_iterations as f64).cbrt().ceil() as u64, _ => panic!("test has more than three arguments"), }; + let total = ntests.pow(t_env.input_count.try_into().unwrap()); let seed_msg = match ctx.gen_kind { @@ -235,12 +251,13 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 { test_log(&format!( "{gen_kind:?} {basis:?} {fn_ident} arg {arg}/{args}: {ntests} iterations \ - ({total} total){seed_msg}", + ({total} total){seed_msg}{omsg}", gen_kind = ctx.gen_kind, basis = ctx.basis, fn_ident = ctx.fn_ident, arg = argnum + 1, args = t_env.input_count, + omsg = if overridden { " (overridden)" } else { "" } )); ntests