From 490ebbb187ff309db6599eb1616b100114dc834d Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Thu, 16 Jan 2025 07:30:38 +0000 Subject: [PATCH] Add benchmarks using iai-callgrind Running walltime benchmarks in CI is notoriously unstable, Introduce benchmarks that instead use instruction count and other more reproducible metrics, using `iai-callgrind` [1], which we are able to run in CI with a high degree of reproducibility. Inputs to this benchmark are a logspace sweep, which gives an approximation for real-world use, but may fail to indicate outlier cases. [1]: https://github.com/iai-callgrind/iai-callgrind --- library/compiler-builtins/libm/Cargo.toml | 4 + .../libm/crates/libm-test/Cargo.toml | 9 + .../libm/crates/libm-test/benches/icount.rs | 175 ++++++++++++++++++ .../libm/crates/libm-test/src/lib.rs | 3 +- .../libm/crates/libm-test/src/op.rs | 2 + 5 files changed, 192 insertions(+), 1 deletion(-) create mode 100644 library/compiler-builtins/libm/crates/libm-test/benches/icount.rs diff --git a/library/compiler-builtins/libm/Cargo.toml b/library/compiler-builtins/libm/Cargo.toml index f84f3eac6a4d..18d89997dcba 100644 --- a/library/compiler-builtins/libm/Cargo.toml +++ b/library/compiler-builtins/libm/Cargo.toml @@ -73,3 +73,7 @@ debug-assertions = true inherits = "release" lto = "fat" overflow-checks = true + +[profile.bench] +# Required for iai-callgrind +debug = true diff --git a/library/compiler-builtins/libm/crates/libm-test/Cargo.toml b/library/compiler-builtins/libm/crates/libm-test/Cargo.toml index d3f18ab3e5bc..3a1ba87962a7 100644 --- a/library/compiler-builtins/libm/crates/libm-test/Cargo.toml +++ b/library/compiler-builtins/libm/crates/libm-test/Cargo.toml @@ -20,6 +20,9 @@ build-musl = ["dep:musl-math-sys"] # Enable report generation without bringing in more dependencies by default benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] +# Enable icount benchmarks (requires iai-callgrind and valgrind) +icount = ["dep:iai-callgrind"] + # Run with a reduced set of benchmarks, such as for CI short-benchmarks = [] @@ -27,6 +30,7 @@ short-benchmarks = [] anyhow = "1.0.90" az = { version = "1.2.1", optional = true } gmp-mpfr-sys = { version = "1.6.4", optional = true, default-features = false, features = ["mpfr"] } +iai-callgrind = { version = "0.14.0", optional = true } indicatif = { version = "0.17.9", default-features = false } libm = { path = "../..", features = ["unstable-public-internals"] } libm-macros = { path = "../libm-macros" } @@ -48,6 +52,11 @@ rand = { version = "0.8.5", optional = true } criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] } libtest-mimic = "0.8.1" +[[bench]] +name = "icount" +harness = false +required-features = ["icount"] + [[bench]] name = "random" harness = false diff --git a/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs b/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs new file mode 100644 index 000000000000..3a66249e85dc --- /dev/null +++ b/library/compiler-builtins/libm/crates/libm-test/benches/icount.rs @@ -0,0 +1,175 @@ +//! Benchmarks that use `iai-cachegrind` to be reasonably CI-stable. + +use std::hint::black_box; + +use iai_callgrind::{library_benchmark, library_benchmark_group, main}; +use libm_test::gen::spaced; +use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op}; + +const BENCH_ITER_ITEMS: u64 = 500; + +macro_rules! icount_benches { + ( + fn_name: $fn_name:ident, + attrs: [$($_attr:meta),*], + ) => { + paste::paste! { + // Construct benchmark inputs from the logspace generator. + fn [< setup_ $fn_name >]() -> Vec> { + type Op = op::$fn_name::Routine; + let mut ctx = CheckCtx::new( + Op::IDENTIFIER, + CheckBasis::None, + GeneratorKind::QuickSpaced + ); + ctx.override_iterations(BENCH_ITER_ITEMS); + let ret = spaced::get_test_cases::(&ctx).0.collect::>(); + println!("operation {}, {} steps", Op::NAME, ret.len()); + ret + } + + // Run benchmarks with the above inputs. + #[library_benchmark] + #[bench::logspace([< setup_ $fn_name >]())] + fn [< icount_bench_ $fn_name >](cases: Vec>) { + type Op = op::$fn_name::Routine; + let f = black_box(Op::ROUTINE); + for input in cases.iter().copied() { + input.call(f); + } + } + + library_benchmark_group!( + name = [< icount_bench_ $fn_name _group >]; + benchmarks = [< icount_bench_ $fn_name >] + ); + } + }; +} + +libm_macros::for_each_function! { + callback: icount_benches, +} + +main!( + library_benchmark_groups = icount_bench_acos_group, + icount_bench_acosf_group, + icount_bench_acosh_group, + icount_bench_acoshf_group, + icount_bench_asin_group, + icount_bench_asinf_group, + icount_bench_asinh_group, + icount_bench_asinhf_group, + icount_bench_atan2_group, + icount_bench_atan2f_group, + icount_bench_atan_group, + icount_bench_atanf_group, + icount_bench_atanh_group, + icount_bench_atanhf_group, + icount_bench_cbrt_group, + icount_bench_cbrtf_group, + icount_bench_ceil_group, + icount_bench_ceilf_group, + icount_bench_copysign_group, + icount_bench_copysignf128_group, + icount_bench_copysignf16_group, + icount_bench_copysignf_group, + icount_bench_cos_group, + icount_bench_cosf_group, + icount_bench_cosh_group, + icount_bench_coshf_group, + icount_bench_erf_group, + icount_bench_erfc_group, + icount_bench_erfcf_group, + icount_bench_erff_group, + icount_bench_exp10_group, + icount_bench_exp10f_group, + icount_bench_exp2_group, + icount_bench_exp2f_group, + icount_bench_exp_group, + icount_bench_expf_group, + icount_bench_expm1_group, + icount_bench_expm1f_group, + icount_bench_fabs_group, + icount_bench_fabsf128_group, + icount_bench_fabsf16_group, + icount_bench_fabsf_group, + icount_bench_fdim_group, + icount_bench_fdimf128_group, + icount_bench_fdimf16_group, + icount_bench_fdimf_group, + icount_bench_floor_group, + icount_bench_floorf_group, + icount_bench_fma_group, + icount_bench_fmaf_group, + icount_bench_fmax_group, + icount_bench_fmaxf_group, + icount_bench_fmin_group, + icount_bench_fminf_group, + icount_bench_fmod_group, + icount_bench_fmodf_group, + icount_bench_frexp_group, + icount_bench_frexpf_group, + icount_bench_hypot_group, + icount_bench_hypotf_group, + icount_bench_ilogb_group, + icount_bench_ilogbf_group, + icount_bench_j0_group, + icount_bench_j0f_group, + icount_bench_j1_group, + icount_bench_j1f_group, + icount_bench_jn_group, + icount_bench_jnf_group, + icount_bench_ldexp_group, + icount_bench_ldexpf_group, + icount_bench_lgamma_group, + icount_bench_lgamma_r_group, + icount_bench_lgammaf_group, + icount_bench_lgammaf_r_group, + icount_bench_log10_group, + icount_bench_log10f_group, + icount_bench_log1p_group, + icount_bench_log1pf_group, + icount_bench_log2_group, + icount_bench_log2f_group, + icount_bench_log_group, + icount_bench_logf_group, + icount_bench_modf_group, + icount_bench_modff_group, + icount_bench_nextafter_group, + icount_bench_nextafterf_group, + icount_bench_pow_group, + icount_bench_powf_group, + icount_bench_remainder_group, + icount_bench_remainderf_group, + icount_bench_remquo_group, + icount_bench_remquof_group, + icount_bench_rint_group, + icount_bench_rintf_group, + icount_bench_round_group, + icount_bench_roundf_group, + icount_bench_scalbn_group, + icount_bench_scalbnf_group, + icount_bench_sin_group, + icount_bench_sinf_group, + icount_bench_sinh_group, + icount_bench_sinhf_group, + icount_bench_sqrt_group, + icount_bench_sqrtf_group, + icount_bench_tan_group, + icount_bench_tanf_group, + icount_bench_tanh_group, + icount_bench_tanhf_group, + icount_bench_tgamma_group, + icount_bench_tgammaf_group, + icount_bench_trunc_group, + icount_bench_truncf128_group, + icount_bench_truncf16_group, + icount_bench_truncf_group, + icount_bench_y0_group, + icount_bench_y0f_group, + icount_bench_y1_group, + icount_bench_y1f_group, + icount_bench_yn_group, + icount_bench_ynf_group, +); diff --git a/library/compiler-builtins/libm/crates/libm-test/src/lib.rs b/library/compiler-builtins/libm/crates/libm-test/src/lib.rs index cb89f1c8ba7f..b90423c1bacc 100644 --- a/library/compiler-builtins/libm/crates/libm-test/src/lib.rs +++ b/library/compiler-builtins/libm/crates/libm-test/src/lib.rs @@ -24,7 +24,8 @@ pub use f8_impl::f8; pub use libm::support::{Float, Int, IntTy, MinInt}; pub use num::{FloatExt, linear_ints, logspace}; pub use op::{ - BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustFn, OpRustRet, Ty, + BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustArgs, OpRustFn, OpRustRet, + Ty, }; pub use precision::{MaybeOverride, SpecialCase, default_ulp}; use run_cfg::EXTENSIVE_MAX_ITERATIONS; diff --git a/library/compiler-builtins/libm/crates/libm-test/src/op.rs b/library/compiler-builtins/libm/crates/libm-test/src/op.rs index 8329d3424c32..239c9a3e1fdf 100644 --- a/library/compiler-builtins/libm/crates/libm-test/src/op.rs +++ b/library/compiler-builtins/libm/crates/libm-test/src/op.rs @@ -100,6 +100,8 @@ pub type OpCFn = ::CFn; pub type OpCRet = ::CRet; /// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types). pub type OpRustFn = ::RustFn; +/// Access the associated `RustArgs` type from an op (helper to avoid ambiguous associated types). +pub type OpRustArgs = ::RustArgs; /// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types). pub type OpRustRet = ::RustRet;