Add extensive and exhaustive tests

Add a generator that will test all inputs for input spaces `u32::MAX` or
smaller (e.g. single-argument `f32` routines).  For anything larger,
still run approximately `u32::MAX` tests, but distribute inputs evenly
across the function domain.

Since we often only want to run one of these tests at a time, this
implementation parallelizes within each test using `rayon`. A custom
test runner is used so a progress bar is possible.

Specific tests must be enabled by setting the `LIBM_EXTENSIVE_TESTS`
environment variable, e.g.

    LIBM_EXTENSIVE_TESTS=all_f16,cos,cosf cargo run ...

Testing on a recent machine, most tests take about two minutes or less.
The Bessel functions are quite slow and take closer to 10 minutes, and
the iteration count for FMA is increased so that it runs for about as long.
This commit is contained in:
Trevor Gross 2024-12-22 11:47:44 +00:00
parent 7c04b1916a
commit 1bbf8b12da
9 changed files with 450 additions and 8 deletions

View file

@ -26,12 +26,14 @@ short-benchmarks = []
[dependencies]
anyhow = "1.0.90"
az = { version = "1.2.1", optional = true }
indicatif = { version = "0.17.9", default-features = false }
libm = { path = "../..", features = ["unstable-public-internals"] }
libm-macros = { path = "../libm-macros" }
musl-math-sys = { path = "../musl-math-sys", optional = true }
paste = "1.0.15"
rand = "0.8.5"
rand_chacha = "0.3.1"
rayon = "1.10.0"
rug = { version = "1.26.1", optional = true, default-features = false, features = ["float", "std"] }
[target.'cfg(target_family = "wasm")'.dependencies]
@ -43,11 +45,18 @@ rand = { version = "0.8.5", optional = true }
[dev-dependencies]
criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
libtest-mimic = "0.8.1"
[[bench]]
name = "random"
harness = false
[[test]]
# No harness so that we can skip tests at runtime based on env. Prefixed with
# `z` so these tests get run last.
name = "z_extensive"
harness = false
[lints.rust]
# Values from the shared config.rs used by `libm` but not the test crate
unexpected_cfgs = { level = "warn", check-cfg = [

View file

@ -2,6 +2,7 @@
pub mod domain_logspace;
pub mod edge_cases;
pub mod extensive;
pub mod random;
/// A wrapper to turn any iterator into an `ExactSizeIterator`. Asserts the final result to ensure

View file

@ -0,0 +1,153 @@
use std::fmt;
use std::ops::RangeInclusive;
use libm::support::MinInt;
use crate::domain::HasDomain;
use crate::gen::KnownSize;
use crate::op::OpITy;
use crate::run_cfg::{int_range, iteration_count};
use crate::{CheckCtx, GeneratorKind, MathOp, logspace};
/// Generate a sequence of inputs that either cover the domain in completeness (for smaller float
/// types and single argument functions) or provide evenly spaced inputs across the domain with
/// approximately `u32::MAX` total iterations.
pub trait ExtensiveInput<Op> {
    /// Build the iterator of test inputs for the routine described by `ctx`.
    ///
    /// The returned iterator knows its exact length up front and can be sent across threads.
    fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> + Send;
}
/// Build a `logspace` iterator from `start` to `end` for argument `argnum`, returning it together
/// with the number of items it will yield.
///
/// The requested step count comes from `iteration_count` for the extensive generator and is
/// clamped to the largest value representable in the float's integer type.
fn logspace_steps<Op>(
    start: Op::FTy,
    end: Op::FTy,
    ctx: &CheckCtx,
    argnum: usize,
) -> (impl Iterator<Item = Op::FTy> + Clone, u64)
where
    Op: MathOp,
    OpITy<Op>: TryFrom<u64, Error: fmt::Debug>,
    RangeInclusive<OpITy<Op>>: Iterator,
{
    let requested = iteration_count(ctx, GeneratorKind::Extensive, argnum);
    // Saturate rather than fail when the request does not fit in the integer type.
    let step_limit = match OpITy::<Op>::try_from(requested) {
        Ok(steps) => steps,
        Err(_) => OpITy::<Op>::MAX,
    };
    let values = logspace(start, end, step_limit);

    // `logspace` can't implement `ExactSizeIterator` because of the range, but its size hint
    // should be accurate (assuming <= usize::MAX iterations).
    let (lower, upper) = values.size_hint();
    assert_eq!(lower, upper.unwrap());

    let total = u64::try_from(lower).unwrap();
    (values, total)
}
/// Evenly subsample `range` so that it yields no more than `max_steps` values.
///
/// Returns the strided iterator along with the exact number of items it produces, so that the
/// reported length always agrees with what is actually iterated.
fn int_steps(
    range: RangeInclusive<i32>,
    max_steps: u64,
) -> (impl Iterator<Item = i32> + Clone, u64) {
    // Widen to `i64` so that the length of the full `i32` range cannot overflow. An empty range
    // (start > end) has a non-positive width and is treated as having zero length.
    let width = i64::from(*range.end()) - i64::from(*range.start()) + 1;
    let len = u64::try_from(width).unwrap_or(0);

    // Smallest stride that keeps the number of yielded values within `max_steps`.
    let stride = len.div_ceil(max_steps.max(1)).max(1);
    let count = len.div_ceil(stride);

    (range.step_by(usize::try_from(stride).unwrap()), count)
}

/// Implement [`ExtensiveInput`] for every argument tuple shape that uses the float type `$fty`.
///
/// Float arguments are spaced with `logspace_steps`; integer arguments are spaced linearly with
/// `int_steps`. Multi-argument shapes iterate the cartesian product of their per-argument
/// iterators, and the reported length is the product of the per-argument counts.
macro_rules! impl_extensive_input {
    ($fty:ty) => {
        /// Single float argument: walk the routine's domain.
        impl<Op> ExtensiveInput<Op> for ($fty,)
        where
            Op: MathOp<RustArgs = Self, FTy = $fty>,
            Op: HasDomain<Op::FTy>,
        {
            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
                let start = Op::DOMAIN.range_start();
                let end = Op::DOMAIN.range_end();
                let (iter0, steps0) = logspace_steps::<Op>(start, end, ctx, 0);
                let iter0 = iter0.map(|v| (v,));
                KnownSize::new(iter0, steps0)
            }
        }

        /// Two float arguments: product of two full-range sweeps.
        impl<Op> ExtensiveInput<Op> for ($fty, $fty)
        where
            Op: MathOp<RustArgs = Self, FTy = $fty>,
        {
            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
                let start = <$fty>::NEG_INFINITY;
                let end = <$fty>::INFINITY;
                let (iter0, steps0) = logspace_steps::<Op>(start, end, ctx, 0);
                let (iter1, steps1) = logspace_steps::<Op>(start, end, ctx, 1);
                let iter =
                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
                let count = steps0.checked_mul(steps1).unwrap();
                KnownSize::new(iter, count)
            }
        }

        /// Three float arguments: product of three full-range sweeps.
        impl<Op> ExtensiveInput<Op> for ($fty, $fty, $fty)
        where
            Op: MathOp<RustArgs = Self, FTy = $fty>,
        {
            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
                let start = <$fty>::NEG_INFINITY;
                let end = <$fty>::INFINITY;
                let (iter0, steps0) = logspace_steps::<Op>(start, end, ctx, 0);
                let (iter1, steps1) = logspace_steps::<Op>(start, end, ctx, 1);
                let (iter2, steps2) = logspace_steps::<Op>(start, end, ctx, 2);
                let iter = iter0
                    .flat_map(move |first| iter1.clone().map(move |second| (first, second)))
                    .flat_map(move |(first, second)| {
                        iter2.clone().map(move |third| (first, second, third))
                    });
                let count = steps0.checked_mul(steps1).unwrap().checked_mul(steps2).unwrap();
                KnownSize::new(iter, count)
            }
        }

        /// Integer first argument, float second argument.
        impl<Op> ExtensiveInput<Op> for (i32, $fty)
        where
            Op: MathOp<RustArgs = Self, FTy = $fty>,
        {
            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
                let start = <$fty>::NEG_INFINITY;
                let end = <$fty>::INFINITY;
                // Subsample the integer range so the number of integers actually iterated
                // matches the count that goes into the reported length.
                let range0 = int_range(ctx, GeneratorKind::Extensive, 0);
                let max_steps0 = iteration_count(ctx, GeneratorKind::Extensive, 0);
                let (iter0, steps0) = int_steps(range0, max_steps0);
                let (iter1, steps1) = logspace_steps::<Op>(start, end, ctx, 1);
                let iter =
                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
                let count = steps0.checked_mul(steps1).unwrap();
                KnownSize::new(iter, count)
            }
        }

        /// Float first argument, integer second argument.
        impl<Op> ExtensiveInput<Op> for ($fty, i32)
        where
            Op: MathOp<RustArgs = Self, FTy = $fty>,
        {
            fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
                let start = <$fty>::NEG_INFINITY;
                let end = <$fty>::INFINITY;
                let (iter0, steps0) = logspace_steps::<Op>(start, end, ctx, 0);
                // The integer is the second argument, so query configuration for index 1.
                let range1 = int_range(ctx, GeneratorKind::Extensive, 1);
                let max_steps1 = iteration_count(ctx, GeneratorKind::Extensive, 1);
                let (iter1, steps1) = int_steps(range1, max_steps1);
                let iter =
                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
                let count = steps0.checked_mul(steps1).unwrap();
                KnownSize::new(iter, count)
            }
        }
    };
}

impl_extensive_input!(f32);
impl_extensive_input!(f64);
/// Produce the extensive test-case iterator for `Op`.
///
/// This simply forwards to the [`ExtensiveInput`] implementation of the routine's argument tuple.
pub fn get_test_cases<Op>(
    ctx: &CheckCtx,
) -> impl ExactSizeIterator<Item = Op::RustArgs> + Send + use<'_, Op>
where
    Op: MathOp,
    Op::RustArgs: ExtensiveInput<Op>,
{
    <Op::RustArgs as ExtensiveInput<Op>>::get_cases(ctx)
}

View file

@ -86,7 +86,7 @@ macro_rules! impl_random_input {
fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
let count0 = iteration_count(ctx, GeneratorKind::Random, 0);
let count1 = iteration_count(ctx, GeneratorKind::Random, 1);
let range0 = int_range(ctx, 0);
let range0 = int_range(ctx, GeneratorKind::Random, 0);
let iter = random_ints(count0, range0)
.flat_map(move |f1: i32| random_floats(count1).map(move |f2: $fty| (f1, f2)));
KnownSize::new(iter, count0 * count1)
@ -97,7 +97,7 @@ macro_rules! impl_random_input {
fn get_cases(ctx: &CheckCtx) -> impl ExactSizeIterator<Item = Self> {
let count0 = iteration_count(ctx, GeneratorKind::Random, 0);
let count1 = iteration_count(ctx, GeneratorKind::Random, 1);
let range1 = int_range(ctx, 1);
let range1 = int_range(ctx, GeneratorKind::Random, 1);
let iter = random_floats(count0).flat_map(move |f1: $fty| {
random_ints(count1, range1.clone()).map(move |f2: i32| (f1, f2))
});

View file

@ -25,7 +25,8 @@ pub use libm::support::{Float, Int, IntTy, MinInt};
pub use num::{FloatExt, logspace};
pub use op::{BaseName, FloatTy, Identifier, MathOp, OpCFn, OpFTy, OpRustFn, OpRustRet, Ty};
pub use precision::{MaybeOverride, SpecialCase, default_ulp};
pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind};
use run_cfg::EXTENSIVE_MAX_ITERATIONS;
pub use run_cfg::{CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, skip_extensive_test};
pub use test_traits::{CheckOutput, Hex, TupleCall};
/// Result type for tests is usually from `anyhow`. Most times there is no success value to
@ -85,6 +86,7 @@ pub fn test_log(s: &str) {
writeln!(f, "cargo features: {}", env!("CFG_CARGO_FEATURES")).unwrap();
writeln!(f, "opt level: {}", env!("CFG_OPT_LEVEL")).unwrap();
writeln!(f, "target features: {}", env!("CFG_TARGET_FEATURES")).unwrap();
writeln!(f, "extensive iterations {}", *EXTENSIVE_MAX_ITERATIONS).unwrap();
Some(f)
});

View file

@ -215,7 +215,7 @@ fn as_ulp_steps<F: Float>(x: F) -> Option<F::SignedInt> {
/// to logarithmic spacing of their values.
///
/// Note that this tends to skip negative zero, so that needs to be checked explicitly.
pub fn logspace<F: FloatExt>(start: F, end: F, steps: F::Int) -> impl Iterator<Item = F>
pub fn logspace<F: FloatExt>(start: F, end: F, steps: F::Int) -> impl Iterator<Item = F> + Clone
where
RangeInclusive<F::Int>: Iterator,
{

View file

@ -10,6 +10,22 @@ use crate::{BaseName, FloatTy, Identifier, test_log};
/// Name of the environment variable that selects which extensive tests run.
pub const EXTENSIVE_ENV: &str = "LIBM_EXTENSIVE_TESTS";

/// Name of the environment variable that overrides the default iteration count.
pub const EXTENSIVE_ITER_ENV: &str = "LIBM_EXTENSIVE_ITERATIONS";

/// Upper bound on the number of iterations run for a single routine.
///
/// Defaults to one more than `u32::MAX`, which is enough to test single-argument `f32` routines
/// and single- or double-argument `f16` routines exhaustively. `f64` and `f128` can't feasibly
/// be tested exhaustively; however, [`EXTENSIVE_ITER_ENV`] can be set to run tests for multiple
/// hours.
///
/// # Panics
///
/// Panics on first access if [`EXTENSIVE_ITER_ENV`] is set but cannot be parsed as a `u64`.
pub static EXTENSIVE_MAX_ITERATIONS: LazyLock<u64> = LazyLock::new(|| {
    const DEFAULT: u64 = 1 << 32;

    match env::var(EXTENSIVE_ITER_ENV) {
        Ok(raw) => raw.parse().expect("failed to parse iteration count"),
        Err(_) => DEFAULT,
    }
});
/// Context passed to [`CheckOutput`].
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CheckCtx {
@ -54,6 +70,7 @@ pub enum CheckBasis {
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum GeneratorKind {
Domain,
Extensive,
Random,
}
@ -171,8 +188,14 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -
let mut total_iterations = match gen_kind {
GeneratorKind::Domain => domain_iter_count,
GeneratorKind::Random => random_iter_count,
GeneratorKind::Extensive => *EXTENSIVE_MAX_ITERATIONS,
};
// FMA has a huge domain but is reasonably fast to run, so increase iterations.
if ctx.base_name == BaseName::Fma {
total_iterations *= 4;
}
if cfg!(optimizations_enabled) {
// Always run at least 10,000 tests.
total_iterations = total_iterations.max(10_000);
@ -191,7 +214,7 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -
let total = ntests.pow(t_env.input_count.try_into().unwrap());
let seed_msg = match gen_kind {
GeneratorKind::Domain => String::new(),
GeneratorKind::Domain | GeneratorKind::Extensive => String::new(),
GeneratorKind::Random => {
format!(" using `{SEED_ENV}={}`", str::from_utf8(SEED.as_slice()).unwrap())
}
@ -210,7 +233,7 @@ pub fn iteration_count(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -
}
/// Some tests require that an integer be kept within reasonable limits; generate that here.
pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive<i32> {
pub fn int_range(ctx: &CheckCtx, gen_kind: GeneratorKind, argnum: usize) -> RangeInclusive<i32> {
let t_env = TestEnv::from_env(ctx);
if !matches!(ctx.base_name, BaseName::Jn | BaseName::Yn) {
@ -221,10 +244,17 @@ pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive<i32> {
// The integer argument to `jn` is an iteration count. Limit this to ensure tests can be
// completed in a reasonable amount of time.
if t_env.slow_platform || !cfg!(optimizations_enabled) {
let non_extensive_range = if t_env.slow_platform || !cfg!(optimizations_enabled) {
(-0xf)..=0xff
} else {
(-0xff)..=0xffff
};
let extensive_range = (-0xfff)..=0xfffff;
match gen_kind {
GeneratorKind::Extensive => extensive_range,
GeneratorKind::Domain | GeneratorKind::Random => non_extensive_range,
}
}
@ -241,7 +271,6 @@ pub fn check_near_count(_ctx: &CheckCtx) -> u64 {
}
/// Check whether extensive actions should be run or skipped.
#[expect(dead_code, reason = "extensive tests have not yet been added")]
pub fn skip_extensive_test(ctx: &CheckCtx) -> bool {
let t_env = TestEnv::from_env(ctx);
!t_env.should_run_extensive

View file

@ -0,0 +1,14 @@
//! `main` is just a wrapper to handle configuration.

#[cfg(feature = "test-multiprecision")]
mod run;

/// Run the extensive tests when multiprecision support is compiled in; otherwise report that
/// they are being skipped.
fn main() {
    #[cfg(feature = "test-multiprecision")]
    run::run();

    #[cfg(not(feature = "test-multiprecision"))]
    eprintln!("multiprecision not enabled; skipping extensive tests");
}

View file

@ -0,0 +1,234 @@
//! Exhaustive tests for `f16` and `f32`, high-iteration for `f64` and `f128`.
use std::fmt;
use std::io::{self, IsTerminal};
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Duration;
use indicatif::{ProgressBar, ProgressStyle};
use libm_test::gen::extensive::{self, ExtensiveInput};
use libm_test::mpfloat::MpOp;
use libm_test::{
CheckBasis, CheckCtx, CheckOutput, MathOp, TestResult, TupleCall, skip_extensive_test,
};
use libtest_mimic::{Arguments, Trial};
use rayon::prelude::*;
/// Entry point for the extensive test suite.
///
/// Parses the command-line arguments, registers one trial per routine, sizes the global Rayon
/// pool, and hands everything to `libtest_mimic`, exiting the process with its result.
pub fn run() {
    let mut cli = Arguments::from_args();
    // Each test is parallelized internally, so never run more than one test at a time.
    cli.test_threads = Some(1);

    let trials = register_all_tests();

    // With default parallelism, the CPU doesn't saturate. We don't need to be nice to
    // other processes, so do 1.5x to make sure we use all available resources.
    let pool_size = std::thread::available_parallelism().map_or(0, usize::from) * 3 / 2;
    rayon::ThreadPoolBuilder::new()
        .num_threads(pool_size)
        .build_global()
        .unwrap();

    libtest_mimic::run(&cli, trials).exit();
}
/// Callback for `libm_macros::for_each_function!`: registers the extensive test for one routine,
/// forwarding any attributes, and pushes it onto the list named in `extra`.
macro_rules! mp_extensive_tests {
    (
        fn_name: $routine:ident,
        attrs: [$($meta:meta),*],
        extra: [$trials:ident],
    ) => {
        $(#[$meta])*
        register_single_test::<libm_test::op::$routine::Routine>(&mut $trials);
    };
}
/// Build the full list of trials that `libtest_mimic` will run, one per routine that is not
/// explicitly skipped.
fn register_all_tests() -> Vec<Trial> {
    let mut trials: Vec<Trial> = Vec::new();

    libm_macros::for_each_function! {
        callback: mp_extensive_tests,
        extra: [trials],
        skip: [
            // FIXME: MPFR tests needed
            remquo,
            remquof,

            // FIXME: test needed, see
            // https://github.com/rust-lang/libm/pull/311#discussion_r1818273392
            nextafter,
            nextafterf,
        ],
    }

    trials
}
/// Append the trial for routine `Op` to `all`.
///
/// The trial is flagged as ignored when `skip_extensive_test` says the routine is not enabled.
fn register_single_test<Op>(all: &mut Vec<Trial>)
where
    Op: MathOp + MpOp,
    Op::RustArgs: ExtensiveInput<Op> + Send,
{
    let name = format!("mp_extensive_{}", Op::NAME);
    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);
    let ignored = skip_extensive_test(&ctx);

    let body = move || {
        assert!(cfg!(optimizations_enabled), "extensive tests should be run with --release");

        run_single_test::<Op>().map_err(|e| {
            // Format with the `Debug` implementation so we get the error cause chain, and print
            // it here so we see the result immediately (rather than waiting for all tests to
            // conclude).
            let e = format!("{e:?}");
            eprintln!("failure testing {}:{e}\n", Op::IDENTIFIER);
            e.into()
        })
    };

    all.push(Trial::test(name, body).with_ignored_flag(ignored));
}
/// Test runner for a single routine.
///
/// Generates the extensive inputs for `Op`, validates the crate's result against the
/// multiprecision result for every input in parallel, and reports progress along the way.
///
/// Returns the first validation error encountered, if any.
///
/// # Panics
///
/// Panics if all cases passed but the number of cases actually run differs from the length
/// reported by the generator.
fn run_single_test<Op>() -> TestResult
where
    Op: MathOp + MpOp,
    Op::RustArgs: ExtensiveInput<Op> + Send,
{
    // Small delay before printing anything so other output from the runner has a chance to flush.
    std::thread::sleep(Duration::from_millis(500));
    eprintln!();

    // Shared counter of finished cases, incremented from every worker.
    let completed = AtomicU64::new(0);
    let ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr);
    let cases = &mut extensive::get_test_cases::<Op>(&ctx);
    // Expected number of cases, as reported by the generator.
    let total: u64 = cases.len().try_into().unwrap();
    let pb = Progress::new(Op::NAME, total);

    // Validate every input in one chunk, stopping at the first failure.
    let test_single_chunk = |mp_vals: &mut Op::MpTy, input_vec: Vec<Op::RustArgs>| -> TestResult {
        for input in input_vec {
            // Test the input.
            let mp_res = Op::run(mp_vals, input);
            let crate_res = input.call(Op::ROUTINE);
            crate_res.validate(mp_res, input, &ctx)?;

            // `fetch_add` returns the previous value, so add one to get this case's ordinal.
            let completed = completed.fetch_add(1, Ordering::Relaxed) + 1;
            pb.update(completed, input);
        }

        Ok(())
    };

    // Chunk the cases so Rayon doesn't switch threads between each iterator item. 50k seems near
    // a performance sweet spot. Ideally we would reuse these allocations rather than discarding,
    // but that is difficult with Rayon's API.
    let chunk_size = 50_000;
    // Pull up to `chunk_size` cases at a time; the iterator ends once a chunk comes back empty.
    let chunks = std::iter::from_fn(move || {
        let mut v = Vec::with_capacity(chunk_size);
        v.extend(cases.take(chunk_size));
        (!v.is_empty()).then_some(v)
    });

    // Run the actual tests
    let res = chunks.par_bridge().try_for_each_init(Op::new_mp, test_single_chunk);

    let real_total = completed.load(Ordering::Relaxed);
    pb.complete(real_total);

    if res.is_ok() && real_total != total {
        // Provide a warning if our estimate needs to be updated.
        panic!("total run {real_total} does not match expected {total}");
    }

    res
}
/// Wrapper around a `ProgressBar` that handles styles and non-TTY messages.
struct Progress {
    /// The underlying progress bar.
    pb: ProgressBar,
    /// Routine name, padded to a minimum width so output lines up across routines.
    name_padded: String,
    /// Style applied to the bar once the run is complete.
    final_style: ProgressStyle,
    /// Whether stderr is a terminal; when it is not, plain status lines are printed instead.
    is_tty: bool,
}
impl Progress {
    /// Bar template used while a test is running; `NAME` is replaced with the padded name.
    const PB_TEMPLATE: &str = "[{elapsed:3} {percent:3}%] {bar:20.cyan/blue} NAME \
        {human_pos:>13}/{human_len:13} {per_sec:18} eta {eta:8} {msg}";
    /// Bar template used once a test has finished; `NAME` is replaced with the padded name.
    const PB_TEMPLATE_FINAL: &str = "[{elapsed:3} {percent:3}%] {bar:20.cyan/blue} NAME \
        {human_pos:>13}/{human_len:13} {per_sec:18} done in {elapsed_precise}";

    /// Announce the start of the tests for `name` and create a bar expecting `total` cases.
    fn new(name: &str, total: u64) -> Self {
        eprintln!("starting extensive tests for `{name}`");

        let name_padded = format!("{name:9}");

        // Both styles are built the same way; only the template text differs.
        let make_style = |template: &str| {
            ProgressStyle::with_template(&template.replace("NAME", &name_padded))
                .unwrap()
                .progress_chars("##-")
        };
        let initial_style = make_style(Self::PB_TEMPLATE);
        let final_style = make_style(Self::PB_TEMPLATE_FINAL);

        let pb = ProgressBar::new(total);
        pb.set_style(initial_style);

        let is_tty = io::stderr().is_terminal();
        Self { pb, name_padded, final_style, is_tty }
    }

    /// Record that `completed` cases have finished, the most recent one being `input`.
    ///
    /// The bar position, the displayed input, and the non-TTY status line are each refreshed
    /// only at their own (infrequent) intervals.
    fn update(&self, completed: u64, input: impl fmt::Debug) {
        let is_multiple_of = |interval: u64| completed % interval == 0;

        // Infrequently update the progress bar.
        if is_multiple_of(20_000) {
            self.pb.set_position(completed);
        }

        if is_multiple_of(500_000) {
            self.pb.set_message(format!("input: {input:<24?}"));
        }

        // The plain-text status line is only for non-terminal output.
        if self.is_tty || !is_multiple_of(5_000_000) {
            return;
        }

        let len = self.pb.length().unwrap_or_default();
        let elapsed = self.pb.elapsed().as_secs();
        let percent = completed as f32 * 100.0 / len as f32;
        let name = &self.name_padded;
        let human_pos = completed;
        let human_len = len;
        let per_sec = self.pb.per_sec();
        let eta = self.pb.eta().as_secs();
        eprintln!(
            "[{elapsed:3?}s {percent:3.0}%] {name} \
            {human_pos:>10}/{human_len:<10} {per_sec:14.2}/s eta {eta:4}s {input:<24?}",
        );
    }

    /// Switch to the final style, pin the bar at `real_total`, and print a closing status line
    /// when stderr is not a terminal.
    fn complete(self, real_total: u64) {
        let Self { pb, name_padded, final_style, is_tty } = self;

        pb.set_style(final_style);
        pb.set_position(real_total);
        pb.abandon();

        if !is_tty {
            let len = pb.length().unwrap_or_default();
            let elapsed = pb.elapsed().as_secs();
            let percent = real_total as f32 * 100.0 / len as f32;
            let name = name_padded;
            let human_pos = real_total;
            let human_len = len;
            let per_sec = pb.per_sec();
            let elapsed_precise = pb.elapsed().as_secs();
            eprintln!(
                "[{elapsed:3}s {percent:3.0}%] {name} \
                {human_pos:>10}/{human_len:<10} {per_sec:14.2}/s done in {elapsed_precise}",
            );
        }

        eprintln!();
    }
}