From 6cd17ff4d23977ea6df711f3197b191994a5513e Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sun, 19 May 2024 05:02:25 -0400 Subject: [PATCH] Add benchmarks for floating point math This adds comparisons among the compiler-builtins function, system functions if available, and optionally handwritten assembly. These also help us identify inconsistencies between this crate and system functions, which may otherwise go unnoticed if intrinsics get lowered to inline operations rather than library calls. --- library/compiler-builtins/ci/run.sh | 6 +- .../compiler-builtins/testcrate/Cargo.toml | 48 +- .../bench-175b45d1-aarch64-macos.txt | 500 +++++++++++++ .../bench-3cee6376-aarch64-macos.txt | 699 ++++++++++++++++++ .../testcrate/benches/float_add.rs | 81 ++ .../testcrate/benches/float_cmp.rs | 202 +++++ .../testcrate/benches/float_conv.rs | 547 ++++++++++++++ .../testcrate/benches/float_div.rs | 70 ++ .../testcrate/benches/float_extend.rs | 93 +++ .../testcrate/benches/float_mul.rs | 81 ++ .../testcrate/benches/float_pow.rs | 24 + .../testcrate/benches/float_sub.rs | 81 ++ .../testcrate/benches/float_trunc.rs | 127 ++++ library/compiler-builtins/testcrate/build.rs | 19 + .../compiler-builtins/testcrate/src/bench.rs | 348 +++++++++ .../compiler-builtins/testcrate/src/lib.rs | 6 + 16 files changed, 2930 insertions(+), 2 deletions(-) create mode 100644 library/compiler-builtins/testcrate/bench-175b45d1-aarch64-macos.txt create mode 100644 library/compiler-builtins/testcrate/bench-3cee6376-aarch64-macos.txt create mode 100644 library/compiler-builtins/testcrate/benches/float_add.rs create mode 100644 library/compiler-builtins/testcrate/benches/float_cmp.rs create mode 100644 library/compiler-builtins/testcrate/benches/float_conv.rs create mode 100644 library/compiler-builtins/testcrate/benches/float_div.rs create mode 100644 library/compiler-builtins/testcrate/benches/float_extend.rs create mode 100644 library/compiler-builtins/testcrate/benches/float_mul.rs create mode 
100644 library/compiler-builtins/testcrate/benches/float_pow.rs create mode 100644 library/compiler-builtins/testcrate/benches/float_sub.rs create mode 100644 library/compiler-builtins/testcrate/benches/float_trunc.rs create mode 100644 library/compiler-builtins/testcrate/src/bench.rs diff --git a/library/compiler-builtins/ci/run.sh b/library/compiler-builtins/ci/run.sh index 2512dc633e53..dcbe1caf4ef4 100755 --- a/library/compiler-builtins/ci/run.sh +++ b/library/compiler-builtins/ci/run.sh @@ -4,7 +4,9 @@ set -eux target="${1:-}" -if [ -z "${1:-}" ]; then +export RUST_BACKTRACE="${RUST_BACKTRACE:-full}" + +if [ -z "$target" ]; then host_target=$(rustc -vV | awk '/^host/ { print $2 }') echo "Defaulted to host target $host_target" target="$host_target" @@ -30,6 +32,8 @@ else $run --features no-asm --release $run --features no-f16-f128 $run --features no-f16-f128 --release + $run --benches + $run --benches --release fi if [ "${TEST_VERBATIM:-}" = "1" ]; then diff --git a/library/compiler-builtins/testcrate/Cargo.toml b/library/compiler-builtins/testcrate/Cargo.toml index 1de0c39761ed..6b5c4cf48d2a 100644 --- a/library/compiler-builtins/testcrate/Cargo.toml +++ b/library/compiler-builtins/testcrate/Cargo.toml @@ -21,6 +21,10 @@ path = ".." 
default-features = false features = ["public-test-deps"] +[dev-dependencies] +criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] } +paste = "1.0.15" + [target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies] test = { git = "https://github.com/japaric/utest" } utest-cortex-m-qemu = { default-features = false, git = "https://github.com/japaric/utest" } @@ -34,6 +38,48 @@ no-f16-f128 = ["compiler_builtins/no-f16-f128"] mem = ["compiler_builtins/mem"] mangled-names = ["compiler_builtins/mangled-names"] # Skip tests that rely on f128 symbols being available on the system -no-sys-f128 = ["no-sys-f128-int-convert"] +no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"] # Some platforms have some f128 functions but everything except integer conversions no-sys-f128-int-convert = [] +no-sys-f16-f128-convert = [] +# Skip tests that rely on f16 symbols being available on the system +no-sys-f16 = [] + +# Enable report generation without bringing in more dependencies by default +benchmarking-reports = ["criterion/plotters", "criterion/html_reports"] + +[[bench]] +name = "float_add" +harness = false + +[[bench]] +name = "float_sub" +harness = false + +[[bench]] +name = "float_mul" +harness = false + +[[bench]] +name = "float_div" +harness = false + +[[bench]] +name = "float_cmp" +harness = false + +[[bench]] +name = "float_conv" +harness = false + +[[bench]] +name = "float_extend" +harness = false + +[[bench]] +name = "float_trunc" +harness = false + +[[bench]] +name = "float_pow" +harness = false diff --git a/library/compiler-builtins/testcrate/bench-175b45d1-aarch64-macos.txt b/library/compiler-builtins/testcrate/bench-175b45d1-aarch64-macos.txt new file mode 100644 index 000000000000..e79bbe368735 --- /dev/null +++ b/library/compiler-builtins/testcrate/bench-175b45d1-aarch64-macos.txt @@ -0,0 +1,500 @@ + +running 0 tests + +test result: ok. 
0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s + +add_f32 compiler-builtins + time: [35.804 µs 35.863 µs 35.920 µs] +Found 5 outliers among 100 measurements (5.00%) + 2 (2.00%) high mild + 3 (3.00%) high severe + +add_f32 system time: [39.084 µs 39.127 µs 39.169 µs] +Found 11 outliers among 100 measurements (11.00%) + 7 (7.00%) high mild + 4 (4.00%) high severe + +add_f32 assembly (aarch64 unix) + time: [8.1034 µs 8.1441 µs 8.1866 µs] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild + +add_f64 compiler-builtins + time: [35.647 µs 35.725 µs 35.799 µs] +Found 10 outliers among 100 measurements (10.00%) + 8 (8.00%) high mild + 2 (2.00%) high severe + +add_f64 system time: [39.308 µs 39.322 µs 39.336 µs] +Found 7 outliers among 100 measurements (7.00%) + 4 (4.00%) high mild + 3 (3.00%) high severe + +add_f64 assembly (aarch64 unix) + time: [8.0401 µs 8.0442 µs 8.0499 µs] +Found 11 outliers among 100 measurements (11.00%) + 2 (2.00%) high mild + 9 (9.00%) high severe + +add_f128 compiler-builtins + time: [41.801 µs 41.986 µs 42.201 µs] +Found 7 outliers among 100 measurements (7.00%) + 4 (4.00%) high mild + 3 (3.00%) high severe + +cmp_f32_gt compiler-builtins + time: [13.579 µs 13.675 µs 13.778 µs] +Found 16 outliers among 100 measurements (16.00%) + 6 (6.00%) high mild + 10 (10.00%) high severe + +cmp_f32_gt system time: [12.343 µs 12.348 µs 12.355 µs] +Found 13 outliers among 100 measurements (13.00%) + 1 (1.00%) low mild + 3 (3.00%) high mild + 9 (9.00%) high severe + +cmp_f32_gt assembly (aarch64 unix) + time: [8.2593 µs 8.3185 µs 8.3813 µs] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild + +cmp_f32_unord compiler-builtins + time: [11.977 µs 12.042 µs 12.109 µs] +Found 13 outliers among 100 measurements (13.00%) + 5 (5.00%) low severe + 6 (6.00%) low mild + 2 (2.00%) high mild + +cmp_f32_unord system time: [8.1236 µs 8.1736 µs 8.2350 µs] +Found 18 outliers among 100 measurements (18.00%) + 5 
(5.00%) high mild + 13 (13.00%) high severe + +cmp_f32_unord assembly (aarch64 unix) + time: [8.1446 µs 8.2080 µs 8.2762 µs] +Found 14 outliers among 100 measurements (14.00%) + 6 (6.00%) high mild + 8 (8.00%) high severe + +cmp_f64_gt compiler-builtins + time: [16.073 µs 16.077 µs 16.082 µs] +Found 17 outliers among 100 measurements (17.00%) + 2 (2.00%) low mild + 4 (4.00%) high mild + 11 (11.00%) high severe + +cmp_f64_gt system time: [12.456 µs 12.487 µs 12.522 µs] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +cmp_f64_gt assembly (aarch64 unix) + time: [8.0557 µs 8.0616 µs 8.0685 µs] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +cmp_f64_unord compiler-builtins + time: [10.715 µs 10.724 µs 10.737 µs] +Found 13 outliers among 100 measurements (13.00%) + 3 (3.00%) high mild + 10 (10.00%) high severe + +cmp_f64_unord system time: [8.0692 µs 8.0734 µs 8.0784 µs] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +cmp_f64_unord assembly (aarch64 unix) + time: [8.0569 µs 8.0677 µs 8.0818 µs] +Found 18 outliers among 100 measurements (18.00%) + 4 (4.00%) high mild + 14 (14.00%) high severe + +cmp_f128_gt compiler-builtins + time: [18.234 µs 18.401 µs 18.602 µs] + +cmp_f128_unord compiler-builtins + time: [13.410 µs 13.471 µs 13.542 µs] +Found 7 outliers among 100 measurements (7.00%) + 7 (7.00%) high mild + +conv_u32_f32 compiler-builtins + time: [774.58 ns 776.01 ns 777.59 ns] +Found 9 outliers among 100 measurements (9.00%) + 2 (2.00%) high mild + 7 (7.00%) high severe + +conv_u32_f32 system time: [622.68 ns 625.64 ns 629.26 ns] +Found 16 outliers among 100 measurements (16.00%) + 7 (7.00%) high mild + 9 (9.00%) high severe + +conv_u32_f32 assembly (aarch64 unix) + time: [468.05 ns 469.76 ns 471.46 ns] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +conv_u32_f64 
compiler-builtins + time: [617.61 ns 618.00 ns 618.52 ns] +Found 13 outliers among 100 measurements (13.00%) + 4 (4.00%) high mild + 9 (9.00%) high severe + +conv_u32_f64 system time: [469.56 ns 471.03 ns 472.81 ns] +Found 11 outliers among 100 measurements (11.00%) + 7 (7.00%) high mild + 4 (4.00%) high severe + +conv_u32_f64 assembly (aarch64 unix) + time: [464.43 ns 465.01 ns 465.72 ns] +Found 13 outliers among 100 measurements (13.00%) + 5 (5.00%) high mild + 8 (8.00%) high severe + +conv_u64_f32 compiler-builtins + time: [847.95 ns 848.19 ns 848.46 ns] +Found 19 outliers among 100 measurements (19.00%) + 3 (3.00%) low mild + 9 (9.00%) high mild + 7 (7.00%) high severe + +conv_u64_f32 system time: [701.68 ns 701.95 ns 702.30 ns] +Found 10 outliers among 100 measurements (10.00%) + 4 (4.00%) high mild + 6 (6.00%) high severe + +conv_u64_f32 assembly (aarch64 unix) + time: [511.73 ns 512.43 ns 513.32 ns] +Found 6 outliers among 100 measurements (6.00%) + 6 (6.00%) high mild + +conv_u64_f64 compiler-builtins + time: [681.23 ns 682.55 ns 684.30 ns] +Found 18 outliers among 100 measurements (18.00%) + 1 (1.00%) high mild + 17 (17.00%) high severe + +conv_u64_f64 system time: [679.34 ns 679.57 ns 679.88 ns] +Found 18 outliers among 100 measurements (18.00%) + 1 (1.00%) low mild + 6 (6.00%) high mild + 11 (11.00%) high severe + +conv_u64_f64 assembly (aarch64 unix) + time: [509.90 ns 510.09 ns 510.30 ns] +Found 15 outliers among 100 measurements (15.00%) + 6 (6.00%) high mild + 9 (9.00%) high severe + +conv_u128_f32 compiler-builtins + time: [1.1368 µs 1.1372 µs 1.1377 µs] +Found 14 outliers among 100 measurements (14.00%) + 8 (8.00%) high mild + 6 (6.00%) high severe + +conv_u128_f32 system time: [1.4338 µs 1.4370 µs 1.4410 µs] +Found 7 outliers among 100 measurements (7.00%) + 2 (2.00%) high mild + 5 (5.00%) high severe + +conv_u128_f64 compiler-builtins + time: [1.0133 µs 1.0143 µs 1.0156 µs] +Found 16 outliers among 100 measurements (16.00%) + 2 (2.00%) high mild 
+ 14 (14.00%) high severe + +conv_u128_f64 system time: [1.3473 µs 1.3530 µs 1.3600 µs] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild + +conv_i32_f32 compiler-builtins + time: [906.53 ns 907.86 ns 909.23 ns] +Found 7 outliers among 100 measurements (7.00%) + 4 (4.00%) high mild + 3 (3.00%) high severe + +conv_i32_f32 system time: [914.53 ns 915.69 ns 917.01 ns] +Found 10 outliers among 100 measurements (10.00%) + 6 (6.00%) high mild + 4 (4.00%) high severe + +conv_i32_f32 assembly (aarch64 unix) + time: [464.55 ns 465.10 ns 465.83 ns] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild + +conv_i32_f64 compiler-builtins + time: [617.63 ns 617.92 ns 618.27 ns] +Found 12 outliers among 100 measurements (12.00%) + 3 (3.00%) high mild + 9 (9.00%) high severe + +conv_i32_f64 system time: [622.83 ns 624.19 ns 625.61 ns] +Found 6 outliers among 100 measurements (6.00%) + 5 (5.00%) high mild + 1 (1.00%) high severe + +conv_i32_f64 assembly (aarch64 unix) + time: [465.24 ns 466.04 ns 466.95 ns] +Found 11 outliers among 100 measurements (11.00%) + 4 (4.00%) high mild + 7 (7.00%) high severe + +conv_i64_f32 compiler-builtins + time: [852.67 ns 853.92 ns 855.34 ns] +Found 11 outliers among 100 measurements (11.00%) + 3 (3.00%) high mild + 8 (8.00%) high severe + +conv_i64_f32 system time: [906.94 ns 908.04 ns 909.33 ns] +Found 15 outliers among 100 measurements (15.00%) + 2 (2.00%) high mild + 13 (13.00%) high severe + +conv_i64_f32 assembly (aarch64 unix) + time: [510.84 ns 511.27 ns 511.80 ns] +Found 8 outliers among 100 measurements (8.00%) + 3 (3.00%) high mild + 5 (5.00%) high severe + +conv_i64_f64 compiler-builtins + time: [932.35 ns 932.97 ns 933.76 ns] +Found 10 outliers among 100 measurements (10.00%) + 4 (4.00%) high mild + 6 (6.00%) high severe + +conv_i64_f64 system time: [955.91 ns 958.95 ns 962.05 ns] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe + +conv_i64_f64 assembly 
(aarch64 unix) + time: [510.19 ns 510.72 ns 511.44 ns] +Found 9 outliers among 100 measurements (9.00%) + 5 (5.00%) high mild + 4 (4.00%) high severe + +conv_i128_f32 compiler-builtins + time: [1.4248 µs 1.4285 µs 1.4323 µs] +Found 12 outliers among 100 measurements (12.00%) + 7 (7.00%) high mild + 5 (5.00%) high severe + +conv_i128_f32 system time: [1.6970 µs 1.7017 µs 1.7069 µs] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe + +conv_i128_f64 compiler-builtins + time: [1.3132 µs 1.3161 µs 1.3191 µs] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe + +conv_i128_f64 system time: [1.6071 µs 1.6100 µs 1.6133 µs] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe + +conv_f64_u32 compiler-builtins + time: [640.35 ns 641.00 ns 641.68 ns] +Found 6 outliers among 100 measurements (6.00%) + 4 (4.00%) high mild + 2 (2.00%) high severe + +conv_f64_u32 system time: [640.87 ns 641.63 ns 642.42 ns] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +conv_f64_u32 assembly (aarch64 unix) + time: [482.02 ns 482.67 ns 483.38 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high severe + +conv_f64_u64 compiler-builtins + time: [638.58 ns 638.98 ns 639.45 ns] +Found 15 outliers among 100 measurements (15.00%) + 1 (1.00%) high mild + 14 (14.00%) high severe + +conv_f64_u64 system time: [642.54 ns 644.07 ns 645.59 ns] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe + +conv_f64_u64 assembly (aarch64 unix) + time: [482.65 ns 483.70 ns 484.87 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild + +conv_f64_u128 compiler-builtins + time: [1.0631 µs 1.0652 µs 1.0674 µs] +Found 8 outliers among 100 measurements (8.00%) + 7 (7.00%) high mild + 1 (1.00%) high severe + +conv_f64_u128 system time: [821.41 ns 823.45 ns 825.74 ns] 
+Found 11 outliers among 100 measurements (11.00%) + 8 (8.00%) high mild + 3 (3.00%) high severe + +conv_f64_i32 compiler-builtins + time: [826.76 ns 845.08 ns 870.23 ns] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild + +conv_f64_i32 system time: [764.12 ns 764.63 ns 765.26 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high severe + +conv_f64_i32 assembly (aarch64 unix) + time: [484.50 ns 485.98 ns 487.54 ns] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +conv_f64_i64 compiler-builtins + time: [797.27 ns 798.19 ns 799.84 ns] +Found 9 outliers among 100 measurements (9.00%) + 5 (5.00%) high mild + 4 (4.00%) high severe + +conv_f64_i64 system time: [768.74 ns 769.52 ns 770.23 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high severe + +conv_f64_i64 assembly (aarch64 unix) + time: [480.59 ns 481.03 ns 481.46 ns] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +conv_f64_i128 compiler-builtins + time: [1.0577 µs 1.0591 µs 1.0606 µs] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe + +conv_f64_i128 system time: [1.0181 µs 1.0195 µs 1.0211 µs] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild + +conv_f32_u32 compiler-builtins + time: [800.40 ns 801.39 ns 802.35 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild + +conv_f32_u32 system time: [638.12 ns 638.34 ns 638.63 ns] +Found 11 outliers among 100 measurements (11.00%) + 4 (4.00%) high mild + 7 (7.00%) high severe + +conv_f32_u32 assembly (aarch64 unix) + time: [479.37 ns 480.97 ns 483.32 ns] +Found 13 outliers among 100 measurements (13.00%) + 6 (6.00%) high mild + 7 (7.00%) high severe + +conv_f32_u64 compiler-builtins + time: [801.95 ns 803.64 ns 805.75 ns] + +conv_f32_u64 system time: [638.20 ns 638.56 ns 639.07 ns] +Found 10 outliers among 100 measurements (10.00%) + 
1 (1.00%) high mild + 9 (9.00%) high severe + +conv_f32_u64 assembly (aarch64 unix) + time: [480.07 ns 480.47 ns 480.86 ns] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe + +conv_f32_u128 compiler-builtins + time: [1.1579 µs 1.1623 µs 1.1657 µs] +Found 14 outliers among 100 measurements (14.00%) + 2 (2.00%) low severe + 7 (7.00%) high mild + 5 (5.00%) high severe + +conv_f32_u128 system time: [1.0344 µs 1.0394 µs 1.0450 µs] + +conv_f32_i32 compiler-builtins + time: [800.14 ns 801.52 ns 803.26 ns] +Found 10 outliers among 100 measurements (10.00%) + 8 (8.00%) high mild + 2 (2.00%) high severe + +conv_f32_i32 system time: [741.36 ns 741.74 ns 742.13 ns] +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe + +conv_f32_i32 assembly (aarch64 unix) + time: [484.35 ns 486.08 ns 488.11 ns] +Found 17 outliers among 100 measurements (17.00%) + 9 (9.00%) high mild + 8 (8.00%) high severe + +conv_f32_i64 compiler-builtins + time: [800.94 ns 802.68 ns 804.74 ns] + +conv_f32_i64 system time: [748.60 ns 750.68 ns 753.16 ns] +Found 9 outliers among 100 measurements (9.00%) + 4 (4.00%) high mild + 5 (5.00%) high severe + +conv_f32_i64 assembly (aarch64 unix) + time: [480.70 ns 481.23 ns 481.82 ns] +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe + +conv_f32_i128 compiler-builtins + time: [1.1774 µs 1.1829 µs 1.1887 µs] +Found 11 outliers among 100 measurements (11.00%) + 1 (1.00%) low severe + 7 (7.00%) low mild + 1 (1.00%) high mild + 2 (2.00%) high severe + +conv_f32_i128 system time: [1.1785 µs 1.1853 µs 1.1941 µs] +Found 7 outliers among 100 measurements (7.00%) + 2 (2.00%) high mild + 5 (5.00%) high severe + +div_f32 compiler-builtins + time: [38.852 µs 39.011 µs 39.178 µs] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild + +div_f32 system time: [41.846 µs 41.920 µs 42.005 µs] +Found 3 outliers among 100 measurements 
(3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +div_f32 assembly (aarch64 unix) + time: [8.1309 µs 8.1627 µs 8.2005 µs] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild + +div_f64 compiler-builtins + time: [50.369 µs 50.605 µs 50.857 µs] +Found 15 outliers among 100 measurements (15.00%) + 11 (11.00%) high mild + 4 (4.00%) high severe + +div_f64 system time: [53.506 µs 53.582 µs 53.676 µs] +Found 8 outliers among 100 measurements (8.00%) + 4 (4.00%) high mild + 4 (4.00%) high severe + +div_f64 assembly (aarch64 unix) + time: [8.0695 µs 8.0807 µs 8.0948 µs] +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe + diff --git a/library/compiler-builtins/testcrate/bench-3cee6376-aarch64-macos.txt b/library/compiler-builtins/testcrate/bench-3cee6376-aarch64-macos.txt new file mode 100644 index 000000000000..131e7a85a0b5 --- /dev/null +++ b/library/compiler-builtins/testcrate/bench-3cee6376-aarch64-macos.txt @@ -0,0 +1,699 @@ + +running 0 tests + +test result: ok. 
0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s + +add_f32/compiler-builtins + time: [36.813 µs 37.048 µs 37.303 µs] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high mild +add_f32/system time: [39.103 µs 39.142 µs 39.189 µs] +Found 8 outliers among 100 measurements (8.00%) + 2 (2.00%) high mild + 6 (6.00%) high severe +add_f32/assembly (aarch64 unix) + time: [8.3786 µs 8.4680 µs 8.5570 µs] + +add_f64/compiler-builtins + time: [35.784 µs 35.819 µs 35.863 µs] +Found 4 outliers among 100 measurements (4.00%) + 1 (1.00%) high mild + 3 (3.00%) high severe +add_f64/system time: [39.634 µs 39.689 µs 39.746 µs] +Found 16 outliers among 100 measurements (16.00%) + 4 (4.00%) high mild + 12 (12.00%) high severe +add_f64/assembly (aarch64 unix) + time: [8.0533 µs 8.0599 µs 8.0670 µs] +Found 14 outliers among 100 measurements (14.00%) + 6 (6.00%) high mild + 8 (8.00%) high severe + +add_f128/compiler-builtins + time: [41.830 µs 41.920 µs 42.005 µs] + +cmp_f32_gt/compiler-builtins + time: [13.405 µs 13.411 µs 13.418 µs] +Found 18 outliers among 100 measurements (18.00%) + 4 (4.00%) high mild + 14 (14.00%) high severe +cmp_f32_gt/system time: [12.348 µs 12.355 µs 12.363 µs] +Found 12 outliers among 100 measurements (12.00%) + 2 (2.00%) high mild + 10 (10.00%) high severe +cmp_f32_gt/assembly (aarch64 unix) + time: [8.1233 µs 8.1625 µs 8.2072 µs] +Found 12 outliers among 100 measurements (12.00%) + 7 (7.00%) high mild + 5 (5.00%) high severe + +cmp_f32_unord/compiler-builtins + time: [11.349 µs 11.467 µs 11.584 µs] +cmp_f32_unord/system time: [8.0714 µs 8.0792 µs 8.0890 µs] +Found 16 outliers among 100 measurements (16.00%) + 4 (4.00%) high mild + 12 (12.00%) high severe +cmp_f32_unord/assembly (aarch64 unix) + time: [8.1121 µs 8.1705 µs 8.2325 µs] +Found 20 outliers among 100 measurements (20.00%) + 3 (3.00%) high mild + 17 (17.00%) high severe + +cmp_f64_gt/compiler-builtins + time: [13.749 µs 13.837 µs 13.934 µs] +Found 20 outliers 
among 100 measurements (20.00%) + 9 (9.00%) low mild + 7 (7.00%) high mild + 4 (4.00%) high severe +cmp_f64_gt/system time: [12.475 µs 12.515 µs 12.565 µs] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild +cmp_f64_gt/assembly (aarch64 unix) + time: [8.0456 µs 8.0540 µs 8.0653 µs] +Found 12 outliers among 100 measurements (12.00%) + 3 (3.00%) high mild + 9 (9.00%) high severe + +cmp_f64_unord/compiler-builtins + time: [10.723 µs 10.730 µs 10.739 µs] +Found 15 outliers among 100 measurements (15.00%) + 5 (5.00%) high mild + 10 (10.00%) high severe +cmp_f64_unord/system time: [8.0944 µs 8.1296 µs 8.1683 µs] +Found 17 outliers among 100 measurements (17.00%) + 4 (4.00%) high mild + 13 (13.00%) high severe +cmp_f64_unord/assembly (aarch64 unix) + time: [8.1042 µs 8.1337 µs 8.1662 µs] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild + +cmp_f128_gt/compiler-builtins + time: [20.508 µs 20.558 µs 20.615 µs] +Found 8 outliers among 100 measurements (8.00%) + 2 (2.00%) high mild + 6 (6.00%) high severe + +cmp_f128_unord/compiler-builtins + time: [13.332 µs 13.346 µs 13.360 µs] +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe + +conv_u32_f32/compiler-builtins + time: [621.20 ns 621.89 ns 622.65 ns] +Found 7 outliers among 100 measurements (7.00%) + 4 (4.00%) high mild + 3 (3.00%) high severe +conv_u32_f32/system time: [621.44 ns 622.08 ns 622.74 ns] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe +conv_u32_f32/assembly (aarch64 unix) + time: [465.96 ns 466.65 ns 467.45 ns] +Found 13 outliers among 100 measurements (13.00%) + 3 (3.00%) high mild + 10 (10.00%) high severe + +conv_u32_f64/compiler-builtins + time: [619.71 ns 620.51 ns 621.52 ns] +Found 5 outliers among 100 measurements (5.00%) + 4 (4.00%) high mild + 1 (1.00%) high severe +conv_u32_f64/system time: [466.60 ns 467.14 ns 467.77 ns] +Found 2 outliers among 100 measurements (2.00%) 
+ 2 (2.00%) high mild +conv_u32_f64/assembly (aarch64 unix) + time: [464.02 ns 464.32 ns 464.69 ns] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe + +conv_u64_f32/compiler-builtins + time: [851.24 ns 852.98 ns 854.77 ns] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high mild +conv_u64_f32/system time: [724.35 ns 729.43 ns 735.07 ns] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild +conv_u64_f32/assembly (aarch64 unix) + time: [513.30 ns 514.64 ns 516.16 ns] +Found 8 outliers among 100 measurements (8.00%) + 8 (8.00%) high mild + +conv_u64_f64/compiler-builtins + time: [850.72 ns 853.26 ns 856.54 ns] +Found 15 outliers among 100 measurements (15.00%) + 2 (2.00%) high mild + 13 (13.00%) high severe +conv_u64_f64/system time: [681.43 ns 682.54 ns 683.79 ns] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe +conv_u64_f64/assembly (aarch64 unix) + time: [511.37 ns 511.71 ns 512.02 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high severe + +conv_u128_f32/compiler-builtins + time: [1.1395 µs 1.1409 µs 1.1424 µs] +Found 10 outliers among 100 measurements (10.00%) + 6 (6.00%) high mild + 4 (4.00%) high severe +conv_u128_f32/system time: [1.4348 µs 1.4369 µs 1.4390 µs] +Found 5 outliers among 100 measurements (5.00%) + 4 (4.00%) high mild + 1 (1.00%) high severe + +conv_u128_f64/compiler-builtins + time: [1.0148 µs 1.0157 µs 1.0167 µs] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe +conv_u128_f64/system time: [1.3404 µs 1.3423 µs 1.3442 µs] +Found 8 outliers among 100 measurements (8.00%) + 7 (7.00%) high mild + 1 (1.00%) high severe + +conv_i32_f32/compiler-builtins + time: [902.89 ns 903.81 ns 904.84 ns] +Found 7 outliers among 100 measurements (7.00%) + 4 (4.00%) high mild + 3 (3.00%) high severe +conv_i32_f32/system time: [942.62 ns 949.04 ns 955.77 ns] +Found 4 outliers 
among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe +conv_i32_f32/assembly (aarch64 unix) + time: [466.06 ns 466.60 ns 467.27 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high severe + +conv_i32_f64/compiler-builtins + time: [618.98 ns 619.24 ns 619.55 ns] +Found 17 outliers among 100 measurements (17.00%) + 1 (1.00%) low mild + 3 (3.00%) high mild + 13 (13.00%) high severe +conv_i32_f64/system time: [622.18 ns 623.41 ns 624.85 ns] +Found 8 outliers among 100 measurements (8.00%) + 5 (5.00%) high mild + 3 (3.00%) high severe +conv_i32_f64/assembly (aarch64 unix) + time: [466.26 ns 466.76 ns 467.35 ns] +Found 9 outliers among 100 measurements (9.00%) + 5 (5.00%) high mild + 4 (4.00%) high severe + +conv_i64_f32/compiler-builtins + time: [850.11 ns 850.45 ns 850.88 ns] +Found 15 outliers among 100 measurements (15.00%) + 1 (1.00%) low severe + 1 (1.00%) low mild + 3 (3.00%) high mild + 10 (10.00%) high severe +conv_i64_f32/system time: [908.36 ns 908.70 ns 909.10 ns] +Found 12 outliers among 100 measurements (12.00%) + 3 (3.00%) high mild + 9 (9.00%) high severe +conv_i64_f32/assembly (aarch64 unix) + time: [513.56 ns 514.44 ns 515.38 ns] +Found 8 outliers among 100 measurements (8.00%) + 8 (8.00%) high mild + +conv_i64_f64/compiler-builtins + time: [935.39 ns 935.78 ns 936.26 ns] +Found 13 outliers among 100 measurements (13.00%) + 5 (5.00%) high mild + 8 (8.00%) high severe +conv_i64_f64/system time: [946.56 ns 947.33 ns 948.20 ns] +Found 8 outliers among 100 measurements (8.00%) + 6 (6.00%) high mild + 2 (2.00%) high severe +conv_i64_f64/assembly (aarch64 unix) + time: [511.55 ns 512.03 ns 512.56 ns] +Found 21 outliers among 100 measurements (21.00%) + 4 (4.00%) high mild + 17 (17.00%) high severe + +conv_i128_f32/compiler-builtins + time: [1.4206 µs 1.4218 µs 1.4232 µs] +Found 10 outliers among 100 measurements (10.00%) + 5 (5.00%) high mild + 5 (5.00%) high severe +conv_i128_f32/system time: [1.6863 µs 1.6891 µs 
1.6922 µs] +Found 10 outliers among 100 measurements (10.00%) + 9 (9.00%) high mild + 1 (1.00%) high severe + +conv_i128_f64/compiler-builtins + time: [1.3110 µs 1.3122 µs 1.3136 µs] +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe +conv_i128_f64/system time: [1.6022 µs 1.6048 µs 1.6090 µs] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe + +conv_f64_u32/compiler-builtins + time: [798.65 ns 799.42 ns 800.39 ns] +Found 15 outliers among 100 measurements (15.00%) + 6 (6.00%) high mild + 9 (9.00%) high severe +conv_f64_u32/system time: [639.48 ns 639.88 ns 640.40 ns] +Found 16 outliers among 100 measurements (16.00%) + 1 (1.00%) low mild + 5 (5.00%) high mild + 10 (10.00%) high severe +conv_f64_u32/assembly (aarch64 unix) + time: [480.78 ns 481.35 ns 482.17 ns] +Found 7 outliers among 100 measurements (7.00%) + 5 (5.00%) high mild + 2 (2.00%) high severe + +conv_f64_u64/compiler-builtins + time: [799.56 ns 800.54 ns 801.89 ns] +Found 4 outliers among 100 measurements (4.00%) + 2 (2.00%) high mild + 2 (2.00%) high severe +conv_f64_u64/system time: [640.72 ns 641.24 ns 641.81 ns] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe +conv_f64_u64/assembly (aarch64 unix) + time: [481.54 ns 482.48 ns 483.53 ns] +Found 6 outliers among 100 measurements (6.00%) + 1 (1.00%) low severe + 1 (1.00%) low mild + 3 (3.00%) high mild + 1 (1.00%) high severe + +conv_f64_u128/compiler-builtins + time: [1.0510 µs 1.0515 µs 1.0520 µs] +Found 13 outliers among 100 measurements (13.00%) + 1 (1.00%) low mild + 2 (2.00%) high mild + 10 (10.00%) high severe +conv_f64_u128/system time: [818.45 ns 819.23 ns 820.15 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild + +conv_f64_i32/compiler-builtins + time: [800.56 ns 801.31 ns 802.21 ns] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe 
+conv_f64_i32/system time: [765.62 ns 766.15 ns 766.80 ns] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe +conv_f64_i32/assembly (aarch64 unix) + time: [471.65 ns 472.77 ns 473.89 ns] +Found 10 outliers among 100 measurements (10.00%) + 1 (1.00%) low mild + 8 (8.00%) high mild + 1 (1.00%) high severe + +conv_f64_i64/compiler-builtins + time: [801.00 ns 804.55 ns 808.72 ns] +Found 18 outliers among 100 measurements (18.00%) + 6 (6.00%) high mild + 12 (12.00%) high severe +conv_f64_i64/system time: [770.28 ns 772.47 ns 775.21 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild +conv_f64_i64/assembly (aarch64 unix) + time: [491.56 ns 494.96 ns 499.19 ns] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +conv_f64_i128/compiler-builtins + time: [1.0637 µs 1.0704 µs 1.0762 µs] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high mild +conv_f64_i128/system time: [1.0022 µs 1.0027 µs 1.0033 µs] +Found 4 outliers among 100 measurements (4.00%) + 1 (1.00%) low severe + 3 (3.00%) high severe + +conv_f32_u32/compiler-builtins + time: [644.56 ns 647.01 ns 649.95 ns] +Found 15 outliers among 100 measurements (15.00%) + 13 (13.00%) high mild + 2 (2.00%) high severe +conv_f32_u32/system time: [648.12 ns 651.20 ns 654.54 ns] +Found 9 outliers among 100 measurements (9.00%) + 7 (7.00%) high mild + 2 (2.00%) high severe +conv_f32_u32/assembly (aarch64 unix) + time: [481.02 ns 482.71 ns 484.60 ns] +Found 12 outliers among 100 measurements (12.00%) + 1 (1.00%) low mild + 10 (10.00%) high mild + 1 (1.00%) high severe + +conv_f32_u64/compiler-builtins + time: [644.14 ns 646.61 ns 649.53 ns] +Found 11 outliers among 100 measurements (11.00%) + 6 (6.00%) high mild + 5 (5.00%) high severe +conv_f32_u64/system time: [646.21 ns 650.17 ns 654.55 ns] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild +conv_f32_u64/assembly (aarch64 unix) + time: 
[473.36 ns 474.60 ns 476.00 ns] +Found 9 outliers among 100 measurements (9.00%) + 2 (2.00%) low mild + 5 (5.00%) high mild + 2 (2.00%) high severe + +conv_f32_u128/compiler-builtins + time: [1.0820 µs 1.0828 µs 1.0839 µs] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe +conv_f32_u128/system time: [1.0003 µs 1.0042 µs 1.0076 µs] +Found 21 outliers among 100 measurements (21.00%) + 1 (1.00%) low mild + 3 (3.00%) high mild + 17 (17.00%) high severe + +conv_f32_i32/compiler-builtins + time: [801.13 ns 801.82 ns 802.53 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high severe +conv_f32_i32/system time: [745.17 ns 745.97 ns 746.78 ns] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high severe +conv_f32_i32/assembly (aarch64 unix) + time: [469.87 ns 470.65 ns 471.57 ns] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild + +conv_f32_i64/compiler-builtins + time: [799.44 ns 799.94 ns 800.59 ns] +Found 4 outliers among 100 measurements (4.00%) + 1 (1.00%) high mild + 3 (3.00%) high severe +conv_f32_i64/system time: [744.81 ns 745.17 ns 745.62 ns] +Found 14 outliers among 100 measurements (14.00%) + 5 (5.00%) high mild + 9 (9.00%) high severe +conv_f32_i64/assembly (aarch64 unix) + time: [465.06 ns 466.01 ns 467.12 ns] +Found 13 outliers among 100 measurements (13.00%) + 2 (2.00%) low severe + 5 (5.00%) high mild + 6 (6.00%) high severe + +conv_f32_i128/compiler-builtins + time: [1.1390 µs 1.1515 µs 1.1637 µs] +conv_f32_i128/system time: [1.1315 µs 1.1330 µs 1.1347 µs] +Found 6 outliers among 100 measurements (6.00%) + 3 (3.00%) low mild + 2 (2.00%) high mild + 1 (1.00%) high severe + +div_f32/compiler-builtins + time: [39.408 µs 39.676 µs 39.969 µs] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high mild +div_f32/system time: [42.108 µs 42.248 µs 42.528 µs] +Found 11 outliers among 100 measurements (11.00%) + 4 (4.00%) high mild + 7 (7.00%) high severe 
+div_f32/assembly (aarch64 unix) + time: [8.0724 µs 8.0794 µs 8.0870 µs] +Found 7 outliers among 100 measurements (7.00%) + 5 (5.00%) high mild + 2 (2.00%) high severe + +div_f64/compiler-builtins + time: [49.992 µs 50.014 µs 50.040 µs] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high severe +div_f64/system time: [53.577 µs 53.651 µs 53.743 µs] +Found 6 outliers among 100 measurements (6.00%) + 4 (4.00%) high mild + 2 (2.00%) high severe +div_f64/assembly (aarch64 unix) + time: [8.0976 µs 8.1064 µs 8.1158 µs] +Found 6 outliers among 100 measurements (6.00%) + 3 (3.00%) high mild + 3 (3.00%) high severe + +extend_f16_f32/compiler-builtins + time: [804.09 ns 805.38 ns 807.09 ns] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe +extend_f16_f32/system time: [641.07 ns 641.76 ns 642.60 ns] +Found 12 outliers among 100 measurements (12.00%) + 6 (6.00%) high mild + 6 (6.00%) high severe +extend_f16_f32/assembly (aarch64 unix) + time: [456.69 ns 457.14 ns 457.68 ns] +Found 8 outliers among 100 measurements (8.00%) + 4 (4.00%) low mild + 2 (2.00%) high mild + 2 (2.00%) high severe + +extend_f16_f128/compiler-builtins + time: [1.1025 µs 1.1035 µs 1.1045 µs] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high mild + 1 (1.00%) high severe + +extend_f32_f64/compiler-builtins + time: [799.30 ns 799.68 ns 800.16 ns] +Found 13 outliers among 100 measurements (13.00%) + 3 (3.00%) high mild + 10 (10.00%) high severe +extend_f32_f64/system time: [992.48 ns 993.27 ns 994.32 ns] +Found 15 outliers among 100 measurements (15.00%) + 3 (3.00%) high mild + 12 (12.00%) high severe +extend_f32_f64/assembly (aarch64 unix) + time: [457.65 ns 460.39 ns 463.78 ns] + +extend_f32_f128/compiler-builtins + time: [1.0295 µs 1.0311 µs 1.0327 µs] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) low mild + 1 (1.00%) high mild + +extend_f64_f128/compiler-builtins + time: [1.0400 µs 1.0412 µs 1.0426 µs] +Found 2 
outliers among 100 measurements (2.00%) + 2 (2.00%) high mild + +mul_f32/compiler-builtins + time: [25.604 µs 25.705 µs 25.818 µs] +Found 23 outliers among 100 measurements (23.00%) + 17 (17.00%) low severe + 3 (3.00%) high mild + 3 (3.00%) high severe +mul_f32/system time: [29.914 µs 29.977 µs 30.043 µs] +Found 5 outliers among 100 measurements (5.00%) + 5 (5.00%) high mild +mul_f32/assembly (aarch64 unix) + time: [8.1384 µs 8.1964 µs 8.2603 µs] +Found 13 outliers among 100 measurements (13.00%) + 3 (3.00%) high mild + 10 (10.00%) high severe + +mul_f64/compiler-builtins + time: [25.596 µs 25.615 µs 25.637 µs] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe +mul_f64/system time: [30.931 µs 30.963 µs 31.002 µs] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild +mul_f64/assembly (aarch64 unix) + time: [8.0589 µs 8.0638 µs 8.0695 µs] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe + +mul_f128/compiler-builtins + time: [54.242 µs 54.306 µs 54.374 µs] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +powi_f32/compiler-builtins + time: [129.91 µs 130.09 µs 130.24 µs] +powi_f32/system time: [126.97 µs 127.34 µs 127.82 µs] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe + +powi_f64/compiler-builtins + time: [130.08 µs 130.81 µs 131.46 µs] +Found 13 outliers among 100 measurements (13.00%) + 13 (13.00%) high mild +powi_f64/system time: [128.51 µs 128.68 µs 128.88 µs] +Found 21 outliers among 100 measurements (21.00%) + 4 (4.00%) high mild + 17 (17.00%) high severe + +sub_f32/compiler-builtins + time: [37.861 µs 38.012 µs 38.158 µs] +Found 26 outliers among 100 measurements (26.00%) + 18 (18.00%) low mild + 7 (7.00%) high mild + 1 (1.00%) high severe +sub_f32/system time: [39.586 µs 39.628 µs 39.673 µs] +Found 2 outliers among 100 measurements (2.00%) + 1 (1.00%) high 
mild + 1 (1.00%) high severe +sub_f32/assembly (aarch64 unix) + time: [8.0976 µs 8.1584 µs 8.2208 µs] +Found 6 outliers among 100 measurements (6.00%) + 6 (6.00%) high mild + +sub_f64/compiler-builtins + time: [37.755 µs 37.838 µs 37.921 µs] +Found 25 outliers among 100 measurements (25.00%) + 7 (7.00%) low severe + 3 (3.00%) low mild + 4 (4.00%) high mild + 11 (11.00%) high severe +sub_f64/system time: [39.979 µs 40.019 µs 40.064 µs] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe +sub_f64/assembly (aarch64 unix) + time: [8.0669 µs 8.0733 µs 8.0801 µs] +Found 7 outliers among 100 measurements (7.00%) + 3 (3.00%) high mild + 4 (4.00%) high severe + +sub_f128/compiler-builtins + time: [68.618 µs 68.899 µs 69.293 µs] +Found 11 outliers among 100 measurements (11.00%) + 2 (2.00%) high mild + 9 (9.00%) high severe + +trunc_f32_f16/compiler-builtins + time: [1.3343 µs 1.3468 µs 1.3608 µs] +Found 3 outliers among 100 measurements (3.00%) + 1 (1.00%) high mild + 2 (2.00%) high severe +trunc_f32_f16/system time: [1.2687 µs 1.2714 µs 1.2738 µs] +trunc_f32_f16/assembly (aarch64 unix) + time: [470.06 ns 472.96 ns 475.30 ns] + +trunc_f64_f16/compiler-builtins + time: [1.2729 µs 1.2738 µs 1.2749 µs] +Found 7 outliers among 100 measurements (7.00%) + 2 (2.00%) high mild + 5 (5.00%) high severe +trunc_f64_f16/assembly (aarch64 unix) + time: [455.91 ns 456.61 ns 457.33 ns] +Found 12 outliers among 100 measurements (12.00%) + 1 (1.00%) low severe + 2 (2.00%) low mild + 6 (6.00%) high mild + 3 (3.00%) high severe + +trunc_f64_f32/compiler-builtins + time: [1.2240 µs 1.2325 µs 1.2410 µs] +Found 17 outliers among 100 measurements (17.00%) + 4 (4.00%) low mild + 2 (2.00%) high mild + 11 (11.00%) high severe +trunc_f64_f32/system time: [1.2784 µs 1.2835 µs 1.2884 µs] +Found 10 outliers among 100 measurements (10.00%) + 6 (6.00%) low severe + 1 (1.00%) low mild + 2 (2.00%) high mild + 1 (1.00%) high severe +trunc_f64_f32/assembly (aarch64 
unix) + time: [455.64 ns 456.08 ns 456.58 ns] +Found 18 outliers among 100 measurements (18.00%) + 3 (3.00%) low severe + 4 (4.00%) low mild + 8 (8.00%) high mild + 3 (3.00%) high severe + +trunc_f128_f16/compiler-builtins + time: [1.2563 µs 1.2666 µs 1.2776 µs] +Found 3 outliers among 100 measurements (3.00%) + 3 (3.00%) high mild + +trunc_f128_f32/compiler-builtins + time: [1.2459 µs 1.2482 µs 1.2507 µs] +Found 6 outliers among 100 measurements (6.00%) + 2 (2.00%) low mild + 2 (2.00%) high mild + 2 (2.00%) high severe + +trunc_f128_f64/compiler-builtins + time: [1.2821 µs 1.3047 µs 1.3452 µs] +Found 8 outliers among 100 measurements (8.00%) + 4 (4.00%) low severe + 1 (1.00%) low mild + 2 (2.00%) high mild + 1 (1.00%) high severe + + +running 52 tests +test memcmp_builtin_1048576 ... bench: 20,975.52 ns/iter (+/- 239.69) = 49991 MB/s +test memcmp_builtin_16 ... bench: 1.60 ns/iter (+/- 0.05) = 16000 MB/s +test memcmp_builtin_32 ... bench: 1.61 ns/iter (+/- 0.03) = 32000 MB/s +test memcmp_builtin_4096 ... bench: 95.84 ns/iter (+/- 2.82) = 43115 MB/s +test memcmp_builtin_64 ... bench: 2.39 ns/iter (+/- 0.09) = 32000 MB/s +test memcmp_builtin_8 ... bench: 1.60 ns/iter (+/- 0.04) = 8000 MB/s +test memcmp_builtin_unaligned_1048575 ... bench: 22,060.00 ns/iter (+/- 873.55) = 47532 MB/s +test memcmp_builtin_unaligned_15 ... bench: 3.19 ns/iter (+/- 0.02) = 5333 MB/s +test memcmp_builtin_unaligned_31 ... bench: 1.61 ns/iter (+/- 0.01) = 32000 MB/s +test memcmp_builtin_unaligned_4095 ... bench: 96.63 ns/iter (+/- 4.58) = 42666 MB/s +test memcmp_builtin_unaligned_63 ... bench: 2.40 ns/iter (+/- 0.11) = 32000 MB/s +test memcmp_builtin_unaligned_7 ... bench: 3.37 ns/iter (+/- 0.05) = 2666 MB/s +test memcmp_rust_1048576 ... bench: 309,647.23 ns/iter (+/- 6,077.35) = 3386 MB/s +test memcmp_rust_16 ... bench: 5.66 ns/iter (+/- 0.30) = 3200 MB/s +test memcmp_rust_32 ... bench: 10.47 ns/iter (+/- 0.14) = 3200 MB/s +test memcmp_rust_4096 ... 
bench: 1,124.34 ns/iter (+/- 36.92) = 3644 MB/s +test memcmp_rust_64 ... bench: 19.90 ns/iter (+/- 0.36) = 3368 MB/s +test memcmp_rust_8 ... bench: 3.46 ns/iter (+/- 0.11) = 2666 MB/s +test memcmp_rust_unaligned_1048575 ... bench: 308,613.87 ns/iter (+/- 6,613.18) = 3397 MB/s +test memcmp_rust_unaligned_15 ... bench: 5.35 ns/iter (+/- 0.05) = 3200 MB/s +test memcmp_rust_unaligned_31 ... bench: 9.94 ns/iter (+/- 0.06) = 3555 MB/s +test memcmp_rust_unaligned_4095 ... bench: 1,120.06 ns/iter (+/- 5.03) = 3657 MB/s +test memcmp_rust_unaligned_63 ... bench: 19.64 ns/iter (+/- 0.82) = 3368 MB/s +test memcmp_rust_unaligned_7 ... bench: 3.22 ns/iter (+/- 0.10) = 2666 MB/s +test memcpy_builtin_1048576 ... bench: 12,538.05 ns/iter (+/- 354.79) = 83631 MB/s +test memcpy_builtin_1048576_misalign ... bench: 30,092.56 ns/iter (+/- 8,064.04) = 34845 MB/s +test memcpy_builtin_1048576_offset ... bench: 12,538.36 ns/iter (+/- 359.04) = 83631 MB/s +test memcpy_builtin_4096 ... bench: 44.24 ns/iter (+/- 6.80) = 93090 MB/s +test memcpy_builtin_4096_misalign ... bench: 45.34 ns/iter (+/- 2.13) = 91022 MB/s +test memcpy_builtin_4096_offset ... bench: 44.71 ns/iter (+/- 0.61) = 93090 MB/s +test memcpy_rust_1048576 ... bench: 17,943.33 ns/iter (+/- 243.18) = 58439 MB/s +test memcpy_rust_1048576_misalign ... bench: 15,004.68 ns/iter (+/- 3,978.65) = 69886 MB/s +test memcpy_rust_1048576_offset ... bench: 14,722.06 ns/iter (+/- 479.54) = 71225 MB/s +test memcpy_rust_4096 ... bench: 44.91 ns/iter (+/- 4.62) = 93090 MB/s +test memcpy_rust_4096_misalign ... bench: 76.21 ns/iter (+/- 8.21) = 53894 MB/s +test memcpy_rust_4096_offset ... bench: 76.27 ns/iter (+/- 4.69) = 53894 MB/s +test memmove_builtin_1048576 ... bench: 18,644.50 ns/iter (+/- 379.84) = 56242 MB/s +test memmove_builtin_1048576_misalign ... bench: 18,947.70 ns/iter (+/- 1,226.26) = 55342 MB/s +test memmove_builtin_4096 ... bench: 44.21 ns/iter (+/- 0.79) = 93090 MB/s +test memmove_builtin_4096_misalign ... 
bench: 47.21 ns/iter (+/- 3.12) = 87148 MB/s +test memmove_rust_1048576 ... bench: 34,813.33 ns/iter (+/- 3,637.47) = 30120 MB/s +test memmove_rust_1048576_misalign ... bench: 35,067.19 ns/iter (+/- 1,699.63) = 29902 MB/s +test memmove_rust_4096 ... bench: 148.69 ns/iter (+/- 1.31) = 27675 MB/s +test memmove_rust_4096_misalign ... bench: 153.81 ns/iter (+/- 1.71) = 26771 MB/s +test memset_builtin_1048576 ... bench: 15,704.12 ns/iter (+/- 12,113.86) = 66771 MB/s +test memset_builtin_1048576_offset ... bench: 17,894.23 ns/iter (+/- 175.12) = 58599 MB/s +test memset_builtin_4096 ... bench: 39.95 ns/iter (+/- 0.19) = 105025 MB/s +test memset_builtin_4096_offset ... bench: 40.48 ns/iter (+/- 3.11) = 102400 MB/s +test memset_rust_1048576 ... bench: 10,600.66 ns/iter (+/- 1,559.93) = 98922 MB/s +test memset_rust_1048576_offset ... bench: 14,810.85 ns/iter (+/- 575.27) = 70801 MB/s +test memset_rust_4096 ... bench: 37.91 ns/iter (+/- 2.77) = 110702 MB/s +test memset_rust_4096_offset ... bench: 59.99 ns/iter (+/- 10.45) = 69423 MB/s + +test result: ok. 0 passed; 0 failed; 0 ignored; 52 measured; 0 filtered out; finished in 97.74s + diff --git a/library/compiler-builtins/testcrate/benches/float_add.rs b/library/compiler-builtins/testcrate/benches/float_add.rs new file mode 100644 index 000000000000..eef1ecc57c16 --- /dev/null +++ b/library/compiler-builtins/testcrate/benches/float_add.rs @@ -0,0 +1,81 @@ +#![feature(f128)] + +use compiler_builtins::float::add; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! 
{ + name: add_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: add::__addsf3, + sys_fn: __addsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "addss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fadd {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: add_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: add::__adddf3, + sys_fn: __adddf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "addsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fadd {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: add_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: add::__addtf3, + crate_fn_ppc: add::__addkf3, + sys_fn: __addtf3, + sys_fn_ppc: __addkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +criterion_group!(float_add, add_f32, add_f64, add_f128); +criterion_main!(float_add); diff --git a/library/compiler-builtins/testcrate/benches/float_cmp.rs b/library/compiler-builtins/testcrate/benches/float_cmp.rs new file mode 100644 index 000000000000..641eb0ac5d44 --- /dev/null +++ b/library/compiler-builtins/testcrate/benches/float_cmp.rs @@ -0,0 +1,202 @@ +#![feature(f128)] + +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +use compiler_builtins::float::cmp; + +/// `gt` symbols are allowed to return differing results, they just get compared +/// to 0. +fn gt_res_eq(a: i32, b: i32) -> bool { + let a_lt_0 = a <= 0; + let b_lt_0 = b <= 0; + (a_lt_0 && b_lt_0) || (!a_lt_0 && !b_lt_0) +} + +float_bench! 
{ + name: cmp_f32_gt, + sig: (a: f32, b: f32) -> i32, + crate_fn: cmp::__gtsf2, + sys_fn: __gtsf2, + sys_available: all(), + output_eq: gt_res_eq, + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomiss {a}, {b}", + "seta {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:s}, {b:s}", + "cset {ret:w}, gt", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem,nostack), + ); + + ret + }; + ], +} + +float_bench! { + name: cmp_f32_unord, + sig: (a: f32, b: f32) -> i32, + crate_fn: cmp::__unordsf2, + sys_fn: __unordsf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomiss {a}, {b}", + "setp {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:s}, {b:s}", + "cset {ret:w}, vs", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + ], +} + +float_bench! { + name: cmp_f64_gt, + sig: (a: f64, b: f64) -> i32, + crate_fn: cmp::__gtdf2, + sys_fn: __gtdf2, + sys_available: all(), + output_eq: gt_res_eq, + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomisd {a}, {b}", + "seta {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:d}, {b:d}", + "cset {ret:w}, gt", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + ], +} + +float_bench! 
{ + name: cmp_f64_unord, + sig: (a: f64, b: f64) -> i32, + crate_fn: cmp::__unorddf2, + sys_fn: __unorddf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: i32; + asm!( + "xor {ret:e}, {ret:e}", + "ucomisd {a}, {b}", + "setp {ret:l}", + a = in(xmm_reg) a, + b = in(xmm_reg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcmp {a:d}, {b:d}", + "cset {ret:w}, vs", + a = in(vreg) a, + b = in(vreg) b, + ret = out(reg) ret, + options(nomem, nostack, pure) + ); + + ret + }; + ], +} + +float_bench! { + name: cmp_f128_gt, + sig: (a: f128, b: f128) -> i32, + crate_fn: cmp::__gttf2, + crate_fn_ppc: cmp::__gtkf2, + sys_fn: __gttf2, + sys_fn_ppc: __gtkf2, + sys_available: not(feature = "no-sys-f128"), + output_eq: gt_res_eq, + asm: [] +} + +float_bench! { + name: cmp_f128_unord, + sig: (a: f128, b: f128) -> i32, + crate_fn: cmp::__unordtf2, + crate_fn_ppc: cmp::__unordkf2, + sys_fn: __unordtf2, + sys_fn_ppc: __unordkf2, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +criterion_group!( + float_cmp, + cmp_f32_gt, + cmp_f32_unord, + cmp_f64_gt, + cmp_f64_unord, + cmp_f128_gt, + cmp_f128_unord +); +criterion_main!(float_cmp); diff --git a/library/compiler-builtins/testcrate/benches/float_conv.rs b/library/compiler-builtins/testcrate/benches/float_conv.rs new file mode 100644 index 000000000000..bbd3a06851d4 --- /dev/null +++ b/library/compiler-builtins/testcrate/benches/float_conv.rs @@ -0,0 +1,547 @@ +#![feature(f128)] +#![allow(improper_ctypes)] + +use compiler_builtins::float::conv; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +/* unsigned int -> float */ + +float_bench! 
{ + name: conv_u32_f32, + sig: (a: u32) -> f32, + crate_fn: conv::__floatunsisf, + sys_fn: __floatunsisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "mov {tmp:e}, {a:e}", + "cvtsi2ss {ret}, {tmp}", + a = in(reg) a, + tmp = out(reg) _, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "ucvtf {ret:s}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_u32_f64, + sig: (a: u32) -> f64, + crate_fn: conv::__floatunsidf, + sys_fn: __floatunsidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f64; + asm!( + "mov {tmp:e}, {a:e}", + "cvtsi2sd {ret}, {tmp}", + a = in(reg) a, + tmp = out(reg) _, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "ucvtf {ret:d}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_u64_f32, + sig: (a: u64) -> f32, + crate_fn: conv::__floatundisf, + sys_fn: __floatundisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "ucvtf {ret:s}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_u64_f64, + sig: (a: u64) -> f64, + crate_fn: conv::__floatundidf, + sys_fn: __floatundidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "ucvtf {ret:d}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_u128_f32, + sig: (a: u128) -> f32, + crate_fn: conv::__floatuntisf, + sys_fn: __floatuntisf, + sys_available: all(), + asm: [] +} + +float_bench! 
{ + name: conv_u128_f64, + sig: (a: u128) -> f64, + crate_fn: conv::__floatuntidf, + sys_fn: __floatuntidf, + sys_available: all(), + asm: [] +} + +/* signed int -> float */ + +float_bench! { + name: conv_i32_f32, + sig: (a: i32) -> f32, + crate_fn: conv::__floatsisf, + sys_fn: __floatsisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "cvtsi2ss {ret}, {a:e}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "scvtf {ret:s}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_i32_f64, + sig: (a: i32) -> f64, + crate_fn: conv::__floatsidf, + sys_fn: __floatsidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f64; + asm!( + "cvtsi2sd {ret}, {a:e}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "scvtf {ret:d}, {a:w}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_i64_f32, + sig: (a: i64) -> f32, + crate_fn: conv::__floatdisf, + sys_fn: __floatdisf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "cvtsi2ss {ret}, {a:r}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "scvtf {ret:s}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! 
{ + name: conv_i64_f64, + sig: (a: i64) -> f64, + crate_fn: conv::__floatdidf, + sys_fn: __floatdidf, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f64; + asm!( + "cvtsi2sd {ret}, {a:r}", + a = in(reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "scvtf {ret:d}, {a:x}", + a = in(reg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_i128_f32, + sig: (a: i128) -> f32, + crate_fn: conv::__floattisf, + sys_fn: __floattisf, + sys_available: all(), + asm: [] +} + +float_bench! { + name: conv_i128_f64, + sig: (a: i128) -> f64, + crate_fn: conv::__floattidf, + sys_fn: __floattidf, + sys_available: all(), + asm: [] +} + +/* float -> unsigned int */ + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_u32, + sig: (a: f32) -> u32, + crate_fn: conv::__fixunssfsi, + sys_fn: __fixunssfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u32; + asm!( + "fcvtzu {ret:w}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_u64, + sig: (a: f32) -> u64, + crate_fn: conv::__fixunssfdi, + sys_fn: __fixunssfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u64; + asm!( + "fcvtzu {ret:x}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_u128, + sig: (a: f32) -> u128, + crate_fn: conv::__fixunssfti, + sys_fn: __fixunssfti, + sys_available: all(), + asm: [] +} + +float_bench! 
{ + name: conv_f64_u32, + sig: (a: f64) -> u32, + crate_fn: conv::__fixunsdfsi, + sys_fn: __fixunsdfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u32; + asm!( + "fcvtzu {ret:w}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_u64, + sig: (a: f64) -> u64, + crate_fn: conv::__fixunsdfdi, + sys_fn: __fixunsdfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: u64; + asm!( + "fcvtzu {ret:x}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_u128, + sig: (a: f64) -> u128, + crate_fn: conv::__fixunsdfti, + sys_fn: __fixunsdfti, + sys_available: all(), + asm: [] +} + +/* float -> signed int */ + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_i32, + sig: (a: f32) -> i32, + crate_fn: conv::__fixsfsi, + sys_fn: __fixsfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcvtzs {ret:w}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_i64, + sig: (a: f32) -> i64, + crate_fn: conv::__fixsfdi, + sys_fn: __fixsfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i64; + asm!( + "fcvtzs {ret:x}, {a:s}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +float_bench! { + name: conv_f32_i128, + sig: (a: f32) -> i128, + crate_fn: conv::__fixsfti, + sys_fn: __fixsfti, + sys_available: all(), + asm: [] +} + +float_bench! 
{ + name: conv_f64_i32, + sig: (a: f64) -> i32, + crate_fn: conv::__fixdfsi, + sys_fn: __fixdfsi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i32; + asm!( + "fcvtzs {ret:w}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_i64, + sig: (a: f64) -> i64, + crate_fn: conv::__fixdfdi, + sys_fn: __fixdfdi, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: i64; + asm!( + "fcvtzs {ret:x}, {a:d}", + a = in(vreg) a, + ret = lateout(reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: conv_f64_i128, + sig: (a: f64) -> i128, + crate_fn: conv::__fixdfti, + sys_fn: __fixdfti, + sys_available: all(), + asm: [] +} + +criterion_group!( + float_conv, + conv_u32_f32, + conv_u32_f64, + conv_u64_f32, + conv_u64_f64, + conv_u128_f32, + conv_u128_f64, + conv_i32_f32, + conv_i32_f64, + conv_i64_f32, + conv_i64_f64, + conv_i128_f32, + conv_i128_f64, + conv_f64_u32, + conv_f64_u64, + conv_f64_u128, + conv_f64_i32, + conv_f64_i64, + conv_f64_i128, +); + +// FIXME: ppc64le has a sporadic overflow panic in the crate functions +// +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +criterion_group!( + float_conv_not_ppc64le, + conv_f32_u32, + conv_f32_u64, + conv_f32_u128, + conv_f32_i32, + conv_f32_i64, + conv_f32_i128, +); + +#[cfg(all(target_arch = "powerpc64", target_endian = "little"))] +criterion_main!(float_conv); + +#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))] +criterion_main!(float_conv, float_conv_not_ppc64le); diff --git a/library/compiler-builtins/testcrate/benches/float_div.rs b/library/compiler-builtins/testcrate/benches/float_div.rs new file mode 100644 index 000000000000..e679f8ccccbe --- /dev/null +++ b/library/compiler-builtins/testcrate/benches/float_div.rs @@ -0,0 +1,70 @@ +#![feature(f128)] + +use compiler_builtins::float::div; 
+use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: div_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: div::__divsf3, + sys_fn: __divsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "divss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fdiv {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: div_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: div::__divdf3, + sys_fn: __divdf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "divsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fdiv {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +criterion_group!(float_div, div_f32, div_f64); +criterion_main!(float_div); diff --git a/library/compiler-builtins/testcrate/benches/float_extend.rs b/library/compiler-builtins/testcrate/benches/float_extend.rs new file mode 100644 index 000000000000..9bd8009e93b9 --- /dev/null +++ b/library/compiler-builtins/testcrate/benches/float_extend.rs @@ -0,0 +1,93 @@ +#![allow(unused_variables)] // "unused" f16 registers +#![feature(f128)] +#![feature(f16)] + +use compiler_builtins::float::extend; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! 
{ + name: extend_f16_f32, + sig: (a: f16) -> f32, + crate_fn: extend::__extendhfsf2, + sys_fn: __extendhfsf2, + sys_available: not(feature = "no-sys-f16"), + asm: [ + #[cfg(target_arch = "aarch64")] { + // FIXME(f16_f128): remove `to_bits()` after f16 asm support (rust-lang/rust/#116909) + let ret: f32; + asm!( + "fcvt {ret:s}, {a:h}", + a = in(vreg) a.to_bits(), + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: extend_f16_f128, + sig: (a: f16) -> f128, + crate_fn: extend::__extendhftf2, + crate_fn_ppc: extend::__extendhfkf2, + sys_fn: __extendhftf2, + sys_fn_ppc: __extendhfkf2, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [], +} + +float_bench! { + name: extend_f32_f64, + sig: (a: f32) -> f64, + crate_fn: extend::__extendsfdf2, + sys_fn: __extendsfdf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "aarch64")] { + let ret: f64; + asm!( + "fcvt {ret:d}, {a:s}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: extend_f32_f128, + sig: (a: f32) -> f128, + crate_fn: extend::__extendsftf2, + crate_fn_ppc: extend::__extendsfkf2, + sys_fn: __extendsftf2, + sys_fn_ppc: __extendsfkf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +float_bench! 
{ + name: extend_f64_f128, + sig: (a: f64) -> f128, + crate_fn: extend::__extenddftf2, + crate_fn_ppc: extend::__extenddfkf2, + sys_fn: __extenddftf2, + sys_fn_ppc: __extenddfkf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +criterion_group!( + float_extend, + extend_f16_f32, + extend_f16_f128, + extend_f32_f64, + extend_f32_f128, + extend_f64_f128, +); +criterion_main!(float_extend); diff --git a/library/compiler-builtins/testcrate/benches/float_mul.rs b/library/compiler-builtins/testcrate/benches/float_mul.rs new file mode 100644 index 000000000000..efa32b28563e --- /dev/null +++ b/library/compiler-builtins/testcrate/benches/float_mul.rs @@ -0,0 +1,81 @@ +#![feature(f128)] + +use compiler_builtins::float::mul; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: mul_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: mul::__mulsf3, + sys_fn: __mulsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "mulss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fmul {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: mul_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: mul::__muldf3, + sys_fn: __muldf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "mulsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fmul {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! 
{ + name: mul_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: mul::__multf3, + crate_fn_ppc: mul::__mulkf3, + sys_fn: __multf3, + sys_fn_ppc: __mulkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +criterion_group!(float_mul, mul_f32, mul_f64, mul_f128); +criterion_main!(float_mul); diff --git a/library/compiler-builtins/testcrate/benches/float_pow.rs b/library/compiler-builtins/testcrate/benches/float_pow.rs new file mode 100644 index 000000000000..252f740120db --- /dev/null +++ b/library/compiler-builtins/testcrate/benches/float_pow.rs @@ -0,0 +1,24 @@ +use compiler_builtins::float::pow; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! { + name: powi_f32, + sig: (a: f32, b: i32) -> f32, + crate_fn: pow::__powisf2, + sys_fn: __powisf2, + sys_available: all(), + asm: [], +} + +float_bench! { + name: powi_f64, + sig: (a: f64, b: i32) -> f64, + crate_fn: pow::__powidf2, + sys_fn: __powidf2, + sys_available: all(), + asm: [], +} + +criterion_group!(float_add, powi_f32, powi_f64); +criterion_main!(float_add); diff --git a/library/compiler-builtins/testcrate/benches/float_sub.rs b/library/compiler-builtins/testcrate/benches/float_sub.rs new file mode 100644 index 000000000000..6d87604aac71 --- /dev/null +++ b/library/compiler-builtins/testcrate/benches/float_sub.rs @@ -0,0 +1,81 @@ +#![feature(f128)] + +use compiler_builtins::float::sub; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! 
{ + name: sub_f32, + sig: (a: f32, b: f32) -> f32, + crate_fn: sub::__subsf3, + sys_fn: __subsf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "subss {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fsub {a:s}, {a:s}, {b:s}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: sub_f64, + sig: (a: f64, b: f64) -> f64, + crate_fn: sub::__subdf3, + sys_fn: __subdf3, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + asm!( + "subsd {a}, {b}", + a = inout(xmm_reg) a, + b = in(xmm_reg) b, + options(nomem, nostack, pure) + ); + + a + }; + + #[cfg(target_arch = "aarch64")] { + asm!( + "fsub {a:d}, {a:d}, {b:d}", + a = inout(vreg) a, + b = in(vreg) b, + options(nomem, nostack, pure) + ); + + a + }; + ], +} + +float_bench! { + name: sub_f128, + sig: (a: f128, b: f128) -> f128, + crate_fn: sub::__subtf3, + crate_fn_ppc: sub::__subkf3, + sys_fn: __subtf3, + sys_fn_ppc: __subkf3, + sys_available: not(feature = "no-sys-f128"), + asm: [] +} + +criterion_group!(float_sub, sub_f32, sub_f64, sub_f128); +criterion_main!(float_sub); diff --git a/library/compiler-builtins/testcrate/benches/float_trunc.rs b/library/compiler-builtins/testcrate/benches/float_trunc.rs new file mode 100644 index 000000000000..1553dacee082 --- /dev/null +++ b/library/compiler-builtins/testcrate/benches/float_trunc.rs @@ -0,0 +1,127 @@ +#![feature(f128)] +#![feature(f16)] + +use compiler_builtins::float::trunc; +use criterion::{criterion_group, criterion_main, Criterion}; +use testcrate::float_bench; + +float_bench! 
{ + name: trunc_f32_f16, + sig: (a: f32) -> f16, + crate_fn: trunc::__truncsfhf2, + sys_fn: __truncsfhf2, + sys_available: not(feature = "no-sys-f16"), + asm: [ + #[cfg(target_arch = "aarch64")] { + // FIXME(f16_f128): remove `from_bits()` after f16 asm support (rust-lang/rust/#116909) + let ret: u16; + asm!( + "fcvt {ret:h}, {a:s}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + f16::from_bits(ret) + }; + ], +} + +float_bench! { + name: trunc_f64_f16, + sig: (a: f64) -> f16, + crate_fn: trunc::__truncdfhf2, + sys_fn: __truncdfhf2, + sys_available: not(feature = "no-sys-f128"), + asm: [ + #[cfg(target_arch = "aarch64")] { + // FIXME(f16_f128): remove `from_bits()` after f16 asm support (rust-lang/rust/#116909) + let ret: u16; + asm!( + "fcvt {ret:h}, {a:d}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + f16::from_bits(ret) + }; + ], +} + +float_bench! { + name: trunc_f64_f32, + sig: (a: f64) -> f32, + crate_fn: trunc::__truncdfsf2, + sys_fn: __truncdfsf2, + sys_available: all(), + asm: [ + #[cfg(target_arch = "x86_64")] { + let ret: f32; + asm!( + "cvtsd2ss {ret}, {a}", + a = in(xmm_reg) a, + ret = lateout(xmm_reg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + + #[cfg(target_arch = "aarch64")] { + let ret: f32; + asm!( + "fcvt {ret:s}, {a:d}", + a = in(vreg) a, + ret = lateout(vreg) ret, + options(nomem, nostack, pure), + ); + + ret + }; + ], +} + +float_bench! { + name: trunc_f128_f16, + sig: (a: f128) -> f16, + crate_fn: trunc::__trunctfhf2, + crate_fn_ppc: trunc::__trunckfhf2, + sys_fn: __trunctfhf2, + sys_fn_ppc: __trunckfhf2, + sys_available: not(feature = "no-sys-f16-f128-convert"), + asm: [], +} + +float_bench! { + name: trunc_f128_f32, + sig: (a: f128) -> f32, + crate_fn: trunc::__trunctfsf2, + crate_fn_ppc: trunc::__trunckfsf2, + sys_fn: __trunctfsf2, + sys_fn_ppc: __trunckfsf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +float_bench! 
{ + name: trunc_f128_f64, + sig: (a: f128) -> f64, + crate_fn: trunc::__trunctfdf2, + crate_fn_ppc: trunc::__trunckfdf2, + sys_fn: __trunctfdf2, + sys_fn_ppc: __trunckfdf2, + sys_available: not(feature = "no-sys-f128"), + asm: [], +} + +criterion_group!( + float_trunc, + trunc_f32_f16, + trunc_f64_f16, + trunc_f64_f32, + trunc_f128_f16, + trunc_f128_f32, + trunc_f128_f64, +); +criterion_main!(float_trunc); diff --git a/library/compiler-builtins/testcrate/build.rs b/library/compiler-builtins/testcrate/build.rs index 1dad6c5e6491..cae83e1fc093 100644 --- a/library/compiler-builtins/testcrate/build.rs +++ b/library/compiler-builtins/testcrate/build.rs @@ -5,6 +5,8 @@ use std::{collections::HashSet, env}; enum Feature { NoSysF128, NoSysF128IntConvert, + NoSysF16, + NoSysF16F128Convert, } fn main() { @@ -31,6 +33,7 @@ fn main() { { features.insert(Feature::NoSysF128); features.insert(Feature::NoSysF128IntConvert); + features.insert(Feature::NoSysF16F128Convert); } if target.starts_with("i586") || target.starts_with("i686") { @@ -38,6 +41,17 @@ fn main() { features.insert(Feature::NoSysF128IntConvert); } + if target.contains("-unknown-linux-") { + // No `__extendhftf2` on x86, no `__trunctfhf2` on aarch64 + features.insert(Feature::NoSysF16F128Convert); + } + + if target.starts_with("wasm32-") { + // Linking says "error: function signature mismatch: __extendhfsf2" and seems to + // think the signature is either `(i32) -> f32` or `(f32) -> f32` + features.insert(Feature::NoSysF16); + } + for feature in features { let (name, warning) = match feature { Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"), @@ -45,6 +59,11 @@ fn main() { "no-sys-f128-int-convert", "using apfloat fallback for f128 to int conversions", ), + Feature::NoSysF16F128Convert => ( + "no-sys-f16-f128-convert", + "skipping using apfloat fallback for f16 <-> f128 conversions", + ), + Feature::NoSysF16 => ("no-sys-f16", "using apfloat fallback for f16"), }; 
println!("cargo:warning={warning}"); println!("cargo:rustc-cfg=feature=\"{name}\""); diff --git a/library/compiler-builtins/testcrate/src/bench.rs b/library/compiler-builtins/testcrate/src/bench.rs new file mode 100644 index 000000000000..1374d7b4f77a --- /dev/null +++ b/library/compiler-builtins/testcrate/src/bench.rs @@ -0,0 +1,348 @@ +use core::cell::RefCell; + +use alloc::vec::Vec; +use compiler_builtins::float::Float; + +/// Fuzz with this many items to ensure equal functions +pub const CHECK_ITER_ITEMS: u32 = 10_000; +/// Benchmark with this many items to get a variety +pub const BENCH_ITER_ITEMS: u32 = 500; + +/// Still run benchmarks/tests but don't check correctness between compiler-builtins and +/// builtin system functions +pub fn skip_sys_checks(test_name: &str) -> bool { + const ALWAYS_SKIPPED: &[&str] = &[ + // FIXME(f16_f128): system symbols have incorrect results + // + "extend_f16_f32", + "trunc_f32_f16", + "trunc_f64_f16", + // FIXME(f16_f128): rounding error + // + "mul_f128", + ]; + + // FIXME(f16_f128): error on LE ppc64. There are more tests that are cfg-ed out completely + // in their benchmark modules due to runtime panics.
+ // + const PPC64LE_SKIPPED: &[&str] = &["extend_f32_f128"]; + + // FIXME(f16_f128): system symbols have incorrect results + // + const X86_NO_SSE_SKIPPED: &[&str] = &["add_f128", "sub_f128", "powi_f32", "powi_f64"]; + + // FIXME(llvm): system symbols have incorrect results on Windows + // + const WINDOWS_SKIPPED: &[&str] = &[ + "conv_f32_u128", + "conv_f32_i128", + "conv_f64_u128", + "conv_f64_i128", + ]; + + if cfg!(target_arch = "arm") { + // The Arm symbols need a different ABI that our macro doesn't handle, just skip it + return true; + } + + if ALWAYS_SKIPPED.contains(&test_name) { + return true; + } + + if cfg!(all(target_arch = "powerpc64", target_endian = "little")) + && PPC64LE_SKIPPED.contains(&test_name) + { + return true; + } + + if cfg!(all(target_arch = "x86", not(target_feature = "sse"))) + && X86_NO_SSE_SKIPPED.contains(&test_name) + { + return true; + } + + if cfg!(target_family = "windows") && WINDOWS_SKIPPED.contains(&test_name) { + return true; + } + + false +} + +/// Still run benchmarks/tests but don't check correctness between compiler-builtins and +/// assembly functions +pub fn skip_asm_checks(test_name: &str) -> bool { + // FIXME(f16_f128): rounding error + // + const SKIPPED: &[&str] = &["mul_f32", "mul_f64"]; + + SKIPPED.contains(&test_name) +} + +/// Create a comparison of the system symbol, compiler_builtins, and optionally handwritten +/// assembly. +#[macro_export] +macro_rules! float_bench { + ( + // Name of this benchmark + name: $name:ident, + // The function signature to be tested + sig: ($($arg:ident: $arg_ty:ty),*) -> $ret_ty:ty, + // Path to the function in compiler_builtins + crate_fn: $crate_fn:path, + // Optional alias on ppc + $( crate_fn_ppc: $crate_fn_ppc:path, )? + // Name of the system symbol + sys_fn: $sys_fn:ident, + // Optional alias on ppc + $( sys_fn_ppc: $sys_fn_ppc:path, )?
+ // Meta saying whether the system symbol is available + sys_available: $sys_available:meta, + // An optional function to validate the results of two functions are equal, if not + // just `$ret_ty::check_eq` + $( output_eq: $output_eq:expr, )? + // Assembly implementations, if any. + asm: [ + $( + #[cfg($asm_meta:meta)] { + $($asm_tt:tt)* + } + );* + $(;)? + ] + $(,)? + ) => {paste::paste! { + #[cfg($sys_available)] + extern "C" { + /// Binding for the system function + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty; + + + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + float_bench! { @coalesce_fn $($sys_fn_ppc)? => + fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty; + } + } + + fn $name(c: &mut Criterion) { + use core::hint::black_box; + use compiler_builtins::float::Float; + use $crate::bench::TestIO; + + #[inline(never)] // equalize with external calls + fn crate_fn($($arg: $arg_ty),*) -> $ret_ty { + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + let target_crate_fn = $crate_fn; + + // On PPC, use an alias if specified + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + let target_crate_fn = float_bench!(@coalesce $($crate_fn_ppc)?, $crate_fn); + + target_crate_fn( $($arg),* ) + } + + #[inline(always)] // already a branch + #[cfg($sys_available)] + fn sys_fn($($arg: $arg_ty),*) -> $ret_ty { + #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))] + let target_sys_fn = $sys_fn; + + // On PPC, use an alias if specified + #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] + let target_sys_fn = float_bench!(@coalesce $($sys_fn_ppc)?, $sys_fn); + + unsafe { target_sys_fn( $($arg),* ) } + } + + #[inline(never)] // equalize with external calls + #[cfg(any( $($asm_meta),* ))] + fn asm_fn($(mut $arg: $arg_ty),*) -> $ret_ty { + use core::arch::asm; + $( + #[cfg($asm_meta)] + unsafe { $($asm_tt)* } + )* + } + + let testvec = 
<($($arg_ty),*)>::make_testvec($crate::bench::CHECK_ITER_ITEMS); + let benchvec = <($($arg_ty),*)>::make_testvec($crate::bench::BENCH_ITER_ITEMS); + let test_name = stringify!($name); + let check_eq = float_bench!(@coalesce $($output_eq)?, $ret_ty::check_eq); + + // Verify math lines up. We run the crate functions even if we don't validate the + // output here to make sure there are no panics or crashes. + + #[cfg($sys_available)] + for ($($arg),*) in testvec.iter().copied() { + let crate_res = crate_fn($($arg),*); + let sys_res = sys_fn($($arg),*); + + if $crate::bench::skip_sys_checks(test_name) { + continue; + } + + assert!( + check_eq(crate_res, sys_res), + "{test_name}{:?}: crate: {crate_res:?}, sys: {sys_res:?}", + ($($arg),* ,) + ); + } + + #[cfg(any( $($asm_meta),* ))] + { + for ($($arg),*) in testvec.iter().copied() { + let crate_res = crate_fn($($arg),*); + let asm_res = asm_fn($($arg),*); + + if $crate::bench::skip_asm_checks(test_name) { + continue; + } + + assert!( + check_eq(crate_res, asm_res), + "{test_name}{:?}: crate: {crate_res:?}, asm: {asm_res:?}", + ($($arg),* ,) + ); + } + } + + let mut group = c.benchmark_group(test_name); + group.bench_function("compiler-builtins", |b| b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(crate_fn( $(black_box($arg)),* )); + } + })); + + #[cfg($sys_available)] + group.bench_function("system", |b| b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(sys_fn( $(black_box($arg)),* )); + } + })); + + #[cfg(any( $($asm_meta),* ))] + group.bench_function(&format!( + "assembly ({} {})", std::env::consts::ARCH, std::env::consts::FAMILY + ), |b| b.iter(|| { + for ($($arg),*) in benchvec.iter().copied() { + black_box(asm_fn( $(black_box($arg)),* )); + } + })); + + group.finish(); + } + }}; + + // Allow overriding a default + (@coalesce $specified:expr, $default:expr) => { $specified }; + (@coalesce, $default:expr) => { $default }; + + // Allow overriding a function name + 
(@coalesce_fn $specified:ident => fn $default_name:ident $($tt:tt)+) => { + fn $specified $($tt)+ + }; + (@coalesce_fn => fn $default_name:ident $($tt:tt)+) => { + fn $default_name $($tt)+ + }; +} + +/// A type used as either an input or output to/from a benchmark function. +pub trait TestIO: Sized { + fn make_testvec(len: u32) -> Vec; + fn check_eq(a: Self, b: Self) -> bool; +} + +macro_rules! impl_testio { + (float $($f_ty:ty),+) => {$( + impl TestIO for $f_ty { + fn make_testvec(len: u32) -> Vec { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_float(len, |a| ret.borrow_mut().push(a)); + ret.into_inner() + } + + fn check_eq(a: Self, b: Self) -> bool { + Float::eq_repr(a, b) + } + } + + impl TestIO for ($f_ty, $f_ty) { + fn make_testvec(len: u32) -> Vec { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_float_2(len, |a, b| ret.borrow_mut().push((a, b))); + ret.into_inner() + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + )*}; + + (int $($i_ty:ty),+) => {$( + impl TestIO for $i_ty { + fn make_testvec(len: u32) -> Vec { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz(len, |a| ret.borrow_mut().push(a)); + ret.into_inner() + } + + fn check_eq(a: Self, b: Self) -> bool { + a == b + } + } + + impl TestIO for ($i_ty, $i_ty) { + fn make_testvec(len: u32) -> Vec { + // refcell because fuzz_* takes a `Fn` + let ret = RefCell::new(Vec::new()); + crate::fuzz_2(len, |a, b| ret.borrow_mut().push((a, b))); + ret.into_inner() + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + )*}; + + ((float, int) ($f_ty:ty, $i_ty:ty)) => { + impl TestIO for ($f_ty, $i_ty) { + fn make_testvec(len: u32) -> Vec { + // refcell because fuzz_* takes a `Fn` + let ivec = RefCell::new(Vec::new()); + let fvec = RefCell::new(Vec::new()); + + crate::fuzz(len.isqrt(), |a| ivec.borrow_mut().push(a)); + 
crate::fuzz_float(len.isqrt(), |a| fvec.borrow_mut().push(a)); + + let mut ret = Vec::new(); + let ivec = ivec.into_inner(); + let fvec = fvec.into_inner(); + + for f in fvec { + for i in &ivec { + ret.push((f, *i)); + } + } + + ret + } + + fn check_eq(_a: Self, _b: Self) -> bool { + unimplemented!() + } + } + } +} + +#[cfg(not(feature = "no-f16-f128"))] +impl_testio!(float f16, f128); +impl_testio!(float f32, f64); +impl_testio!(int i16, i32, i64, i128); +impl_testio!(int u16, u32, u64, u128); +impl_testio!((float, int)(f32, i32)); +impl_testio!((float, int)(f64, i32)); diff --git a/library/compiler-builtins/testcrate/src/lib.rs b/library/compiler-builtins/testcrate/src/lib.rs index 5ee96ad27517..f9b052528f02 100644 --- a/library/compiler-builtins/testcrate/src/lib.rs +++ b/library/compiler-builtins/testcrate/src/lib.rs @@ -13,6 +13,12 @@ //! Some floating point tests are disabled for specific architectures, because they do not have //! correct rounding. #![no_std] +#![cfg_attr(not(feature = "no-f16-f128"), feature(f128))] +#![cfg_attr(not(feature = "no-f16-f128"), feature(f16))] +#![feature(isqrt)] + +pub mod bench; +extern crate alloc; use compiler_builtins::float::Float; use compiler_builtins::int::{Int, MinInt};