Merge pull request #618 from tgross35/benchmarking

Add benchmarks for floating point math
This commit is contained in:
Amanieu d'Antras 2024-05-24 23:18:00 +02:00 committed by GitHub
commit 46e377ae5f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 2930 additions and 2 deletions

View file

@@ -4,7 +4,9 @@ set -eux
 target="${1:-}"
-if [ -z "${1:-}" ]; then
+export RUST_BACKTRACE="${RUST_BACKTRACE:-full}"
+if [ -z "$target" ]; then
 host_target=$(rustc -vV | awk '/^host/ { print $2 }')
 echo "Defaulted to host target $host_target"
 target="$host_target"
@@ -30,6 +32,8 @@ else
 $run --features no-asm --release
 $run --features no-f16-f128
 $run --features no-f16-f128 --release
+$run --benches
+$run --benches --release
 fi
 if [ "${TEST_VERBATIM:-}" = "1" ]; then

View file

@@ -21,6 +21,10 @@ path = ".."
 default-features = false
 features = ["public-test-deps"]
+[dev-dependencies]
+criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
+paste = "1.0.15"
 [target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies]
 test = { git = "https://github.com/japaric/utest" }
 utest-cortex-m-qemu = { default-features = false, git = "https://github.com/japaric/utest" }
@@ -34,6 +38,48 @@ no-f16-f128 = ["compiler_builtins/no-f16-f128"]
 mem = ["compiler_builtins/mem"]
 mangled-names = ["compiler_builtins/mangled-names"]
 # Skip tests that rely on f128 symbols being available on the system
-no-sys-f128 = ["no-sys-f128-int-convert"]
+no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"]
 # Some platforms have some f128 functions but everything except integer conversions
 no-sys-f128-int-convert = []
+no-sys-f16-f128-convert = []
+# Skip tests that rely on f16 symbols being available on the system
+no-sys-f16 = []
+# Enable report generation without bringing in more dependencies by default
+benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
+[[bench]]
+name = "float_add"
+harness = false
+[[bench]]
+name = "float_sub"
+harness = false
+[[bench]]
+name = "float_mul"
+harness = false
+[[bench]]
+name = "float_div"
+harness = false
+[[bench]]
+name = "float_cmp"
+harness = false
+[[bench]]
+name = "float_conv"
+harness = false
+[[bench]]
+name = "float_extend"
+harness = false
+[[bench]]
+name = "float_trunc"
+harness = false
+[[bench]]
+name = "float_pow"
+harness = false

View file

@ -0,0 +1,500 @@
running 0 tests
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s
add_f32 compiler-builtins
time: [35.804 µs 35.863 µs 35.920 µs]
Found 5 outliers among 100 measurements (5.00%)
2 (2.00%) high mild
3 (3.00%) high severe
add_f32 system time: [39.084 µs 39.127 µs 39.169 µs]
Found 11 outliers among 100 measurements (11.00%)
7 (7.00%) high mild
4 (4.00%) high severe
add_f32 assembly (aarch64 unix)
time: [8.1034 µs 8.1441 µs 8.1866 µs]
Found 4 outliers among 100 measurements (4.00%)
4 (4.00%) high mild
add_f64 compiler-builtins
time: [35.647 µs 35.725 µs 35.799 µs]
Found 10 outliers among 100 measurements (10.00%)
8 (8.00%) high mild
2 (2.00%) high severe
add_f64 system time: [39.308 µs 39.322 µs 39.336 µs]
Found 7 outliers among 100 measurements (7.00%)
4 (4.00%) high mild
3 (3.00%) high severe
add_f64 assembly (aarch64 unix)
time: [8.0401 µs 8.0442 µs 8.0499 µs]
Found 11 outliers among 100 measurements (11.00%)
2 (2.00%) high mild
9 (9.00%) high severe
add_f128 compiler-builtins
time: [41.801 µs 41.986 µs 42.201 µs]
Found 7 outliers among 100 measurements (7.00%)
4 (4.00%) high mild
3 (3.00%) high severe
cmp_f32_gt compiler-builtins
time: [13.579 µs 13.675 µs 13.778 µs]
Found 16 outliers among 100 measurements (16.00%)
6 (6.00%) high mild
10 (10.00%) high severe
cmp_f32_gt system time: [12.343 µs 12.348 µs 12.355 µs]
Found 13 outliers among 100 measurements (13.00%)
1 (1.00%) low mild
3 (3.00%) high mild
9 (9.00%) high severe
cmp_f32_gt assembly (aarch64 unix)
time: [8.2593 µs 8.3185 µs 8.3813 µs]
Found 1 outliers among 100 measurements (1.00%)
1 (1.00%) high mild
cmp_f32_unord compiler-builtins
time: [11.977 µs 12.042 µs 12.109 µs]
Found 13 outliers among 100 measurements (13.00%)
5 (5.00%) low severe
6 (6.00%) low mild
2 (2.00%) high mild
cmp_f32_unord system time: [8.1236 µs 8.1736 µs 8.2350 µs]
Found 18 outliers among 100 measurements (18.00%)
5 (5.00%) high mild
13 (13.00%) high severe
cmp_f32_unord assembly (aarch64 unix)
time: [8.1446 µs 8.2080 µs 8.2762 µs]
Found 14 outliers among 100 measurements (14.00%)
6 (6.00%) high mild
8 (8.00%) high severe
cmp_f64_gt compiler-builtins
time: [16.073 µs 16.077 µs 16.082 µs]
Found 17 outliers among 100 measurements (17.00%)
2 (2.00%) low mild
4 (4.00%) high mild
11 (11.00%) high severe
cmp_f64_gt system time: [12.456 µs 12.487 µs 12.522 µs]
Found 3 outliers among 100 measurements (3.00%)
2 (2.00%) high mild
1 (1.00%) high severe
cmp_f64_gt assembly (aarch64 unix)
time: [8.0557 µs 8.0616 µs 8.0685 µs]
Found 3 outliers among 100 measurements (3.00%)
1 (1.00%) high mild
2 (2.00%) high severe
cmp_f64_unord compiler-builtins
time: [10.715 µs 10.724 µs 10.737 µs]
Found 13 outliers among 100 measurements (13.00%)
3 (3.00%) high mild
10 (10.00%) high severe
cmp_f64_unord system time: [8.0692 µs 8.0734 µs 8.0784 µs]
Found 3 outliers among 100 measurements (3.00%)
1 (1.00%) high mild
2 (2.00%) high severe
cmp_f64_unord assembly (aarch64 unix)
time: [8.0569 µs 8.0677 µs 8.0818 µs]
Found 18 outliers among 100 measurements (18.00%)
4 (4.00%) high mild
14 (14.00%) high severe
cmp_f128_gt compiler-builtins
time: [18.234 µs 18.401 µs 18.602 µs]
cmp_f128_unord compiler-builtins
time: [13.410 µs 13.471 µs 13.542 µs]
Found 7 outliers among 100 measurements (7.00%)
7 (7.00%) high mild
conv_u32_f32 compiler-builtins
time: [774.58 ns 776.01 ns 777.59 ns]
Found 9 outliers among 100 measurements (9.00%)
2 (2.00%) high mild
7 (7.00%) high severe
conv_u32_f32 system time: [622.68 ns 625.64 ns 629.26 ns]
Found 16 outliers among 100 measurements (16.00%)
7 (7.00%) high mild
9 (9.00%) high severe
conv_u32_f32 assembly (aarch64 unix)
time: [468.05 ns 469.76 ns 471.46 ns]
Found 3 outliers among 100 measurements (3.00%)
2 (2.00%) high mild
1 (1.00%) high severe
conv_u32_f64 compiler-builtins
time: [617.61 ns 618.00 ns 618.52 ns]
Found 13 outliers among 100 measurements (13.00%)
4 (4.00%) high mild
9 (9.00%) high severe
conv_u32_f64 system time: [469.56 ns 471.03 ns 472.81 ns]
Found 11 outliers among 100 measurements (11.00%)
7 (7.00%) high mild
4 (4.00%) high severe
conv_u32_f64 assembly (aarch64 unix)
time: [464.43 ns 465.01 ns 465.72 ns]
Found 13 outliers among 100 measurements (13.00%)
5 (5.00%) high mild
8 (8.00%) high severe
conv_u64_f32 compiler-builtins
time: [847.95 ns 848.19 ns 848.46 ns]
Found 19 outliers among 100 measurements (19.00%)
3 (3.00%) low mild
9 (9.00%) high mild
7 (7.00%) high severe
conv_u64_f32 system time: [701.68 ns 701.95 ns 702.30 ns]
Found 10 outliers among 100 measurements (10.00%)
4 (4.00%) high mild
6 (6.00%) high severe
conv_u64_f32 assembly (aarch64 unix)
time: [511.73 ns 512.43 ns 513.32 ns]
Found 6 outliers among 100 measurements (6.00%)
6 (6.00%) high mild
conv_u64_f64 compiler-builtins
time: [681.23 ns 682.55 ns 684.30 ns]
Found 18 outliers among 100 measurements (18.00%)
1 (1.00%) high mild
17 (17.00%) high severe
conv_u64_f64 system time: [679.34 ns 679.57 ns 679.88 ns]
Found 18 outliers among 100 measurements (18.00%)
1 (1.00%) low mild
6 (6.00%) high mild
11 (11.00%) high severe
conv_u64_f64 assembly (aarch64 unix)
time: [509.90 ns 510.09 ns 510.30 ns]
Found 15 outliers among 100 measurements (15.00%)
6 (6.00%) high mild
9 (9.00%) high severe
conv_u128_f32 compiler-builtins
time: [1.1368 µs 1.1372 µs 1.1377 µs]
Found 14 outliers among 100 measurements (14.00%)
8 (8.00%) high mild
6 (6.00%) high severe
conv_u128_f32 system time: [1.4338 µs 1.4370 µs 1.4410 µs]
Found 7 outliers among 100 measurements (7.00%)
2 (2.00%) high mild
5 (5.00%) high severe
conv_u128_f64 compiler-builtins
time: [1.0133 µs 1.0143 µs 1.0156 µs]
Found 16 outliers among 100 measurements (16.00%)
2 (2.00%) high mild
14 (14.00%) high severe
conv_u128_f64 system time: [1.3473 µs 1.3530 µs 1.3600 µs]
Found 4 outliers among 100 measurements (4.00%)
4 (4.00%) high mild
conv_i32_f32 compiler-builtins
time: [906.53 ns 907.86 ns 909.23 ns]
Found 7 outliers among 100 measurements (7.00%)
4 (4.00%) high mild
3 (3.00%) high severe
conv_i32_f32 system time: [914.53 ns 915.69 ns 917.01 ns]
Found 10 outliers among 100 measurements (10.00%)
6 (6.00%) high mild
4 (4.00%) high severe
conv_i32_f32 assembly (aarch64 unix)
time: [464.55 ns 465.10 ns 465.83 ns]
Found 4 outliers among 100 measurements (4.00%)
4 (4.00%) high mild
conv_i32_f64 compiler-builtins
time: [617.63 ns 617.92 ns 618.27 ns]
Found 12 outliers among 100 measurements (12.00%)
3 (3.00%) high mild
9 (9.00%) high severe
conv_i32_f64 system time: [622.83 ns 624.19 ns 625.61 ns]
Found 6 outliers among 100 measurements (6.00%)
5 (5.00%) high mild
1 (1.00%) high severe
conv_i32_f64 assembly (aarch64 unix)
time: [465.24 ns 466.04 ns 466.95 ns]
Found 11 outliers among 100 measurements (11.00%)
4 (4.00%) high mild
7 (7.00%) high severe
conv_i64_f32 compiler-builtins
time: [852.67 ns 853.92 ns 855.34 ns]
Found 11 outliers among 100 measurements (11.00%)
3 (3.00%) high mild
8 (8.00%) high severe
conv_i64_f32 system time: [906.94 ns 908.04 ns 909.33 ns]
Found 15 outliers among 100 measurements (15.00%)
2 (2.00%) high mild
13 (13.00%) high severe
conv_i64_f32 assembly (aarch64 unix)
time: [510.84 ns 511.27 ns 511.80 ns]
Found 8 outliers among 100 measurements (8.00%)
3 (3.00%) high mild
5 (5.00%) high severe
conv_i64_f64 compiler-builtins
time: [932.35 ns 932.97 ns 933.76 ns]
Found 10 outliers among 100 measurements (10.00%)
4 (4.00%) high mild
6 (6.00%) high severe
conv_i64_f64 system time: [955.91 ns 958.95 ns 962.05 ns]
Found 5 outliers among 100 measurements (5.00%)
3 (3.00%) high mild
2 (2.00%) high severe
conv_i64_f64 assembly (aarch64 unix)
time: [510.19 ns 510.72 ns 511.44 ns]
Found 9 outliers among 100 measurements (9.00%)
5 (5.00%) high mild
4 (4.00%) high severe
conv_i128_f32 compiler-builtins
time: [1.4248 µs 1.4285 µs 1.4323 µs]
Found 12 outliers among 100 measurements (12.00%)
7 (7.00%) high mild
5 (5.00%) high severe
conv_i128_f32 system time: [1.6970 µs 1.7017 µs 1.7069 µs]
Found 5 outliers among 100 measurements (5.00%)
3 (3.00%) high mild
2 (2.00%) high severe
conv_i128_f64 compiler-builtins
time: [1.3132 µs 1.3161 µs 1.3191 µs]
Found 2 outliers among 100 measurements (2.00%)
1 (1.00%) high mild
1 (1.00%) high severe
conv_i128_f64 system time: [1.6071 µs 1.6100 µs 1.6133 µs]
Found 4 outliers among 100 measurements (4.00%)
3 (3.00%) high mild
1 (1.00%) high severe
conv_f64_u32 compiler-builtins
time: [640.35 ns 641.00 ns 641.68 ns]
Found 6 outliers among 100 measurements (6.00%)
4 (4.00%) high mild
2 (2.00%) high severe
conv_f64_u32 system time: [640.87 ns 641.63 ns 642.42 ns]
Found 3 outliers among 100 measurements (3.00%)
1 (1.00%) high mild
2 (2.00%) high severe
conv_f64_u32 assembly (aarch64 unix)
time: [482.02 ns 482.67 ns 483.38 ns]
Found 1 outliers among 100 measurements (1.00%)
1 (1.00%) high severe
conv_f64_u64 compiler-builtins
time: [638.58 ns 638.98 ns 639.45 ns]
Found 15 outliers among 100 measurements (15.00%)
1 (1.00%) high mild
14 (14.00%) high severe
conv_f64_u64 system time: [642.54 ns 644.07 ns 645.59 ns]
Found 4 outliers among 100 measurements (4.00%)
3 (3.00%) high mild
1 (1.00%) high severe
conv_f64_u64 assembly (aarch64 unix)
time: [482.65 ns 483.70 ns 484.87 ns]
Found 1 outliers among 100 measurements (1.00%)
1 (1.00%) high mild
conv_f64_u128 compiler-builtins
time: [1.0631 µs 1.0652 µs 1.0674 µs]
Found 8 outliers among 100 measurements (8.00%)
7 (7.00%) high mild
1 (1.00%) high severe
conv_f64_u128 system time: [821.41 ns 823.45 ns 825.74 ns]
Found 11 outliers among 100 measurements (11.00%)
8 (8.00%) high mild
3 (3.00%) high severe
conv_f64_i32 compiler-builtins
time: [826.76 ns 845.08 ns 870.23 ns]
Found 4 outliers among 100 measurements (4.00%)
4 (4.00%) high mild
conv_f64_i32 system time: [764.12 ns 764.63 ns 765.26 ns]
Found 2 outliers among 100 measurements (2.00%)
2 (2.00%) high severe
conv_f64_i32 assembly (aarch64 unix)
time: [484.50 ns 485.98 ns 487.54 ns]
Found 3 outliers among 100 measurements (3.00%)
1 (1.00%) high mild
2 (2.00%) high severe
conv_f64_i64 compiler-builtins
time: [797.27 ns 798.19 ns 799.84 ns]
Found 9 outliers among 100 measurements (9.00%)
5 (5.00%) high mild
4 (4.00%) high severe
conv_f64_i64 system time: [768.74 ns 769.52 ns 770.23 ns]
Found 1 outliers among 100 measurements (1.00%)
1 (1.00%) high severe
conv_f64_i64 assembly (aarch64 unix)
time: [480.59 ns 481.03 ns 481.46 ns]
Found 3 outliers among 100 measurements (3.00%)
2 (2.00%) high mild
1 (1.00%) high severe
conv_f64_i128 compiler-builtins
time: [1.0577 µs 1.0591 µs 1.0606 µs]
Found 2 outliers among 100 measurements (2.00%)
1 (1.00%) high mild
1 (1.00%) high severe
conv_f64_i128 system time: [1.0181 µs 1.0195 µs 1.0211 µs]
Found 3 outliers among 100 measurements (3.00%)
3 (3.00%) high mild
conv_f32_u32 compiler-builtins
time: [800.40 ns 801.39 ns 802.35 ns]
Found 2 outliers among 100 measurements (2.00%)
2 (2.00%) high mild
conv_f32_u32 system time: [638.12 ns 638.34 ns 638.63 ns]
Found 11 outliers among 100 measurements (11.00%)
4 (4.00%) high mild
7 (7.00%) high severe
conv_f32_u32 assembly (aarch64 unix)
time: [479.37 ns 480.97 ns 483.32 ns]
Found 13 outliers among 100 measurements (13.00%)
6 (6.00%) high mild
7 (7.00%) high severe
conv_f32_u64 compiler-builtins
time: [801.95 ns 803.64 ns 805.75 ns]
conv_f32_u64 system time: [638.20 ns 638.56 ns 639.07 ns]
Found 10 outliers among 100 measurements (10.00%)
1 (1.00%) high mild
9 (9.00%) high severe
conv_f32_u64 assembly (aarch64 unix)
time: [480.07 ns 480.47 ns 480.86 ns]
Found 2 outliers among 100 measurements (2.00%)
1 (1.00%) high mild
1 (1.00%) high severe
conv_f32_u128 compiler-builtins
time: [1.1579 µs 1.1623 µs 1.1657 µs]
Found 14 outliers among 100 measurements (14.00%)
2 (2.00%) low severe
7 (7.00%) high mild
5 (5.00%) high severe
conv_f32_u128 system time: [1.0344 µs 1.0394 µs 1.0450 µs]
conv_f32_i32 compiler-builtins
time: [800.14 ns 801.52 ns 803.26 ns]
Found 10 outliers among 100 measurements (10.00%)
8 (8.00%) high mild
2 (2.00%) high severe
conv_f32_i32 system time: [741.36 ns 741.74 ns 742.13 ns]
Found 4 outliers among 100 measurements (4.00%)
2 (2.00%) high mild
2 (2.00%) high severe
conv_f32_i32 assembly (aarch64 unix)
time: [484.35 ns 486.08 ns 488.11 ns]
Found 17 outliers among 100 measurements (17.00%)
9 (9.00%) high mild
8 (8.00%) high severe
conv_f32_i64 compiler-builtins
time: [800.94 ns 802.68 ns 804.74 ns]
conv_f32_i64 system time: [748.60 ns 750.68 ns 753.16 ns]
Found 9 outliers among 100 measurements (9.00%)
4 (4.00%) high mild
5 (5.00%) high severe
conv_f32_i64 assembly (aarch64 unix)
time: [480.70 ns 481.23 ns 481.82 ns]
Found 4 outliers among 100 measurements (4.00%)
2 (2.00%) high mild
2 (2.00%) high severe
conv_f32_i128 compiler-builtins
time: [1.1774 µs 1.1829 µs 1.1887 µs]
Found 11 outliers among 100 measurements (11.00%)
1 (1.00%) low severe
7 (7.00%) low mild
1 (1.00%) high mild
2 (2.00%) high severe
conv_f32_i128 system time: [1.1785 µs 1.1853 µs 1.1941 µs]
Found 7 outliers among 100 measurements (7.00%)
2 (2.00%) high mild
5 (5.00%) high severe
div_f32 compiler-builtins
time: [38.852 µs 39.011 µs 39.178 µs]
Found 3 outliers among 100 measurements (3.00%)
3 (3.00%) high mild
div_f32 system time: [41.846 µs 41.920 µs 42.005 µs]
Found 3 outliers among 100 measurements (3.00%)
1 (1.00%) high mild
2 (2.00%) high severe
div_f32 assembly (aarch64 unix)
time: [8.1309 µs 8.1627 µs 8.2005 µs]
Found 2 outliers among 100 measurements (2.00%)
2 (2.00%) high mild
div_f64 compiler-builtins
time: [50.369 µs 50.605 µs 50.857 µs]
Found 15 outliers among 100 measurements (15.00%)
11 (11.00%) high mild
4 (4.00%) high severe
div_f64 system time: [53.506 µs 53.582 µs 53.676 µs]
Found 8 outliers among 100 measurements (8.00%)
4 (4.00%) high mild
4 (4.00%) high severe
div_f64 assembly (aarch64 unix)
time: [8.0695 µs 8.0807 µs 8.0948 µs]
Found 4 outliers among 100 measurements (4.00%)
2 (2.00%) high mild
2 (2.00%) high severe

View file

@ -0,0 +1,699 @@
running 0 tests
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s
add_f32/compiler-builtins
time: [36.813 µs 37.048 µs 37.303 µs]
Found 5 outliers among 100 measurements (5.00%)
5 (5.00%) high mild
add_f32/system time: [39.103 µs 39.142 µs 39.189 µs]
Found 8 outliers among 100 measurements (8.00%)
2 (2.00%) high mild
6 (6.00%) high severe
add_f32/assembly (aarch64 unix)
time: [8.3786 µs 8.4680 µs 8.5570 µs]
add_f64/compiler-builtins
time: [35.784 µs 35.819 µs 35.863 µs]
Found 4 outliers among 100 measurements (4.00%)
1 (1.00%) high mild
3 (3.00%) high severe
add_f64/system time: [39.634 µs 39.689 µs 39.746 µs]
Found 16 outliers among 100 measurements (16.00%)
4 (4.00%) high mild
12 (12.00%) high severe
add_f64/assembly (aarch64 unix)
time: [8.0533 µs 8.0599 µs 8.0670 µs]
Found 14 outliers among 100 measurements (14.00%)
6 (6.00%) high mild
8 (8.00%) high severe
add_f128/compiler-builtins
time: [41.830 µs 41.920 µs 42.005 µs]
cmp_f32_gt/compiler-builtins
time: [13.405 µs 13.411 µs 13.418 µs]
Found 18 outliers among 100 measurements (18.00%)
4 (4.00%) high mild
14 (14.00%) high severe
cmp_f32_gt/system time: [12.348 µs 12.355 µs 12.363 µs]
Found 12 outliers among 100 measurements (12.00%)
2 (2.00%) high mild
10 (10.00%) high severe
cmp_f32_gt/assembly (aarch64 unix)
time: [8.1233 µs 8.1625 µs 8.2072 µs]
Found 12 outliers among 100 measurements (12.00%)
7 (7.00%) high mild
5 (5.00%) high severe
cmp_f32_unord/compiler-builtins
time: [11.349 µs 11.467 µs 11.584 µs]
cmp_f32_unord/system time: [8.0714 µs 8.0792 µs 8.0890 µs]
Found 16 outliers among 100 measurements (16.00%)
4 (4.00%) high mild
12 (12.00%) high severe
cmp_f32_unord/assembly (aarch64 unix)
time: [8.1121 µs 8.1705 µs 8.2325 µs]
Found 20 outliers among 100 measurements (20.00%)
3 (3.00%) high mild
17 (17.00%) high severe
cmp_f64_gt/compiler-builtins
time: [13.749 µs 13.837 µs 13.934 µs]
Found 20 outliers among 100 measurements (20.00%)
9 (9.00%) low mild
7 (7.00%) high mild
4 (4.00%) high severe
cmp_f64_gt/system time: [12.475 µs 12.515 µs 12.565 µs]
Found 4 outliers among 100 measurements (4.00%)
4 (4.00%) high mild
cmp_f64_gt/assembly (aarch64 unix)
time: [8.0456 µs 8.0540 µs 8.0653 µs]
Found 12 outliers among 100 measurements (12.00%)
3 (3.00%) high mild
9 (9.00%) high severe
cmp_f64_unord/compiler-builtins
time: [10.723 µs 10.730 µs 10.739 µs]
Found 15 outliers among 100 measurements (15.00%)
5 (5.00%) high mild
10 (10.00%) high severe
cmp_f64_unord/system time: [8.0944 µs 8.1296 µs 8.1683 µs]
Found 17 outliers among 100 measurements (17.00%)
4 (4.00%) high mild
13 (13.00%) high severe
cmp_f64_unord/assembly (aarch64 unix)
time: [8.1042 µs 8.1337 µs 8.1662 µs]
Found 3 outliers among 100 measurements (3.00%)
3 (3.00%) high mild
cmp_f128_gt/compiler-builtins
time: [20.508 µs 20.558 µs 20.615 µs]
Found 8 outliers among 100 measurements (8.00%)
2 (2.00%) high mild
6 (6.00%) high severe
cmp_f128_unord/compiler-builtins
time: [13.332 µs 13.346 µs 13.360 µs]
Found 4 outliers among 100 measurements (4.00%)
2 (2.00%) high mild
2 (2.00%) high severe
conv_u32_f32/compiler-builtins
time: [621.20 ns 621.89 ns 622.65 ns]
Found 7 outliers among 100 measurements (7.00%)
4 (4.00%) high mild
3 (3.00%) high severe
conv_u32_f32/system time: [621.44 ns 622.08 ns 622.74 ns]
Found 4 outliers among 100 measurements (4.00%)
3 (3.00%) high mild
1 (1.00%) high severe
conv_u32_f32/assembly (aarch64 unix)
time: [465.96 ns 466.65 ns 467.45 ns]
Found 13 outliers among 100 measurements (13.00%)
3 (3.00%) high mild
10 (10.00%) high severe
conv_u32_f64/compiler-builtins
time: [619.71 ns 620.51 ns 621.52 ns]
Found 5 outliers among 100 measurements (5.00%)
4 (4.00%) high mild
1 (1.00%) high severe
conv_u32_f64/system time: [466.60 ns 467.14 ns 467.77 ns]
Found 2 outliers among 100 measurements (2.00%)
2 (2.00%) high mild
conv_u32_f64/assembly (aarch64 unix)
time: [464.02 ns 464.32 ns 464.69 ns]
Found 2 outliers among 100 measurements (2.00%)
1 (1.00%) high mild
1 (1.00%) high severe
conv_u64_f32/compiler-builtins
time: [851.24 ns 852.98 ns 854.77 ns]
Found 5 outliers among 100 measurements (5.00%)
5 (5.00%) high mild
conv_u64_f32/system time: [724.35 ns 729.43 ns 735.07 ns]
Found 4 outliers among 100 measurements (4.00%)
4 (4.00%) high mild
conv_u64_f32/assembly (aarch64 unix)
time: [513.30 ns 514.64 ns 516.16 ns]
Found 8 outliers among 100 measurements (8.00%)
8 (8.00%) high mild
conv_u64_f64/compiler-builtins
time: [850.72 ns 853.26 ns 856.54 ns]
Found 15 outliers among 100 measurements (15.00%)
2 (2.00%) high mild
13 (13.00%) high severe
conv_u64_f64/system time: [681.43 ns 682.54 ns 683.79 ns]
Found 4 outliers among 100 measurements (4.00%)
3 (3.00%) high mild
1 (1.00%) high severe
conv_u64_f64/assembly (aarch64 unix)
time: [511.37 ns 511.71 ns 512.02 ns]
Found 1 outliers among 100 measurements (1.00%)
1 (1.00%) high severe
conv_u128_f32/compiler-builtins
time: [1.1395 µs 1.1409 µs 1.1424 µs]
Found 10 outliers among 100 measurements (10.00%)
6 (6.00%) high mild
4 (4.00%) high severe
conv_u128_f32/system time: [1.4348 µs 1.4369 µs 1.4390 µs]
Found 5 outliers among 100 measurements (5.00%)
4 (4.00%) high mild
1 (1.00%) high severe
conv_u128_f64/compiler-builtins
time: [1.0148 µs 1.0157 µs 1.0167 µs]
Found 4 outliers among 100 measurements (4.00%)
3 (3.00%) high mild
1 (1.00%) high severe
conv_u128_f64/system time: [1.3404 µs 1.3423 µs 1.3442 µs]
Found 8 outliers among 100 measurements (8.00%)
7 (7.00%) high mild
1 (1.00%) high severe
conv_i32_f32/compiler-builtins
time: [902.89 ns 903.81 ns 904.84 ns]
Found 7 outliers among 100 measurements (7.00%)
4 (4.00%) high mild
3 (3.00%) high severe
conv_i32_f32/system time: [942.62 ns 949.04 ns 955.77 ns]
Found 4 outliers among 100 measurements (4.00%)
3 (3.00%) high mild
1 (1.00%) high severe
conv_i32_f32/assembly (aarch64 unix)
time: [466.06 ns 466.60 ns 467.27 ns]
Found 1 outliers among 100 measurements (1.00%)
1 (1.00%) high severe
conv_i32_f64/compiler-builtins
time: [618.98 ns 619.24 ns 619.55 ns]
Found 17 outliers among 100 measurements (17.00%)
1 (1.00%) low mild
3 (3.00%) high mild
13 (13.00%) high severe
conv_i32_f64/system time: [622.18 ns 623.41 ns 624.85 ns]
Found 8 outliers among 100 measurements (8.00%)
5 (5.00%) high mild
3 (3.00%) high severe
conv_i32_f64/assembly (aarch64 unix)
time: [466.26 ns 466.76 ns 467.35 ns]
Found 9 outliers among 100 measurements (9.00%)
5 (5.00%) high mild
4 (4.00%) high severe
conv_i64_f32/compiler-builtins
time: [850.11 ns 850.45 ns 850.88 ns]
Found 15 outliers among 100 measurements (15.00%)
1 (1.00%) low severe
1 (1.00%) low mild
3 (3.00%) high mild
10 (10.00%) high severe
conv_i64_f32/system time: [908.36 ns 908.70 ns 909.10 ns]
Found 12 outliers among 100 measurements (12.00%)
3 (3.00%) high mild
9 (9.00%) high severe
conv_i64_f32/assembly (aarch64 unix)
time: [513.56 ns 514.44 ns 515.38 ns]
Found 8 outliers among 100 measurements (8.00%)
8 (8.00%) high mild
conv_i64_f64/compiler-builtins
time: [935.39 ns 935.78 ns 936.26 ns]
Found 13 outliers among 100 measurements (13.00%)
5 (5.00%) high mild
8 (8.00%) high severe
conv_i64_f64/system time: [946.56 ns 947.33 ns 948.20 ns]
Found 8 outliers among 100 measurements (8.00%)
6 (6.00%) high mild
2 (2.00%) high severe
conv_i64_f64/assembly (aarch64 unix)
time: [511.55 ns 512.03 ns 512.56 ns]
Found 21 outliers among 100 measurements (21.00%)
4 (4.00%) high mild
17 (17.00%) high severe
conv_i128_f32/compiler-builtins
time: [1.4206 µs 1.4218 µs 1.4232 µs]
Found 10 outliers among 100 measurements (10.00%)
5 (5.00%) high mild
5 (5.00%) high severe
conv_i128_f32/system time: [1.6863 µs 1.6891 µs 1.6922 µs]
Found 10 outliers among 100 measurements (10.00%)
9 (9.00%) high mild
1 (1.00%) high severe
conv_i128_f64/compiler-builtins
time: [1.3110 µs 1.3122 µs 1.3136 µs]
Found 4 outliers among 100 measurements (4.00%)
2 (2.00%) high mild
2 (2.00%) high severe
conv_i128_f64/system time: [1.6022 µs 1.6048 µs 1.6090 µs]
Found 5 outliers among 100 measurements (5.00%)
3 (3.00%) high mild
2 (2.00%) high severe
conv_f64_u32/compiler-builtins
time: [798.65 ns 799.42 ns 800.39 ns]
Found 15 outliers among 100 measurements (15.00%)
6 (6.00%) high mild
9 (9.00%) high severe
conv_f64_u32/system time: [639.48 ns 639.88 ns 640.40 ns]
Found 16 outliers among 100 measurements (16.00%)
1 (1.00%) low mild
5 (5.00%) high mild
10 (10.00%) high severe
conv_f64_u32/assembly (aarch64 unix)
time: [480.78 ns 481.35 ns 482.17 ns]
Found 7 outliers among 100 measurements (7.00%)
5 (5.00%) high mild
2 (2.00%) high severe
conv_f64_u64/compiler-builtins
time: [799.56 ns 800.54 ns 801.89 ns]
Found 4 outliers among 100 measurements (4.00%)
2 (2.00%) high mild
2 (2.00%) high severe
conv_f64_u64/system time: [640.72 ns 641.24 ns 641.81 ns]
Found 5 outliers among 100 measurements (5.00%)
3 (3.00%) high mild
2 (2.00%) high severe
conv_f64_u64/assembly (aarch64 unix)
time: [481.54 ns 482.48 ns 483.53 ns]
Found 6 outliers among 100 measurements (6.00%)
1 (1.00%) low severe
1 (1.00%) low mild
3 (3.00%) high mild
1 (1.00%) high severe
conv_f64_u128/compiler-builtins
time: [1.0510 µs 1.0515 µs 1.0520 µs]
Found 13 outliers among 100 measurements (13.00%)
1 (1.00%) low mild
2 (2.00%) high mild
10 (10.00%) high severe
conv_f64_u128/system time: [818.45 ns 819.23 ns 820.15 ns]
Found 2 outliers among 100 measurements (2.00%)
2 (2.00%) high mild
conv_f64_i32/compiler-builtins
time: [800.56 ns 801.31 ns 802.21 ns]
Found 5 outliers among 100 measurements (5.00%)
3 (3.00%) high mild
2 (2.00%) high severe
conv_f64_i32/system time: [765.62 ns 766.15 ns 766.80 ns]
Found 3 outliers among 100 measurements (3.00%)
2 (2.00%) high mild
1 (1.00%) high severe
conv_f64_i32/assembly (aarch64 unix)
time: [471.65 ns 472.77 ns 473.89 ns]
Found 10 outliers among 100 measurements (10.00%)
1 (1.00%) low mild
8 (8.00%) high mild
1 (1.00%) high severe
conv_f64_i64/compiler-builtins
time: [801.00 ns 804.55 ns 808.72 ns]
Found 18 outliers among 100 measurements (18.00%)
6 (6.00%) high mild
12 (12.00%) high severe
conv_f64_i64/system time: [770.28 ns 772.47 ns 775.21 ns]
Found 2 outliers among 100 measurements (2.00%)
2 (2.00%) high mild
conv_f64_i64/assembly (aarch64 unix)
time: [491.56 ns 494.96 ns 499.19 ns]
Found 3 outliers among 100 measurements (3.00%)
2 (2.00%) high mild
1 (1.00%) high severe
conv_f64_i128/compiler-builtins
time: [1.0637 µs 1.0704 µs 1.0762 µs]
Found 5 outliers among 100 measurements (5.00%)
5 (5.00%) high mild
conv_f64_i128/system time: [1.0022 µs 1.0027 µs 1.0033 µs]
Found 4 outliers among 100 measurements (4.00%)
1 (1.00%) low severe
3 (3.00%) high severe
conv_f32_u32/compiler-builtins
time: [644.56 ns 647.01 ns 649.95 ns]
Found 15 outliers among 100 measurements (15.00%)
13 (13.00%) high mild
2 (2.00%) high severe
conv_f32_u32/system time: [648.12 ns 651.20 ns 654.54 ns]
Found 9 outliers among 100 measurements (9.00%)
7 (7.00%) high mild
2 (2.00%) high severe
conv_f32_u32/assembly (aarch64 unix)
time: [481.02 ns 482.71 ns 484.60 ns]
Found 12 outliers among 100 measurements (12.00%)
1 (1.00%) low mild
10 (10.00%) high mild
1 (1.00%) high severe
conv_f32_u64/compiler-builtins
time: [644.14 ns 646.61 ns 649.53 ns]
Found 11 outliers among 100 measurements (11.00%)
6 (6.00%) high mild
5 (5.00%) high severe
conv_f32_u64/system time: [646.21 ns 650.17 ns 654.55 ns]
Found 3 outliers among 100 measurements (3.00%)
3 (3.00%) high mild
conv_f32_u64/assembly (aarch64 unix)
time: [473.36 ns 474.60 ns 476.00 ns]
Found 9 outliers among 100 measurements (9.00%)
2 (2.00%) low mild
5 (5.00%) high mild
2 (2.00%) high severe
conv_f32_u128/compiler-builtins
time: [1.0820 µs 1.0828 µs 1.0839 µs]
Found 2 outliers among 100 measurements (2.00%)
1 (1.00%) high mild
1 (1.00%) high severe
conv_f32_u128/system time: [1.0003 µs 1.0042 µs 1.0076 µs]
Found 21 outliers among 100 measurements (21.00%)
1 (1.00%) low mild
3 (3.00%) high mild
17 (17.00%) high severe
conv_f32_i32/compiler-builtins
time: [801.13 ns 801.82 ns 802.53 ns]
Found 2 outliers among 100 measurements (2.00%)
2 (2.00%) high severe
conv_f32_i32/system time: [745.17 ns 745.97 ns 746.78 ns]
Found 2 outliers among 100 measurements (2.00%)
2 (2.00%) high severe
conv_f32_i32/assembly (aarch64 unix)
time: [469.87 ns 470.65 ns 471.57 ns]
Found 1 outliers among 100 measurements (1.00%)
1 (1.00%) high mild
conv_f32_i64/compiler-builtins
time: [799.44 ns 799.94 ns 800.59 ns]
Found 4 outliers among 100 measurements (4.00%)
1 (1.00%) high mild
3 (3.00%) high severe
conv_f32_i64/system time: [744.81 ns 745.17 ns 745.62 ns]
Found 14 outliers among 100 measurements (14.00%)
5 (5.00%) high mild
9 (9.00%) high severe
conv_f32_i64/assembly (aarch64 unix)
time: [465.06 ns 466.01 ns 467.12 ns]
Found 13 outliers among 100 measurements (13.00%)
2 (2.00%) low severe
5 (5.00%) high mild
6 (6.00%) high severe
conv_f32_i128/compiler-builtins
time: [1.1390 µs 1.1515 µs 1.1637 µs]
conv_f32_i128/system time: [1.1315 µs 1.1330 µs 1.1347 µs]
Found 6 outliers among 100 measurements (6.00%)
3 (3.00%) low mild
2 (2.00%) high mild
1 (1.00%) high severe
div_f32/compiler-builtins
time: [39.408 µs 39.676 µs 39.969 µs]
Found 5 outliers among 100 measurements (5.00%)
5 (5.00%) high mild
div_f32/system time: [42.108 µs 42.248 µs 42.528 µs]
Found 11 outliers among 100 measurements (11.00%)
4 (4.00%) high mild
7 (7.00%) high severe
div_f32/assembly (aarch64 unix)
time: [8.0724 µs 8.0794 µs 8.0870 µs]
Found 7 outliers among 100 measurements (7.00%)
5 (5.00%) high mild
2 (2.00%) high severe
div_f64/compiler-builtins
time: [49.992 µs 50.014 µs 50.040 µs]
Found 5 outliers among 100 measurements (5.00%)
5 (5.00%) high severe
div_f64/system time: [53.577 µs 53.651 µs 53.743 µs]
Found 6 outliers among 100 measurements (6.00%)
4 (4.00%) high mild
2 (2.00%) high severe
div_f64/assembly (aarch64 unix)
time: [8.0976 µs 8.1064 µs 8.1158 µs]
Found 6 outliers among 100 measurements (6.00%)
3 (3.00%) high mild
3 (3.00%) high severe
extend_f16_f32/compiler-builtins
time: [804.09 ns 805.38 ns 807.09 ns]
Found 3 outliers among 100 measurements (3.00%)
1 (1.00%) high mild
2 (2.00%) high severe
extend_f16_f32/system time: [641.07 ns 641.76 ns 642.60 ns]
Found 12 outliers among 100 measurements (12.00%)
6 (6.00%) high mild
6 (6.00%) high severe
extend_f16_f32/assembly (aarch64 unix)
time: [456.69 ns 457.14 ns 457.68 ns]
Found 8 outliers among 100 measurements (8.00%)
4 (4.00%) low mild
2 (2.00%) high mild
2 (2.00%) high severe
extend_f16_f128/compiler-builtins
time: [1.1025 µs 1.1035 µs 1.1045 µs]
Found 2 outliers among 100 measurements (2.00%)
1 (1.00%) high mild
1 (1.00%) high severe
extend_f32_f64/compiler-builtins
time: [799.30 ns 799.68 ns 800.16 ns]
Found 13 outliers among 100 measurements (13.00%)
3 (3.00%) high mild
10 (10.00%) high severe
extend_f32_f64/system time: [992.48 ns 993.27 ns 994.32 ns]
Found 15 outliers among 100 measurements (15.00%)
3 (3.00%) high mild
12 (12.00%) high severe
extend_f32_f64/assembly (aarch64 unix)
time: [457.65 ns 460.39 ns 463.78 ns]
extend_f32_f128/compiler-builtins
time: [1.0295 µs 1.0311 µs 1.0327 µs]
Found 3 outliers among 100 measurements (3.00%)
2 (2.00%) low mild
1 (1.00%) high mild
extend_f64_f128/compiler-builtins
time: [1.0400 µs 1.0412 µs 1.0426 µs]
Found 2 outliers among 100 measurements (2.00%)
2 (2.00%) high mild
mul_f32/compiler-builtins
time: [25.604 µs 25.705 µs 25.818 µs]
Found 23 outliers among 100 measurements (23.00%)
17 (17.00%) low severe
3 (3.00%) high mild
3 (3.00%) high severe
mul_f32/system time: [29.914 µs 29.977 µs 30.043 µs]
Found 5 outliers among 100 measurements (5.00%)
5 (5.00%) high mild
mul_f32/assembly (aarch64 unix)
time: [8.1384 µs 8.1964 µs 8.2603 µs]
Found 13 outliers among 100 measurements (13.00%)
3 (3.00%) high mild
10 (10.00%) high severe
mul_f64/compiler-builtins
time: [25.596 µs 25.615 µs 25.637 µs]
Found 3 outliers among 100 measurements (3.00%)
2 (2.00%) high mild
1 (1.00%) high severe
mul_f64/system time: [30.931 µs 30.963 µs 31.002 µs]
Found 3 outliers among 100 measurements (3.00%)
3 (3.00%) high mild
mul_f64/assembly (aarch64 unix)
time: [8.0589 µs 8.0638 µs 8.0695 µs]
Found 3 outliers among 100 measurements (3.00%)
1 (1.00%) high mild
2 (2.00%) high severe
mul_f128/compiler-builtins
time: [54.242 µs 54.306 µs 54.374 µs]
Found 3 outliers among 100 measurements (3.00%)
2 (2.00%) high mild
1 (1.00%) high severe
powi_f32/compiler-builtins
time: [129.91 µs 130.09 µs 130.24 µs]
powi_f32/system time: [126.97 µs 127.34 µs 127.82 µs]
Found 4 outliers among 100 measurements (4.00%)
3 (3.00%) high mild
1 (1.00%) high severe
powi_f64/compiler-builtins
time: [130.08 µs 130.81 µs 131.46 µs]
Found 13 outliers among 100 measurements (13.00%)
13 (13.00%) high mild
powi_f64/system time: [128.51 µs 128.68 µs 128.88 µs]
Found 21 outliers among 100 measurements (21.00%)
4 (4.00%) high mild
17 (17.00%) high severe
sub_f32/compiler-builtins
time: [37.861 µs 38.012 µs 38.158 µs]
Found 26 outliers among 100 measurements (26.00%)
18 (18.00%) low mild
7 (7.00%) high mild
1 (1.00%) high severe
sub_f32/system time: [39.586 µs 39.628 µs 39.673 µs]
Found 2 outliers among 100 measurements (2.00%)
1 (1.00%) high mild
1 (1.00%) high severe
sub_f32/assembly (aarch64 unix)
time: [8.0976 µs 8.1584 µs 8.2208 µs]
Found 6 outliers among 100 measurements (6.00%)
6 (6.00%) high mild
sub_f64/compiler-builtins
time: [37.755 µs 37.838 µs 37.921 µs]
Found 25 outliers among 100 measurements (25.00%)
7 (7.00%) low severe
3 (3.00%) low mild
4 (4.00%) high mild
11 (11.00%) high severe
sub_f64/system time: [39.979 µs 40.019 µs 40.064 µs]
Found 3 outliers among 100 measurements (3.00%)
2 (2.00%) high mild
1 (1.00%) high severe
sub_f64/assembly (aarch64 unix)
time: [8.0669 µs 8.0733 µs 8.0801 µs]
Found 7 outliers among 100 measurements (7.00%)
3 (3.00%) high mild
4 (4.00%) high severe
sub_f128/compiler-builtins
time: [68.618 µs 68.899 µs 69.293 µs]
Found 11 outliers among 100 measurements (11.00%)
2 (2.00%) high mild
9 (9.00%) high severe
trunc_f32_f16/compiler-builtins
time: [1.3343 µs 1.3468 µs 1.3608 µs]
Found 3 outliers among 100 measurements (3.00%)
1 (1.00%) high mild
2 (2.00%) high severe
trunc_f32_f16/system time: [1.2687 µs 1.2714 µs 1.2738 µs]
trunc_f32_f16/assembly (aarch64 unix)
time: [470.06 ns 472.96 ns 475.30 ns]
trunc_f64_f16/compiler-builtins
time: [1.2729 µs 1.2738 µs 1.2749 µs]
Found 7 outliers among 100 measurements (7.00%)
2 (2.00%) high mild
5 (5.00%) high severe
trunc_f64_f16/assembly (aarch64 unix)
time: [455.91 ns 456.61 ns 457.33 ns]
Found 12 outliers among 100 measurements (12.00%)
1 (1.00%) low severe
2 (2.00%) low mild
6 (6.00%) high mild
3 (3.00%) high severe
trunc_f64_f32/compiler-builtins
time: [1.2240 µs 1.2325 µs 1.2410 µs]
Found 17 outliers among 100 measurements (17.00%)
4 (4.00%) low mild
2 (2.00%) high mild
11 (11.00%) high severe
trunc_f64_f32/system time: [1.2784 µs 1.2835 µs 1.2884 µs]
Found 10 outliers among 100 measurements (10.00%)
6 (6.00%) low severe
1 (1.00%) low mild
2 (2.00%) high mild
1 (1.00%) high severe
trunc_f64_f32/assembly (aarch64 unix)
time: [455.64 ns 456.08 ns 456.58 ns]
Found 18 outliers among 100 measurements (18.00%)
3 (3.00%) low severe
4 (4.00%) low mild
8 (8.00%) high mild
3 (3.00%) high severe
trunc_f128_f16/compiler-builtins
time: [1.2563 µs 1.2666 µs 1.2776 µs]
Found 3 outliers among 100 measurements (3.00%)
3 (3.00%) high mild
trunc_f128_f32/compiler-builtins
time: [1.2459 µs 1.2482 µs 1.2507 µs]
Found 6 outliers among 100 measurements (6.00%)
2 (2.00%) low mild
2 (2.00%) high mild
2 (2.00%) high severe
trunc_f128_f64/compiler-builtins
time: [1.2821 µs 1.3047 µs 1.3452 µs]
Found 8 outliers among 100 measurements (8.00%)
4 (4.00%) low severe
1 (1.00%) low mild
2 (2.00%) high mild
1 (1.00%) high severe
running 52 tests
test memcmp_builtin_1048576 ... bench: 20,975.52 ns/iter (+/- 239.69) = 49991 MB/s
test memcmp_builtin_16 ... bench: 1.60 ns/iter (+/- 0.05) = 16000 MB/s
test memcmp_builtin_32 ... bench: 1.61 ns/iter (+/- 0.03) = 32000 MB/s
test memcmp_builtin_4096 ... bench: 95.84 ns/iter (+/- 2.82) = 43115 MB/s
test memcmp_builtin_64 ... bench: 2.39 ns/iter (+/- 0.09) = 32000 MB/s
test memcmp_builtin_8 ... bench: 1.60 ns/iter (+/- 0.04) = 8000 MB/s
test memcmp_builtin_unaligned_1048575 ... bench: 22,060.00 ns/iter (+/- 873.55) = 47532 MB/s
test memcmp_builtin_unaligned_15 ... bench: 3.19 ns/iter (+/- 0.02) = 5333 MB/s
test memcmp_builtin_unaligned_31 ... bench: 1.61 ns/iter (+/- 0.01) = 32000 MB/s
test memcmp_builtin_unaligned_4095 ... bench: 96.63 ns/iter (+/- 4.58) = 42666 MB/s
test memcmp_builtin_unaligned_63 ... bench: 2.40 ns/iter (+/- 0.11) = 32000 MB/s
test memcmp_builtin_unaligned_7 ... bench: 3.37 ns/iter (+/- 0.05) = 2666 MB/s
test memcmp_rust_1048576 ... bench: 309,647.23 ns/iter (+/- 6,077.35) = 3386 MB/s
test memcmp_rust_16 ... bench: 5.66 ns/iter (+/- 0.30) = 3200 MB/s
test memcmp_rust_32 ... bench: 10.47 ns/iter (+/- 0.14) = 3200 MB/s
test memcmp_rust_4096 ... bench: 1,124.34 ns/iter (+/- 36.92) = 3644 MB/s
test memcmp_rust_64 ... bench: 19.90 ns/iter (+/- 0.36) = 3368 MB/s
test memcmp_rust_8 ... bench: 3.46 ns/iter (+/- 0.11) = 2666 MB/s
test memcmp_rust_unaligned_1048575 ... bench: 308,613.87 ns/iter (+/- 6,613.18) = 3397 MB/s
test memcmp_rust_unaligned_15 ... bench: 5.35 ns/iter (+/- 0.05) = 3200 MB/s
test memcmp_rust_unaligned_31 ... bench: 9.94 ns/iter (+/- 0.06) = 3555 MB/s
test memcmp_rust_unaligned_4095 ... bench: 1,120.06 ns/iter (+/- 5.03) = 3657 MB/s
test memcmp_rust_unaligned_63 ... bench: 19.64 ns/iter (+/- 0.82) = 3368 MB/s
test memcmp_rust_unaligned_7 ... bench: 3.22 ns/iter (+/- 0.10) = 2666 MB/s
test memcpy_builtin_1048576 ... bench: 12,538.05 ns/iter (+/- 354.79) = 83631 MB/s
test memcpy_builtin_1048576_misalign ... bench: 30,092.56 ns/iter (+/- 8,064.04) = 34845 MB/s
test memcpy_builtin_1048576_offset ... bench: 12,538.36 ns/iter (+/- 359.04) = 83631 MB/s
test memcpy_builtin_4096 ... bench: 44.24 ns/iter (+/- 6.80) = 93090 MB/s
test memcpy_builtin_4096_misalign ... bench: 45.34 ns/iter (+/- 2.13) = 91022 MB/s
test memcpy_builtin_4096_offset ... bench: 44.71 ns/iter (+/- 0.61) = 93090 MB/s
test memcpy_rust_1048576 ... bench: 17,943.33 ns/iter (+/- 243.18) = 58439 MB/s
test memcpy_rust_1048576_misalign ... bench: 15,004.68 ns/iter (+/- 3,978.65) = 69886 MB/s
test memcpy_rust_1048576_offset ... bench: 14,722.06 ns/iter (+/- 479.54) = 71225 MB/s
test memcpy_rust_4096 ... bench: 44.91 ns/iter (+/- 4.62) = 93090 MB/s
test memcpy_rust_4096_misalign ... bench: 76.21 ns/iter (+/- 8.21) = 53894 MB/s
test memcpy_rust_4096_offset ... bench: 76.27 ns/iter (+/- 4.69) = 53894 MB/s
test memmove_builtin_1048576 ... bench: 18,644.50 ns/iter (+/- 379.84) = 56242 MB/s
test memmove_builtin_1048576_misalign ... bench: 18,947.70 ns/iter (+/- 1,226.26) = 55342 MB/s
test memmove_builtin_4096 ... bench: 44.21 ns/iter (+/- 0.79) = 93090 MB/s
test memmove_builtin_4096_misalign ... bench: 47.21 ns/iter (+/- 3.12) = 87148 MB/s
test memmove_rust_1048576 ... bench: 34,813.33 ns/iter (+/- 3,637.47) = 30120 MB/s
test memmove_rust_1048576_misalign ... bench: 35,067.19 ns/iter (+/- 1,699.63) = 29902 MB/s
test memmove_rust_4096 ... bench: 148.69 ns/iter (+/- 1.31) = 27675 MB/s
test memmove_rust_4096_misalign ... bench: 153.81 ns/iter (+/- 1.71) = 26771 MB/s
test memset_builtin_1048576 ... bench: 15,704.12 ns/iter (+/- 12,113.86) = 66771 MB/s
test memset_builtin_1048576_offset ... bench: 17,894.23 ns/iter (+/- 175.12) = 58599 MB/s
test memset_builtin_4096 ... bench: 39.95 ns/iter (+/- 0.19) = 105025 MB/s
test memset_builtin_4096_offset ... bench: 40.48 ns/iter (+/- 3.11) = 102400 MB/s
test memset_rust_1048576 ... bench: 10,600.66 ns/iter (+/- 1,559.93) = 98922 MB/s
test memset_rust_1048576_offset ... bench: 14,810.85 ns/iter (+/- 575.27) = 70801 MB/s
test memset_rust_4096 ... bench: 37.91 ns/iter (+/- 2.77) = 110702 MB/s
test memset_rust_4096_offset ... bench: 59.99 ns/iter (+/- 10.45) = 69423 MB/s
test result: ok. 0 passed; 0 failed; 0 ignored; 52 measured; 0 filtered out; finished in 97.74s

View file

@ -0,0 +1,81 @@
#![feature(f128)]
use compiler_builtins::float::add;
use criterion::{criterion_group, criterion_main, Criterion};
use testcrate::float_bench;
float_bench! {
name: add_f32,
sig: (a: f32, b: f32) -> f32,
crate_fn: add::__addsf3,
sys_fn: __addsf3,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
asm!(
"addss {a}, {b}",
a = inout(xmm_reg) a,
b = in(xmm_reg) b,
options(nomem, nostack, pure)
);
a
};
#[cfg(target_arch = "aarch64")] {
asm!(
"fadd {a:s}, {a:s}, {b:s}",
a = inout(vreg) a,
b = in(vreg) b,
options(nomem, nostack, pure)
);
a
};
],
}
float_bench! {
name: add_f64,
sig: (a: f64, b: f64) -> f64,
crate_fn: add::__adddf3,
sys_fn: __adddf3,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
asm!(
"addsd {a}, {b}",
a = inout(xmm_reg) a,
b = in(xmm_reg) b,
options(nomem, nostack, pure)
);
a
};
#[cfg(target_arch = "aarch64")] {
asm!(
"fadd {a:d}, {a:d}, {b:d}",
a = inout(vreg) a,
b = in(vreg) b,
options(nomem, nostack, pure)
);
a
};
],
}
float_bench! {
name: add_f128,
sig: (a: f128, b: f128) -> f128,
crate_fn: add::__addtf3,
crate_fn_ppc: add::__addkf3,
sys_fn: __addtf3,
sys_fn_ppc: __addkf3,
sys_available: not(feature = "no-sys-f128"),
asm: []
}
criterion_group!(float_add, add_f32, add_f64, add_f128);
criterion_main!(float_add);

View file

@ -0,0 +1,202 @@
#![feature(f128)]
use criterion::{criterion_group, criterion_main, Criterion};
use testcrate::float_bench;
use compiler_builtins::float::cmp;
/// Check whether two results of a `gt` comparison symbol are equivalent.
///
/// Different implementations may return different integers; the result only gets compared
/// against 0, so two values agree when both are greater than zero or both are not.
fn gt_res_eq(a: i32, b: i32) -> bool {
    let a_is_gt = a > 0;
    let b_is_gt = b > 0;
    a_is_gt == b_is_gt
}
// Benchmark the `f32` greater-than comparison: compiler-builtins `__gtsf2`, the system symbol
// of the same name, and a hand-written assembly sequence per architecture. `gt_res_eq` is
// supplied as `output_eq` because the symbols only promise a result that is compared to 0.
float_bench! {
    name: cmp_f32_gt,
    sig: (a: f32, b: f32) -> i32,
    crate_fn: cmp::__gtsf2,
    sys_fn: __gtsf2,
    sys_available: all(),
    output_eq: gt_res_eq,
    asm: [
        #[cfg(target_arch = "x86_64")] {
            let ret: i32;
            asm!(
                // Clear the whole result register first; `seta` only writes the low byte.
                "xor {ret:e}, {ret:e}",
                "ucomiss {a}, {b}",
                "seta {ret:l}",
                a = in(xmm_reg) a,
                b = in(xmm_reg) b,
                ret = out(reg) ret,
                options(nomem, nostack, pure)
            );
            ret
        };
        #[cfg(target_arch = "aarch64")] {
            let ret: i32;
            asm!(
                "fcmp {a:s}, {b:s}",
                "cset {ret:w}, gt",
                a = in(vreg) a,
                b = in(vreg) b,
                ret = out(reg) ret,
                // `pure` added so this block carries the same options as every other
                // comparison block in this file; the sequence only depends on its inputs.
                options(nomem, nostack, pure)
            );
            ret
        };
    ],
}
float_bench! {
name: cmp_f32_unord,
sig: (a: f32, b: f32) -> i32,
crate_fn: cmp::__unordsf2,
sys_fn: __unordsf2,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: i32;
asm!(
"xor {ret:e}, {ret:e}",
"ucomiss {a}, {b}",
"setp {ret:l}",
a = in(xmm_reg) a,
b = in(xmm_reg) b,
ret = out(reg) ret,
options(nomem, nostack, pure)
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: i32;
asm!(
"fcmp {a:s}, {b:s}",
"cset {ret:w}, vs",
a = in(vreg) a,
b = in(vreg) b,
ret = out(reg) ret,
options(nomem, nostack, pure)
);
ret
};
],
}
float_bench! {
name: cmp_f64_gt,
sig: (a: f64, b: f64) -> i32,
crate_fn: cmp::__gtdf2,
sys_fn: __gtdf2,
sys_available: all(),
output_eq: gt_res_eq,
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: i32;
asm!(
"xor {ret:e}, {ret:e}",
"ucomisd {a}, {b}",
"seta {ret:l}",
a = in(xmm_reg) a,
b = in(xmm_reg) b,
ret = out(reg) ret,
options(nomem, nostack, pure)
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: i32;
asm!(
"fcmp {a:d}, {b:d}",
"cset {ret:w}, gt",
a = in(vreg) a,
b = in(vreg) b,
ret = out(reg) ret,
options(nomem, nostack, pure)
);
ret
};
],
}
float_bench! {
name: cmp_f64_unord,
sig: (a: f64, b: f64) -> i32,
crate_fn: cmp::__unorddf2,
sys_fn: __unorddf2,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: i32;
asm!(
"xor {ret:e}, {ret:e}",
"ucomisd {a}, {b}",
"setp {ret:l}",
a = in(xmm_reg) a,
b = in(xmm_reg) b,
ret = out(reg) ret,
options(nomem, nostack, pure)
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: i32;
asm!(
"fcmp {a:d}, {b:d}",
"cset {ret:w}, vs",
a = in(vreg) a,
b = in(vreg) b,
ret = out(reg) ret,
options(nomem, nostack, pure)
);
ret
};
],
}
float_bench! {
name: cmp_f128_gt,
sig: (a: f128, b: f128) -> i32,
crate_fn: cmp::__gttf2,
crate_fn_ppc: cmp::__gtkf2,
sys_fn: __gttf2,
sys_fn_ppc: __gtkf2,
sys_available: not(feature = "no-sys-f128"),
output_eq: gt_res_eq,
asm: []
}
float_bench! {
name: cmp_f128_unord,
sig: (a: f128, b: f128) -> i32,
crate_fn: cmp::__unordtf2,
crate_fn_ppc: cmp::__unordkf2,
sys_fn: __unordtf2,
sys_fn_ppc: __unordkf2,
sys_available: not(feature = "no-sys-f128"),
asm: []
}
criterion_group!(
float_cmp,
cmp_f32_gt,
cmp_f32_unord,
cmp_f64_gt,
cmp_f64_unord,
cmp_f128_gt,
cmp_f128_unord
);
criterion_main!(float_cmp);

View file

@ -0,0 +1,547 @@
#![feature(f128)]
#![allow(improper_ctypes)]
use compiler_builtins::float::conv;
use criterion::{criterion_group, criterion_main, Criterion};
use testcrate::float_bench;
/* unsigned int -> float */
// Benchmark `u32 -> f32` conversion: compiler-builtins `__floatunsisf`, the system symbol of
// the same name, and a hand-written assembly sequence per architecture.
float_bench! {
name: conv_u32_f32,
sig: (a: u32) -> f32,
crate_fn: conv::__floatunsisf,
sys_fn: __floatunsisf,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: f32;
asm!(
// The input is copied through the 32-bit register form and the convert then reads the
// unmodified (full-width) `tmp`. Presumably this relies on the 32-bit `mov` clearing
// the upper half so the signed convert sees a non-negative value — TODO confirm.
"mov {tmp:e}, {a:e}",
"cvtsi2ss {ret}, {tmp}",
a = in(reg) a,
// `out` rather than `lateout`: `tmp` is written before `a` has been fully consumed
// by later instructions, so it must not share a register with an input.
tmp = out(reg) _,
ret = lateout(xmm_reg) ret,
options(nomem, nostack, pure),
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: f32;
asm!(
// 32-bit (`w`) integer source, single-precision (`s`) destination.
"ucvtf {ret:s}, {a:w}",
a = in(reg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
// Benchmark `u32 -> f64` conversion: compiler-builtins `__floatunsidf`, the system symbol of
// the same name, and a hand-written assembly sequence per architecture.
float_bench! {
name: conv_u32_f64,
sig: (a: u32) -> f64,
crate_fn: conv::__floatunsidf,
sys_fn: __floatunsidf,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: f64;
asm!(
// The input is copied through the 32-bit register form and the convert then reads the
// unmodified (full-width) `tmp`. Presumably this relies on the 32-bit `mov` clearing
// the upper half so the signed convert sees a non-negative value — TODO confirm.
"mov {tmp:e}, {a:e}",
"cvtsi2sd {ret}, {tmp}",
a = in(reg) a,
tmp = out(reg) _,
ret = lateout(xmm_reg) ret,
options(nomem, nostack, pure),
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: f64;
asm!(
// 32-bit (`w`) integer source, double-precision (`d`) destination.
"ucvtf {ret:d}, {a:w}",
a = in(reg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_u64_f32,
sig: (a: u64) -> f32,
crate_fn: conv::__floatundisf,
sys_fn: __floatundisf,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: f32;
asm!(
"ucvtf {ret:s}, {a:x}",
a = in(reg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_u64_f64,
sig: (a: u64) -> f64,
crate_fn: conv::__floatundidf,
sys_fn: __floatundidf,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: f64;
asm!(
"ucvtf {ret:d}, {a:x}",
a = in(reg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_u128_f32,
sig: (a: u128) -> f32,
crate_fn: conv::__floatuntisf,
sys_fn: __floatuntisf,
sys_available: all(),
asm: []
}
float_bench! {
name: conv_u128_f64,
sig: (a: u128) -> f64,
crate_fn: conv::__floatuntidf,
sys_fn: __floatuntidf,
sys_available: all(),
asm: []
}
/* signed int -> float */
float_bench! {
name: conv_i32_f32,
sig: (a: i32) -> f32,
crate_fn: conv::__floatsisf,
sys_fn: __floatsisf,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: f32;
asm!(
"cvtsi2ss {ret}, {a:e}",
a = in(reg) a,
ret = lateout(xmm_reg) ret,
options(nomem, nostack, pure),
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: f32;
asm!(
"scvtf {ret:s}, {a:w}",
a = in(reg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_i32_f64,
sig: (a: i32) -> f64,
crate_fn: conv::__floatsidf,
sys_fn: __floatsidf,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: f64;
asm!(
"cvtsi2sd {ret}, {a:e}",
a = in(reg) a,
ret = lateout(xmm_reg) ret,
options(nomem, nostack, pure),
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: f64;
asm!(
"scvtf {ret:d}, {a:w}",
a = in(reg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_i64_f32,
sig: (a: i64) -> f32,
crate_fn: conv::__floatdisf,
sys_fn: __floatdisf,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: f32;
asm!(
"cvtsi2ss {ret}, {a:r}",
a = in(reg) a,
ret = lateout(xmm_reg) ret,
options(nomem, nostack, pure),
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: f32;
asm!(
"scvtf {ret:s}, {a:x}",
a = in(reg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_i64_f64,
sig: (a: i64) -> f64,
crate_fn: conv::__floatdidf,
sys_fn: __floatdidf,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: f64;
asm!(
"cvtsi2sd {ret}, {a:r}",
a = in(reg) a,
ret = lateout(xmm_reg) ret,
options(nomem, nostack, pure),
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: f64;
asm!(
"scvtf {ret:d}, {a:x}",
a = in(reg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_i128_f32,
sig: (a: i128) -> f32,
crate_fn: conv::__floattisf,
sys_fn: __floattisf,
sys_available: all(),
asm: []
}
float_bench! {
name: conv_i128_f64,
sig: (a: i128) -> f64,
crate_fn: conv::__floattidf,
sys_fn: __floattidf,
sys_available: all(),
asm: []
}
/* float -> unsigned int */
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
float_bench! {
name: conv_f32_u32,
sig: (a: f32) -> u32,
crate_fn: conv::__fixunssfsi,
sys_fn: __fixunssfsi,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: u32;
asm!(
"fcvtzu {ret:w}, {a:s}",
a = in(vreg) a,
ret = lateout(reg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
float_bench! {
name: conv_f32_u64,
sig: (a: f32) -> u64,
crate_fn: conv::__fixunssfdi,
sys_fn: __fixunssfdi,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: u64;
asm!(
"fcvtzu {ret:x}, {a:s}",
a = in(vreg) a,
ret = lateout(reg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
float_bench! {
name: conv_f32_u128,
sig: (a: f32) -> u128,
crate_fn: conv::__fixunssfti,
sys_fn: __fixunssfti,
sys_available: all(),
asm: []
}
float_bench! {
name: conv_f64_u32,
sig: (a: f64) -> u32,
crate_fn: conv::__fixunsdfsi,
sys_fn: __fixunsdfsi,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: u32;
asm!(
"fcvtzu {ret:w}, {a:d}",
a = in(vreg) a,
ret = lateout(reg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_f64_u64,
sig: (a: f64) -> u64,
crate_fn: conv::__fixunsdfdi,
sys_fn: __fixunsdfdi,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: u64;
asm!(
"fcvtzu {ret:x}, {a:d}",
a = in(vreg) a,
ret = lateout(reg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_f64_u128,
sig: (a: f64) -> u128,
crate_fn: conv::__fixunsdfti,
sys_fn: __fixunsdfti,
sys_available: all(),
asm: []
}
/* float -> signed int */
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
float_bench! {
name: conv_f32_i32,
sig: (a: f32) -> i32,
crate_fn: conv::__fixsfsi,
sys_fn: __fixsfsi,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: i32;
asm!(
"fcvtzs {ret:w}, {a:s}",
a = in(vreg) a,
ret = lateout(reg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
float_bench! {
name: conv_f32_i64,
sig: (a: f32) -> i64,
crate_fn: conv::__fixsfdi,
sys_fn: __fixsfdi,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: i64;
asm!(
"fcvtzs {ret:x}, {a:s}",
a = in(vreg) a,
ret = lateout(reg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
float_bench! {
name: conv_f32_i128,
sig: (a: f32) -> i128,
crate_fn: conv::__fixsfti,
sys_fn: __fixsfti,
sys_available: all(),
asm: []
}
float_bench! {
name: conv_f64_i32,
sig: (a: f64) -> i32,
crate_fn: conv::__fixdfsi,
sys_fn: __fixdfsi,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: i32;
asm!(
"fcvtzs {ret:w}, {a:d}",
a = in(vreg) a,
ret = lateout(reg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_f64_i64,
sig: (a: f64) -> i64,
crate_fn: conv::__fixdfdi,
sys_fn: __fixdfdi,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: i64;
asm!(
"fcvtzs {ret:x}, {a:d}",
a = in(vreg) a,
ret = lateout(reg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: conv_f64_i128,
sig: (a: f64) -> i128,
crate_fn: conv::__fixdfti,
sys_fn: __fixdfti,
sys_available: all(),
asm: []
}
criterion_group!(
float_conv,
conv_u32_f32,
conv_u32_f64,
conv_u64_f32,
conv_u64_f64,
conv_u128_f32,
conv_u128_f64,
conv_i32_f32,
conv_i32_f64,
conv_i64_f32,
conv_i64_f64,
conv_i128_f32,
conv_i128_f64,
conv_f64_u32,
conv_f64_u64,
conv_f64_u128,
conv_f64_i32,
conv_f64_i64,
conv_f64_i128,
);
// FIXME: ppc64le has a sporadic overflow panic in the crate functions
// <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
criterion_group!(
float_conv_not_ppc64le,
conv_f32_u32,
conv_f32_u64,
conv_f32_u128,
conv_f32_i32,
conv_f32_i64,
conv_f32_i128,
);
#[cfg(all(target_arch = "powerpc64", target_endian = "little"))]
criterion_main!(float_conv);
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
criterion_main!(float_conv, float_conv_not_ppc64le);

View file

@ -0,0 +1,70 @@
#![feature(f128)]
use compiler_builtins::float::div;
use criterion::{criterion_group, criterion_main, Criterion};
use testcrate::float_bench;
float_bench! {
name: div_f32,
sig: (a: f32, b: f32) -> f32,
crate_fn: div::__divsf3,
sys_fn: __divsf3,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
asm!(
"divss {a}, {b}",
a = inout(xmm_reg) a,
b = in(xmm_reg) b,
options(nomem, nostack, pure)
);
a
};
#[cfg(target_arch = "aarch64")] {
asm!(
"fdiv {a:s}, {a:s}, {b:s}",
a = inout(vreg) a,
b = in(vreg) b,
options(nomem, nostack, pure)
);
a
};
],
}
float_bench! {
name: div_f64,
sig: (a: f64, b: f64) -> f64,
crate_fn: div::__divdf3,
sys_fn: __divdf3,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
asm!(
"divsd {a}, {b}",
a = inout(xmm_reg) a,
b = in(xmm_reg) b,
options(nomem, nostack, pure)
);
a
};
#[cfg(target_arch = "aarch64")] {
asm!(
"fdiv {a:d}, {a:d}, {b:d}",
a = inout(vreg) a,
b = in(vreg) b,
options(nomem, nostack, pure)
);
a
};
],
}
criterion_group!(float_div, div_f32, div_f64);
criterion_main!(float_div);

View file

@ -0,0 +1,93 @@
#![allow(unused_variables)] // "unused" f16 registers
#![feature(f128)]
#![feature(f16)]
use compiler_builtins::float::extend;
use criterion::{criterion_group, criterion_main, Criterion};
use testcrate::float_bench;
float_bench! {
name: extend_f16_f32,
sig: (a: f16) -> f32,
crate_fn: extend::__extendhfsf2,
sys_fn: __extendhfsf2,
sys_available: not(feature = "no-sys-f16"),
asm: [
#[cfg(target_arch = "aarch64")] {
// FIXME(f16_f128): remove `to_bits()` after f16 asm support (rust-lang/rust/#116909)
let ret: f32;
asm!(
"fcvt {ret:s}, {a:h}",
a = in(vreg) a.to_bits(),
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: extend_f16_f128,
sig: (a: f16) -> f128,
crate_fn: extend::__extendhftf2,
crate_fn_ppc: extend::__extendhfkf2,
sys_fn: __extendhftf2,
sys_fn_ppc: __extendhfkf2,
sys_available: not(feature = "no-sys-f16-f128-convert"),
asm: [],
}
float_bench! {
name: extend_f32_f64,
sig: (a: f32) -> f64,
crate_fn: extend::__extendsfdf2,
sys_fn: __extendsfdf2,
sys_available: all(),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: f64;
asm!(
"fcvt {ret:d}, {a:s}",
a = in(vreg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: extend_f32_f128,
sig: (a: f32) -> f128,
crate_fn: extend::__extendsftf2,
crate_fn_ppc: extend::__extendsfkf2,
sys_fn: __extendsftf2,
sys_fn_ppc: __extendsfkf2,
sys_available: not(feature = "no-sys-f128"),
asm: [],
}
float_bench! {
name: extend_f64_f128,
sig: (a: f64) -> f128,
crate_fn: extend::__extenddftf2,
crate_fn_ppc: extend::__extenddfkf2,
sys_fn: __extenddftf2,
sys_fn_ppc: __extenddfkf2,
sys_available: not(feature = "no-sys-f128"),
asm: [],
}
criterion_group!(
float_extend,
extend_f16_f32,
extend_f16_f128,
extend_f32_f64,
extend_f32_f128,
extend_f64_f128,
);
criterion_main!(float_extend);

View file

@ -0,0 +1,81 @@
#![feature(f128)]
use compiler_builtins::float::mul;
use criterion::{criterion_group, criterion_main, Criterion};
use testcrate::float_bench;
float_bench! {
name: mul_f32,
sig: (a: f32, b: f32) -> f32,
crate_fn: mul::__mulsf3,
sys_fn: __mulsf3,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
asm!(
"mulss {a}, {b}",
a = inout(xmm_reg) a,
b = in(xmm_reg) b,
options(nomem, nostack, pure)
);
a
};
#[cfg(target_arch = "aarch64")] {
asm!(
"fmul {a:s}, {a:s}, {b:s}",
a = inout(vreg) a,
b = in(vreg) b,
options(nomem, nostack, pure)
);
a
};
],
}
float_bench! {
name: mul_f64,
sig: (a: f64, b: f64) -> f64,
crate_fn: mul::__muldf3,
sys_fn: __muldf3,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
asm!(
"mulsd {a}, {b}",
a = inout(xmm_reg) a,
b = in(xmm_reg) b,
options(nomem, nostack, pure)
);
a
};
#[cfg(target_arch = "aarch64")] {
asm!(
"fmul {a:d}, {a:d}, {b:d}",
a = inout(vreg) a,
b = in(vreg) b,
options(nomem, nostack, pure)
);
a
};
],
}
float_bench! {
name: mul_f128,
sig: (a: f128, b: f128) -> f128,
crate_fn: mul::__multf3,
crate_fn_ppc: mul::__mulkf3,
sys_fn: __multf3,
sys_fn_ppc: __mulkf3,
sys_available: not(feature = "no-sys-f128"),
asm: []
}
criterion_group!(float_mul, mul_f32, mul_f64, mul_f128);
criterion_main!(float_mul);

View file

@ -0,0 +1,24 @@
use compiler_builtins::float::pow;
use criterion::{criterion_group, criterion_main, Criterion};
use testcrate::float_bench;
float_bench! {
name: powi_f32,
sig: (a: f32, b: i32) -> f32,
crate_fn: pow::__powisf2,
sys_fn: __powisf2,
sys_available: all(),
asm: [],
}
float_bench! {
name: powi_f64,
sig: (a: f64, b: i32) -> f64,
crate_fn: pow::__powidf2,
sys_fn: __powidf2,
sys_available: all(),
asm: [],
}
// Group both `powi` benchmarks and generate the benchmark entry point. The group is named
// after this file's operation (`pow`) like the groups in the sibling bench files, rather than
// the `float_add` name that belongs to the addition benchmarks.
criterion_group!(float_pow, powi_f32, powi_f64);
criterion_main!(float_pow);

View file

@ -0,0 +1,81 @@
#![feature(f128)]
use compiler_builtins::float::sub;
use criterion::{criterion_group, criterion_main, Criterion};
use testcrate::float_bench;
float_bench! {
name: sub_f32,
sig: (a: f32, b: f32) -> f32,
crate_fn: sub::__subsf3,
sys_fn: __subsf3,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
asm!(
"subss {a}, {b}",
a = inout(xmm_reg) a,
b = in(xmm_reg) b,
options(nomem, nostack, pure)
);
a
};
#[cfg(target_arch = "aarch64")] {
asm!(
"fsub {a:s}, {a:s}, {b:s}",
a = inout(vreg) a,
b = in(vreg) b,
options(nomem, nostack, pure)
);
a
};
],
}
float_bench! {
name: sub_f64,
sig: (a: f64, b: f64) -> f64,
crate_fn: sub::__subdf3,
sys_fn: __subdf3,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
asm!(
"subsd {a}, {b}",
a = inout(xmm_reg) a,
b = in(xmm_reg) b,
options(nomem, nostack, pure)
);
a
};
#[cfg(target_arch = "aarch64")] {
asm!(
"fsub {a:d}, {a:d}, {b:d}",
a = inout(vreg) a,
b = in(vreg) b,
options(nomem, nostack, pure)
);
a
};
],
}
float_bench! {
name: sub_f128,
sig: (a: f128, b: f128) -> f128,
crate_fn: sub::__subtf3,
crate_fn_ppc: sub::__subkf3,
sys_fn: __subtf3,
sys_fn_ppc: __subkf3,
sys_available: not(feature = "no-sys-f128"),
asm: []
}
criterion_group!(float_sub, sub_f32, sub_f64, sub_f128);
criterion_main!(float_sub);

View file

@ -0,0 +1,127 @@
#![feature(f128)]
#![feature(f16)]
use compiler_builtins::float::trunc;
use criterion::{criterion_group, criterion_main, Criterion};
use testcrate::float_bench;
float_bench! {
name: trunc_f32_f16,
sig: (a: f32) -> f16,
crate_fn: trunc::__truncsfhf2,
sys_fn: __truncsfhf2,
sys_available: not(feature = "no-sys-f16"),
asm: [
#[cfg(target_arch = "aarch64")] {
// FIXME(f16_f128): remove `from_bits()` after f16 asm support (rust-lang/rust/#116909)
let ret: u16;
asm!(
"fcvt {ret:h}, {a:s}",
a = in(vreg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
f16::from_bits(ret)
};
],
}
// Benchmark `f64 -> f16` truncation: compiler-builtins `__truncdfhf2`, the system symbol of the
// same name, and (on aarch64) a single `fcvt` instruction.
float_bench! {
name: trunc_f64_f16,
sig: (a: f64) -> f16,
crate_fn: trunc::__truncdfhf2,
sys_fn: __truncdfhf2,
// NOTE(review): this conversion involves no `f128`, yet the system symbol is gated on
// `no-sys-f128` while `trunc_f32_f16` uses `no-sys-f16`. Looks like a copy-paste slip, but
// `__truncdfhf2` availability per platform is not visible here — confirm before changing.
sys_available: not(feature = "no-sys-f128"),
asm: [
#[cfg(target_arch = "aarch64")] {
// FIXME(f16_f128): remove `from_bits()` after f16 asm support (rust-lang/rust/#116909)
let ret: u16;
asm!(
// Double-precision (`d`) source, half-precision (`h`) destination.
"fcvt {ret:h}, {a:d}",
a = in(vreg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
f16::from_bits(ret)
};
],
}
float_bench! {
name: trunc_f64_f32,
sig: (a: f64) -> f32,
crate_fn: trunc::__truncdfsf2,
sys_fn: __truncdfsf2,
sys_available: all(),
asm: [
#[cfg(target_arch = "x86_64")] {
let ret: f32;
asm!(
"cvtsd2ss {ret}, {a}",
a = in(xmm_reg) a,
ret = lateout(xmm_reg) ret,
options(nomem, nostack, pure),
);
ret
};
#[cfg(target_arch = "aarch64")] {
let ret: f32;
asm!(
"fcvt {ret:s}, {a:d}",
a = in(vreg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
float_bench! {
name: trunc_f128_f16,
sig: (a: f128) -> f16,
crate_fn: trunc::__trunctfhf2,
crate_fn_ppc: trunc::__trunckfhf2,
sys_fn: __trunctfhf2,
sys_fn_ppc: __trunckfhf2,
sys_available: not(feature = "no-sys-f16-f128-convert"),
asm: [],
}
float_bench! {
name: trunc_f128_f32,
sig: (a: f128) -> f32,
crate_fn: trunc::__trunctfsf2,
crate_fn_ppc: trunc::__trunckfsf2,
sys_fn: __trunctfsf2,
sys_fn_ppc: __trunckfsf2,
sys_available: not(feature = "no-sys-f128"),
asm: [],
}
float_bench! {
name: trunc_f128_f64,
sig: (a: f128) -> f64,
crate_fn: trunc::__trunctfdf2,
crate_fn_ppc: trunc::__trunckfdf2,
sys_fn: __trunctfdf2,
sys_fn_ppc: __trunckfdf2,
sys_available: not(feature = "no-sys-f128"),
asm: [],
}
criterion_group!(
float_trunc,
trunc_f32_f16,
trunc_f64_f16,
trunc_f64_f32,
trunc_f128_f16,
trunc_f128_f32,
trunc_f128_f64,
);
criterion_main!(float_trunc);

View file

@ -5,6 +5,8 @@ use std::{collections::HashSet, env};
enum Feature {
NoSysF128,
NoSysF128IntConvert,
NoSysF16,
NoSysF16F128Convert,
}
fn main() {
@ -31,6 +33,7 @@ fn main() {
{
features.insert(Feature::NoSysF128);
features.insert(Feature::NoSysF128IntConvert);
features.insert(Feature::NoSysF16F128Convert);
}
if target.starts_with("i586") || target.starts_with("i686") {
@ -38,6 +41,17 @@ fn main() {
features.insert(Feature::NoSysF128IntConvert);
}
if target.contains("-unknown-linux-") {
// No `__extendhftf2` on x86, no `__trunctfhf2` on aarch64
features.insert(Feature::NoSysF16F128Convert);
}
if target.starts_with("wasm32-") {
// Linking says "error: function signature mismatch: __extendhfsf2" and seems to
// think the signature is either `(i32) -> f32` or `(f32) -> f32`
features.insert(Feature::NoSysF16);
}
for feature in features {
let (name, warning) = match feature {
Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"),
@ -45,6 +59,11 @@ fn main() {
"no-sys-f128-int-convert",
"using apfloat fallback for f128 to int conversions",
),
Feature::NoSysF16F128Convert => (
"no-sys-f16-f128-convert",
"skipping using apfloat fallback for f16 <-> f128 conversions",
),
Feature::NoSysF16 => ("no-sys-f16", "using apfloat fallback for f16"),
};
println!("cargo:warning={warning}");
println!("cargo:rustc-cfg=feature=\"{name}\"");

View file

@ -0,0 +1,348 @@
use core::cell::RefCell;
use alloc::vec::Vec;
use compiler_builtins::float::Float;
/// Number of items to fuzz with when checking that the functions produce equal results
pub const CHECK_ITER_ITEMS: u32 = 10_000;
/// Number of items to benchmark with, so that a variety of inputs is covered
pub const BENCH_ITER_ITEMS: u32 = 500;
/// Decide whether the correctness check between compiler-builtins and the builtin system
/// functions is skipped for `test_name`. The benchmarks/tests themselves still run.
pub fn skip_sys_checks(test_name: &str) -> bool {
    // Membership test shared by all of the skip lists below.
    fn listed(names: &[&str], wanted: &str) -> bool {
        names.iter().any(|&entry| entry == wanted)
    }

    // FIXME(llvm): system symbols have incorrect results on Windows
    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2121359807>
    const WINDOWS_SKIPPED: &[&str] = &[
        "conv_f32_u128",
        "conv_f32_i128",
        "conv_f64_u128",
        "conv_f64_i128",
    ];

    // FIXME(f16_f128): system symbols have incorrect results
    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
    const X86_NO_SSE_SKIPPED: &[&str] = &["add_f128", "sub_f128", "powi_f32", "powi_f64"];

    // FIXME(f16_f128): error on LE ppc64. There are more tests that are cfg-ed out completely
    // in their benchmark modules due to runtime panics.
    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
    const PPC64LE_SKIPPED: &[&str] = &["extend_f32_f128"];

    const ALWAYS_SKIPPED: &[&str] = &[
        // FIXME(f16_f128): system symbols have incorrect results
        // <https://github.com/rust-lang/compiler-builtins/issues/617>
        "extend_f16_f32",
        "trunc_f32_f16",
        "trunc_f64_f16",
        // FIXME(f16_f128): rounding error
        // <https://github.com/rust-lang/compiler-builtins/issues/616>
        "mul_f128",
    ];

    // The Arm symbols need a different ABI that our macro doesn't handle, just skip it
    let unsupported_abi = cfg!(target_arch = "arm");
    let ppc64le = cfg!(all(target_arch = "powerpc64", target_endian = "little"));
    let x86_no_sse = cfg!(all(target_arch = "x86", not(target_feature = "sse")));
    let windows = cfg!(target_family = "windows");

    unsupported_abi
        || listed(ALWAYS_SKIPPED, test_name)
        || (ppc64le && listed(PPC64LE_SKIPPED, test_name))
        || (x86_no_sse && listed(X86_NO_SSE_SKIPPED, test_name))
        || (windows && listed(WINDOWS_SKIPPED, test_name))
}
/// Report whether the result comparison against the handwritten assembly implementation
/// should be skipped for `test_name`.
///
/// The benchmark/test itself still runs; only the correctness check between
/// compiler-builtins and the assembly function is bypassed.
pub fn skip_asm_checks(test_name: &str) -> bool {
    // FIXME(f16_f128): rounding error
    // <https://github.com/rust-lang/compiler-builtins/issues/616>
    matches!(test_name, "mul_f32" | "mul_f64")
}
/// Create a comparison of the system symbol, compiler_builtins, and optionally handwritten
/// assembly.
///
/// Expands to `fn $name(c: &mut Criterion)`. That function first runs the crate
/// implementation against the system symbol (when `sys_available` holds) and against the
/// assembly implementation (when one of the `asm` cfgs holds) over a fuzzed input vector,
/// asserting that the results agree unless the test is on a skip list. It then registers one
/// benchmark per available implementation in a Criterion group named after `$name`.
///
/// `Criterion` is referenced unqualified, so it must be in scope where the macro is invoked.
#[macro_export]
macro_rules! float_bench {
(
// Name of this benchmark
name: $name:ident,
// The function signature to be tested
sig: ($($arg:ident: $arg_ty:ty),*) -> $ret_ty:ty,
// Path to the function in compiler_builtins
crate_fn: $crate_fn:path,
// Optional alias on ppc
$( crate_fn_ppc: $crate_fn_ppc:path, )?
// Name of the system symbol
sys_fn: $sys_fn:ident,
// Optional alias on ppc
// NOTE(review): this is captured as `path` but forwarded to `@coalesce_fn`, whose matcher
// expects an `ident`; confirm this expands on PowerPC when an alias is actually supplied.
$( sys_fn_ppc: $sys_fn_ppc:path, )?
// Meta saying whether the system symbol is available
sys_available: $sys_available:meta,
// An optional function to validate the results of two functions are equal, if not
// just `$ret_ty::check_eq`
$( output_eq: $output_eq:expr, )?
// Assembly implementations, if any.
asm: [
$(
#[cfg($asm_meta:meta)] {
$($asm_tt:tt)*
}
);*
$(;)?
]
$(,)?
) => {paste::paste! {
// Declare the system symbol so it can be called directly. Only present when the caller
// says the symbol exists on this target.
#[cfg($sys_available)]
extern "C" {
/// Binding for the system function
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty;
// On PowerPC, declare the alias name instead if one was given
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
float_bench! { @coalesce_fn $($sys_fn_ppc)? =>
fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty;
}
}
fn $name(c: &mut Criterion) {
use core::hint::black_box;
use compiler_builtins::float::Float;
use $crate::bench::TestIO;
// Wrapper around the compiler_builtins implementation under test
#[inline(never)] // equalize with external calls
fn crate_fn($($arg: $arg_ty),*) -> $ret_ty {
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
let target_crate_fn = $crate_fn;
// On PPC, use an alias if specified
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
let target_crate_fn = float_bench!(@coalesce $($crate_fn_ppc)?, $crate_fn);
target_crate_fn( $($arg),* )
}
// Wrapper around the system symbol declared in the `extern "C"` block above
#[inline(always)] // already a branch
#[cfg($sys_available)]
fn sys_fn($($arg: $arg_ty),*) -> $ret_ty {
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
let target_sys_fn = $sys_fn;
// On PPC, use an alias if specified
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
let target_sys_fn = float_bench!(@coalesce $($sys_fn_ppc)?, $sys_fn);
unsafe { target_sys_fn( $($arg),* ) }
}
// Wrapper around the caller-provided assembly. It only exists when at least one of the
// `asm` cfgs is active; each snippet is pasted into its own cfg-gated `unsafe` block.
// NOTE(review): presumably at most one `asm` cfg matches any given target, so exactly one
// block remains as the function's tail expression; verify against the callers.
#[inline(never)] // equalize with external calls
#[cfg(any( $($asm_meta),* ))]
fn asm_fn($(mut $arg: $arg_ty),*) -> $ret_ty {
use core::arch::asm;
$(
#[cfg($asm_meta)]
unsafe { $($asm_tt)* }
)*
}
// Inputs: a larger vector for the correctness checks, a smaller one for timing
let testvec = <($($arg_ty),*)>::make_testvec($crate::bench::CHECK_ITER_ITEMS);
let benchvec = <($($arg_ty),*)>::make_testvec($crate::bench::BENCH_ITER_ITEMS);
let test_name = stringify!($name);
// Use the caller's comparison if given, otherwise the return type's `check_eq`
let check_eq = float_bench!(@coalesce $($output_eq)?, $ret_ty::check_eq);
// Verify math lines up. We run the crate functions even if we don't validate the
// output here to make sure there are no panics or crashes.
#[cfg($sys_available)]
for ($($arg),*) in testvec.iter().copied() {
let crate_res = crate_fn($($arg),*);
let sys_res = sys_fn($($arg),*);
if $crate::bench::skip_sys_checks(test_name) {
continue;
}
assert!(
check_eq(crate_res, sys_res),
"{test_name}{:?}: crate: {crate_res:?}, sys: {sys_res:?}",
($($arg),* ,)
);
}
// Same check, this time against the assembly implementation
#[cfg(any( $($asm_meta),* ))]
{
for ($($arg),*) in testvec.iter().copied() {
let crate_res = crate_fn($($arg),*);
let asm_res = asm_fn($($arg),*);
if $crate::bench::skip_asm_checks(test_name) {
continue;
}
assert!(
check_eq(crate_res, asm_res),
"{test_name}{:?}: crate: {crate_res:?}, asm: {asm_res:?}",
($($arg),* ,)
);
}
}
// Register the benchmarks. Each iteration runs the function over all of `benchvec`, with
// `black_box` around both the arguments and the result.
let mut group = c.benchmark_group(test_name);
group.bench_function("compiler-builtins", |b| b.iter(|| {
for ($($arg),*) in benchvec.iter().copied() {
black_box(crate_fn( $(black_box($arg)),* ));
}
}));
#[cfg($sys_available)]
group.bench_function("system", |b| b.iter(|| {
for ($($arg),*) in benchvec.iter().copied() {
black_box(sys_fn( $(black_box($arg)),* ));
}
}));
// The assembly benchmark is labelled with the architecture and OS family constants
#[cfg(any( $($asm_meta),* ))]
group.bench_function(&format!(
"assembly ({} {})", std::env::consts::ARCH, std::env::consts::FAMILY
), |b| b.iter(|| {
for ($($arg),*) in benchvec.iter().copied() {
black_box(asm_fn( $(black_box($arg)),* ));
}
}));
group.finish();
}
}};
// Allow overriding a default
// (expands to the first expression when one is present, otherwise to the default)
(@coalesce $specified:expr, $default:expr) => { $specified };
(@coalesce, $default:expr) => { $default };
// Allow overriding a function name
// (re-emits the `fn` item under `$specified` when present, otherwise under its own name)
(@coalesce_fn $specified:ident => fn $default_name:ident $($tt:tt)+) => {
fn $specified $($tt)+
};
(@coalesce_fn => fn $default_name:ident $($tt:tt)+) => {
fn $default_name $($tt)+
};
}
/// A type used as either an input or output to/from a benchmark function.
///
/// Implemented in this module for float and integer primitives and for the argument tuples
/// built from them.
pub trait TestIO: Sized {
/// Build a vector of fuzzed values of this type.
///
/// The implementations in this module forward `len` (or its integer square root, for the
/// mixed float/int tuple) to the crate's `fuzz*` helpers and collect everything those
/// helpers produce. The exact number of elements returned is up to those helpers.
fn make_testvec(len: u32) -> Vec<Self>;
/// Return whether two results should be treated as equal.
///
/// Floats are compared with `Float::eq_repr` and integers with `==`. The tuple
/// implementations are `unimplemented!()`.
fn check_eq(a: Self, b: Self) -> bool;
}
/// Generate `TestIO` implementations for primitive types and the tuples built from them.
///
/// Accepted forms:
/// - `float T, ...`: implement `TestIO` for each float type `T` and for `(T, T)`.
/// - `int T, ...`: implement `TestIO` for each integer type `T` and for `(T, T)`.
/// - `(float, int)(F, I)`: implement `TestIO` for the mixed pair `(F, I)`.
///
/// The tuple implementations only provide `make_testvec`; their `check_eq` is
/// `unimplemented!()`.
macro_rules! impl_testio {
    (float $($f_ty:ty),+) => {$(
        impl TestIO for $f_ty {
            fn make_testvec(len: u32) -> Vec<Self> {
                // The fuzz helpers take a `Fn`, so collect through a `RefCell`
                let sink: RefCell<Vec<Self>> = RefCell::new(Vec::new());
                crate::fuzz_float(len, |x| sink.borrow_mut().push(x));
                sink.into_inner()
            }

            fn check_eq(a: Self, b: Self) -> bool {
                Float::eq_repr(a, b)
            }
        }

        impl TestIO for ($f_ty, $f_ty) {
            fn make_testvec(len: u32) -> Vec<Self> {
                // The fuzz helpers take a `Fn`, so collect through a `RefCell`
                let sink: RefCell<Vec<Self>> = RefCell::new(Vec::new());
                crate::fuzz_float_2(len, |x, y| sink.borrow_mut().push((x, y)));
                sink.into_inner()
            }

            fn check_eq(_a: Self, _b: Self) -> bool {
                unimplemented!()
            }
        }
    )+};

    (int $($i_ty:ty),+) => {$(
        impl TestIO for $i_ty {
            fn make_testvec(len: u32) -> Vec<Self> {
                // The fuzz helpers take a `Fn`, so collect through a `RefCell`
                let sink: RefCell<Vec<Self>> = RefCell::new(Vec::new());
                crate::fuzz(len, |x| sink.borrow_mut().push(x));
                sink.into_inner()
            }

            fn check_eq(a: Self, b: Self) -> bool {
                a == b
            }
        }

        impl TestIO for ($i_ty, $i_ty) {
            fn make_testvec(len: u32) -> Vec<Self> {
                // The fuzz helpers take a `Fn`, so collect through a `RefCell`
                let sink: RefCell<Vec<Self>> = RefCell::new(Vec::new());
                crate::fuzz_2(len, |x, y| sink.borrow_mut().push((x, y)));
                sink.into_inner()
            }

            fn check_eq(_a: Self, _b: Self) -> bool {
                unimplemented!()
            }
        }
    )+};

    ((float, int) ($f_ty:ty, $i_ty:ty)) => {
        impl TestIO for ($f_ty, $i_ty) {
            fn make_testvec(len: u32) -> Vec<Self> {
                // Both axes are fuzzed with `isqrt(len)` and then combined as a full cross
                // product of every float with every integer.
                let per_axis = len.isqrt();

                // The fuzz helpers take a `Fn`, so collect through `RefCell`s
                let int_sink: RefCell<Vec<$i_ty>> = RefCell::new(Vec::new());
                let float_sink: RefCell<Vec<$f_ty>> = RefCell::new(Vec::new());
                crate::fuzz(per_axis, |i| int_sink.borrow_mut().push(i));
                crate::fuzz_float(per_axis, |f| float_sink.borrow_mut().push(f));

                let ints = int_sink.into_inner();
                let floats = float_sink.into_inner();

                // Float-major order: all integers for the first float, then the next float
                let mut pairs = Vec::new();
                for f in floats {
                    pairs.extend(ints.iter().map(|&i| (f, i)));
                }
                pairs
            }

            fn check_eq(_a: Self, _b: Self) -> bool {
                unimplemented!()
            }
        }
    }
}
// `TestIO` for f16/f128 is only generated when the `no-f16-f128` feature is not enabled
#[cfg(not(feature = "no-f16-f128"))]
impl_testio!(float f16, f128);
// Each `float`/`int` invocation covers the listed types plus the same-type pair `(T, T)`
impl_testio!(float f32, f64);
impl_testio!(int i16, i32, i64, i128);
impl_testio!(int u16, u32, u64, u128);
// Mixed (float, integer) argument pairs
// NOTE(review): presumably for the `powi_*` benchmarks; confirm against the callers
impl_testio!((float, int)(f32, i32));
impl_testio!((float, int)(f64, i32));

View file

@ -13,6 +13,12 @@
//! Some floating point tests are disabled for specific architectures, because they do not have
//! correct rounding.
#![no_std]
#![cfg_attr(not(feature = "no-f16-f128"), feature(f128))]
#![cfg_attr(not(feature = "no-f16-f128"), feature(f16))]
#![feature(isqrt)]
pub mod bench;
extern crate alloc;
use compiler_builtins::float::Float;
use compiler_builtins::int::{Int, MinInt};