Merge pull request #618 from tgross35/benchmarking
Add benchmarks for floating point math
This commit is contained in:
commit
46e377ae5f
16 changed files with 2930 additions and 2 deletions
|
|
@ -4,7 +4,9 @@ set -eux
|
|||
|
||||
target="${1:-}"
|
||||
|
||||
if [ -z "${1:-}" ]; then
|
||||
export RUST_BACKTRACE="${RUST_BACKTRACE:-full}"
|
||||
|
||||
if [ -z "$target" ]; then
|
||||
host_target=$(rustc -vV | awk '/^host/ { print $2 }')
|
||||
echo "Defaulted to host target $host_target"
|
||||
target="$host_target"
|
||||
|
|
@ -30,6 +32,8 @@ else
|
|||
$run --features no-asm --release
|
||||
$run --features no-f16-f128
|
||||
$run --features no-f16-f128 --release
|
||||
$run --benches
|
||||
$run --benches --release
|
||||
fi
|
||||
|
||||
if [ "${TEST_VERBATIM:-}" = "1" ]; then
|
||||
|
|
|
|||
|
|
@ -21,6 +21,10 @@ path = ".."
|
|||
default-features = false
|
||||
features = ["public-test-deps"]
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", default-features = false, features = ["cargo_bench_support"] }
|
||||
paste = "1.0.15"
|
||||
|
||||
[target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies]
|
||||
test = { git = "https://github.com/japaric/utest" }
|
||||
utest-cortex-m-qemu = { default-features = false, git = "https://github.com/japaric/utest" }
|
||||
|
|
@ -34,6 +38,48 @@ no-f16-f128 = ["compiler_builtins/no-f16-f128"]
|
|||
mem = ["compiler_builtins/mem"]
|
||||
mangled-names = ["compiler_builtins/mangled-names"]
|
||||
# Skip tests that rely on f128 symbols being available on the system
|
||||
no-sys-f128 = ["no-sys-f128-int-convert"]
|
||||
no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"]
|
||||
# Some platforms provide f128 functions for everything except integer conversions
|
||||
no-sys-f128-int-convert = []
|
||||
no-sys-f16-f128-convert = []
|
||||
# Skip tests that rely on f16 symbols being available on the system
|
||||
no-sys-f16 = []
|
||||
|
||||
# Enable report generation without bringing in more dependencies by default
|
||||
benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
|
||||
|
||||
[[bench]]
|
||||
name = "float_add"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "float_sub"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "float_mul"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "float_div"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "float_cmp"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "float_conv"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "float_extend"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "float_trunc"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "float_pow"
|
||||
harness = false
|
||||
|
|
|
|||
|
|
@ -0,0 +1,500 @@
|
|||
|
||||
running 0 tests
|
||||
|
||||
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s
|
||||
|
||||
add_f32 compiler-builtins
|
||||
time: [35.804 µs 35.863 µs 35.920 µs]
|
||||
Found 5 outliers among 100 measurements (5.00%)
|
||||
2 (2.00%) high mild
|
||||
3 (3.00%) high severe
|
||||
|
||||
add_f32 system time: [39.084 µs 39.127 µs 39.169 µs]
|
||||
Found 11 outliers among 100 measurements (11.00%)
|
||||
7 (7.00%) high mild
|
||||
4 (4.00%) high severe
|
||||
|
||||
add_f32 assembly (aarch64 unix)
|
||||
time: [8.1034 µs 8.1441 µs 8.1866 µs]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
4 (4.00%) high mild
|
||||
|
||||
add_f64 compiler-builtins
|
||||
time: [35.647 µs 35.725 µs 35.799 µs]
|
||||
Found 10 outliers among 100 measurements (10.00%)
|
||||
8 (8.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
add_f64 system time: [39.308 µs 39.322 µs 39.336 µs]
|
||||
Found 7 outliers among 100 measurements (7.00%)
|
||||
4 (4.00%) high mild
|
||||
3 (3.00%) high severe
|
||||
|
||||
add_f64 assembly (aarch64 unix)
|
||||
time: [8.0401 µs 8.0442 µs 8.0499 µs]
|
||||
Found 11 outliers among 100 measurements (11.00%)
|
||||
2 (2.00%) high mild
|
||||
9 (9.00%) high severe
|
||||
|
||||
add_f128 compiler-builtins
|
||||
time: [41.801 µs 41.986 µs 42.201 µs]
|
||||
Found 7 outliers among 100 measurements (7.00%)
|
||||
4 (4.00%) high mild
|
||||
3 (3.00%) high severe
|
||||
|
||||
cmp_f32_gt compiler-builtins
|
||||
time: [13.579 µs 13.675 µs 13.778 µs]
|
||||
Found 16 outliers among 100 measurements (16.00%)
|
||||
6 (6.00%) high mild
|
||||
10 (10.00%) high severe
|
||||
|
||||
cmp_f32_gt system time: [12.343 µs 12.348 µs 12.355 µs]
|
||||
Found 13 outliers among 100 measurements (13.00%)
|
||||
1 (1.00%) low mild
|
||||
3 (3.00%) high mild
|
||||
9 (9.00%) high severe
|
||||
|
||||
cmp_f32_gt assembly (aarch64 unix)
|
||||
time: [8.2593 µs 8.3185 µs 8.3813 µs]
|
||||
Found 1 outliers among 100 measurements (1.00%)
|
||||
1 (1.00%) high mild
|
||||
|
||||
cmp_f32_unord compiler-builtins
|
||||
time: [11.977 µs 12.042 µs 12.109 µs]
|
||||
Found 13 outliers among 100 measurements (13.00%)
|
||||
5 (5.00%) low severe
|
||||
6 (6.00%) low mild
|
||||
2 (2.00%) high mild
|
||||
|
||||
cmp_f32_unord system time: [8.1236 µs 8.1736 µs 8.2350 µs]
|
||||
Found 18 outliers among 100 measurements (18.00%)
|
||||
5 (5.00%) high mild
|
||||
13 (13.00%) high severe
|
||||
|
||||
cmp_f32_unord assembly (aarch64 unix)
|
||||
time: [8.1446 µs 8.2080 µs 8.2762 µs]
|
||||
Found 14 outliers among 100 measurements (14.00%)
|
||||
6 (6.00%) high mild
|
||||
8 (8.00%) high severe
|
||||
|
||||
cmp_f64_gt compiler-builtins
|
||||
time: [16.073 µs 16.077 µs 16.082 µs]
|
||||
Found 17 outliers among 100 measurements (17.00%)
|
||||
2 (2.00%) low mild
|
||||
4 (4.00%) high mild
|
||||
11 (11.00%) high severe
|
||||
|
||||
cmp_f64_gt system time: [12.456 µs 12.487 µs 12.522 µs]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
2 (2.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
cmp_f64_gt assembly (aarch64 unix)
|
||||
time: [8.0557 µs 8.0616 µs 8.0685 µs]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
1 (1.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
cmp_f64_unord compiler-builtins
|
||||
time: [10.715 µs 10.724 µs 10.737 µs]
|
||||
Found 13 outliers among 100 measurements (13.00%)
|
||||
3 (3.00%) high mild
|
||||
10 (10.00%) high severe
|
||||
|
||||
cmp_f64_unord system time: [8.0692 µs 8.0734 µs 8.0784 µs]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
1 (1.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
cmp_f64_unord assembly (aarch64 unix)
|
||||
time: [8.0569 µs 8.0677 µs 8.0818 µs]
|
||||
Found 18 outliers among 100 measurements (18.00%)
|
||||
4 (4.00%) high mild
|
||||
14 (14.00%) high severe
|
||||
|
||||
cmp_f128_gt compiler-builtins
|
||||
time: [18.234 µs 18.401 µs 18.602 µs]
|
||||
|
||||
cmp_f128_unord compiler-builtins
|
||||
time: [13.410 µs 13.471 µs 13.542 µs]
|
||||
Found 7 outliers among 100 measurements (7.00%)
|
||||
7 (7.00%) high mild
|
||||
|
||||
conv_u32_f32 compiler-builtins
|
||||
time: [774.58 ns 776.01 ns 777.59 ns]
|
||||
Found 9 outliers among 100 measurements (9.00%)
|
||||
2 (2.00%) high mild
|
||||
7 (7.00%) high severe
|
||||
|
||||
conv_u32_f32 system time: [622.68 ns 625.64 ns 629.26 ns]
|
||||
Found 16 outliers among 100 measurements (16.00%)
|
||||
7 (7.00%) high mild
|
||||
9 (9.00%) high severe
|
||||
|
||||
conv_u32_f32 assembly (aarch64 unix)
|
||||
time: [468.05 ns 469.76 ns 471.46 ns]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
2 (2.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_u32_f64 compiler-builtins
|
||||
time: [617.61 ns 618.00 ns 618.52 ns]
|
||||
Found 13 outliers among 100 measurements (13.00%)
|
||||
4 (4.00%) high mild
|
||||
9 (9.00%) high severe
|
||||
|
||||
conv_u32_f64 system time: [469.56 ns 471.03 ns 472.81 ns]
|
||||
Found 11 outliers among 100 measurements (11.00%)
|
||||
7 (7.00%) high mild
|
||||
4 (4.00%) high severe
|
||||
|
||||
conv_u32_f64 assembly (aarch64 unix)
|
||||
time: [464.43 ns 465.01 ns 465.72 ns]
|
||||
Found 13 outliers among 100 measurements (13.00%)
|
||||
5 (5.00%) high mild
|
||||
8 (8.00%) high severe
|
||||
|
||||
conv_u64_f32 compiler-builtins
|
||||
time: [847.95 ns 848.19 ns 848.46 ns]
|
||||
Found 19 outliers among 100 measurements (19.00%)
|
||||
3 (3.00%) low mild
|
||||
9 (9.00%) high mild
|
||||
7 (7.00%) high severe
|
||||
|
||||
conv_u64_f32 system time: [701.68 ns 701.95 ns 702.30 ns]
|
||||
Found 10 outliers among 100 measurements (10.00%)
|
||||
4 (4.00%) high mild
|
||||
6 (6.00%) high severe
|
||||
|
||||
conv_u64_f32 assembly (aarch64 unix)
|
||||
time: [511.73 ns 512.43 ns 513.32 ns]
|
||||
Found 6 outliers among 100 measurements (6.00%)
|
||||
6 (6.00%) high mild
|
||||
|
||||
conv_u64_f64 compiler-builtins
|
||||
time: [681.23 ns 682.55 ns 684.30 ns]
|
||||
Found 18 outliers among 100 measurements (18.00%)
|
||||
1 (1.00%) high mild
|
||||
17 (17.00%) high severe
|
||||
|
||||
conv_u64_f64 system time: [679.34 ns 679.57 ns 679.88 ns]
|
||||
Found 18 outliers among 100 measurements (18.00%)
|
||||
1 (1.00%) low mild
|
||||
6 (6.00%) high mild
|
||||
11 (11.00%) high severe
|
||||
|
||||
conv_u64_f64 assembly (aarch64 unix)
|
||||
time: [509.90 ns 510.09 ns 510.30 ns]
|
||||
Found 15 outliers among 100 measurements (15.00%)
|
||||
6 (6.00%) high mild
|
||||
9 (9.00%) high severe
|
||||
|
||||
conv_u128_f32 compiler-builtins
|
||||
time: [1.1368 µs 1.1372 µs 1.1377 µs]
|
||||
Found 14 outliers among 100 measurements (14.00%)
|
||||
8 (8.00%) high mild
|
||||
6 (6.00%) high severe
|
||||
|
||||
conv_u128_f32 system time: [1.4338 µs 1.4370 µs 1.4410 µs]
|
||||
Found 7 outliers among 100 measurements (7.00%)
|
||||
2 (2.00%) high mild
|
||||
5 (5.00%) high severe
|
||||
|
||||
conv_u128_f64 compiler-builtins
|
||||
time: [1.0133 µs 1.0143 µs 1.0156 µs]
|
||||
Found 16 outliers among 100 measurements (16.00%)
|
||||
2 (2.00%) high mild
|
||||
14 (14.00%) high severe
|
||||
|
||||
conv_u128_f64 system time: [1.3473 µs 1.3530 µs 1.3600 µs]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
4 (4.00%) high mild
|
||||
|
||||
conv_i32_f32 compiler-builtins
|
||||
time: [906.53 ns 907.86 ns 909.23 ns]
|
||||
Found 7 outliers among 100 measurements (7.00%)
|
||||
4 (4.00%) high mild
|
||||
3 (3.00%) high severe
|
||||
|
||||
conv_i32_f32 system time: [914.53 ns 915.69 ns 917.01 ns]
|
||||
Found 10 outliers among 100 measurements (10.00%)
|
||||
6 (6.00%) high mild
|
||||
4 (4.00%) high severe
|
||||
|
||||
conv_i32_f32 assembly (aarch64 unix)
|
||||
time: [464.55 ns 465.10 ns 465.83 ns]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
4 (4.00%) high mild
|
||||
|
||||
conv_i32_f64 compiler-builtins
|
||||
time: [617.63 ns 617.92 ns 618.27 ns]
|
||||
Found 12 outliers among 100 measurements (12.00%)
|
||||
3 (3.00%) high mild
|
||||
9 (9.00%) high severe
|
||||
|
||||
conv_i32_f64 system time: [622.83 ns 624.19 ns 625.61 ns]
|
||||
Found 6 outliers among 100 measurements (6.00%)
|
||||
5 (5.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_i32_f64 assembly (aarch64 unix)
|
||||
time: [465.24 ns 466.04 ns 466.95 ns]
|
||||
Found 11 outliers among 100 measurements (11.00%)
|
||||
4 (4.00%) high mild
|
||||
7 (7.00%) high severe
|
||||
|
||||
conv_i64_f32 compiler-builtins
|
||||
time: [852.67 ns 853.92 ns 855.34 ns]
|
||||
Found 11 outliers among 100 measurements (11.00%)
|
||||
3 (3.00%) high mild
|
||||
8 (8.00%) high severe
|
||||
|
||||
conv_i64_f32 system time: [906.94 ns 908.04 ns 909.33 ns]
|
||||
Found 15 outliers among 100 measurements (15.00%)
|
||||
2 (2.00%) high mild
|
||||
13 (13.00%) high severe
|
||||
|
||||
conv_i64_f32 assembly (aarch64 unix)
|
||||
time: [510.84 ns 511.27 ns 511.80 ns]
|
||||
Found 8 outliers among 100 measurements (8.00%)
|
||||
3 (3.00%) high mild
|
||||
5 (5.00%) high severe
|
||||
|
||||
conv_i64_f64 compiler-builtins
|
||||
time: [932.35 ns 932.97 ns 933.76 ns]
|
||||
Found 10 outliers among 100 measurements (10.00%)
|
||||
4 (4.00%) high mild
|
||||
6 (6.00%) high severe
|
||||
|
||||
conv_i64_f64 system time: [955.91 ns 958.95 ns 962.05 ns]
|
||||
Found 5 outliers among 100 measurements (5.00%)
|
||||
3 (3.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
conv_i64_f64 assembly (aarch64 unix)
|
||||
time: [510.19 ns 510.72 ns 511.44 ns]
|
||||
Found 9 outliers among 100 measurements (9.00%)
|
||||
5 (5.00%) high mild
|
||||
4 (4.00%) high severe
|
||||
|
||||
conv_i128_f32 compiler-builtins
|
||||
time: [1.4248 µs 1.4285 µs 1.4323 µs]
|
||||
Found 12 outliers among 100 measurements (12.00%)
|
||||
7 (7.00%) high mild
|
||||
5 (5.00%) high severe
|
||||
|
||||
conv_i128_f32 system time: [1.6970 µs 1.7017 µs 1.7069 µs]
|
||||
Found 5 outliers among 100 measurements (5.00%)
|
||||
3 (3.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
conv_i128_f64 compiler-builtins
|
||||
time: [1.3132 µs 1.3161 µs 1.3191 µs]
|
||||
Found 2 outliers among 100 measurements (2.00%)
|
||||
1 (1.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_i128_f64 system time: [1.6071 µs 1.6100 µs 1.6133 µs]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
3 (3.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_f64_u32 compiler-builtins
|
||||
time: [640.35 ns 641.00 ns 641.68 ns]
|
||||
Found 6 outliers among 100 measurements (6.00%)
|
||||
4 (4.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
conv_f64_u32 system time: [640.87 ns 641.63 ns 642.42 ns]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
1 (1.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
conv_f64_u32 assembly (aarch64 unix)
|
||||
time: [482.02 ns 482.67 ns 483.38 ns]
|
||||
Found 1 outliers among 100 measurements (1.00%)
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_f64_u64 compiler-builtins
|
||||
time: [638.58 ns 638.98 ns 639.45 ns]
|
||||
Found 15 outliers among 100 measurements (15.00%)
|
||||
1 (1.00%) high mild
|
||||
14 (14.00%) high severe
|
||||
|
||||
conv_f64_u64 system time: [642.54 ns 644.07 ns 645.59 ns]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
3 (3.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_f64_u64 assembly (aarch64 unix)
|
||||
time: [482.65 ns 483.70 ns 484.87 ns]
|
||||
Found 1 outliers among 100 measurements (1.00%)
|
||||
1 (1.00%) high mild
|
||||
|
||||
conv_f64_u128 compiler-builtins
|
||||
time: [1.0631 µs 1.0652 µs 1.0674 µs]
|
||||
Found 8 outliers among 100 measurements (8.00%)
|
||||
7 (7.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_f64_u128 system time: [821.41 ns 823.45 ns 825.74 ns]
|
||||
Found 11 outliers among 100 measurements (11.00%)
|
||||
8 (8.00%) high mild
|
||||
3 (3.00%) high severe
|
||||
|
||||
conv_f64_i32 compiler-builtins
|
||||
time: [826.76 ns 845.08 ns 870.23 ns]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
4 (4.00%) high mild
|
||||
|
||||
conv_f64_i32 system time: [764.12 ns 764.63 ns 765.26 ns]
|
||||
Found 2 outliers among 100 measurements (2.00%)
|
||||
2 (2.00%) high severe
|
||||
|
||||
conv_f64_i32 assembly (aarch64 unix)
|
||||
time: [484.50 ns 485.98 ns 487.54 ns]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
1 (1.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
conv_f64_i64 compiler-builtins
|
||||
time: [797.27 ns 798.19 ns 799.84 ns]
|
||||
Found 9 outliers among 100 measurements (9.00%)
|
||||
5 (5.00%) high mild
|
||||
4 (4.00%) high severe
|
||||
|
||||
conv_f64_i64 system time: [768.74 ns 769.52 ns 770.23 ns]
|
||||
Found 1 outliers among 100 measurements (1.00%)
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_f64_i64 assembly (aarch64 unix)
|
||||
time: [480.59 ns 481.03 ns 481.46 ns]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
2 (2.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_f64_i128 compiler-builtins
|
||||
time: [1.0577 µs 1.0591 µs 1.0606 µs]
|
||||
Found 2 outliers among 100 measurements (2.00%)
|
||||
1 (1.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_f64_i128 system time: [1.0181 µs 1.0195 µs 1.0211 µs]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
3 (3.00%) high mild
|
||||
|
||||
conv_f32_u32 compiler-builtins
|
||||
time: [800.40 ns 801.39 ns 802.35 ns]
|
||||
Found 2 outliers among 100 measurements (2.00%)
|
||||
2 (2.00%) high mild
|
||||
|
||||
conv_f32_u32 system time: [638.12 ns 638.34 ns 638.63 ns]
|
||||
Found 11 outliers among 100 measurements (11.00%)
|
||||
4 (4.00%) high mild
|
||||
7 (7.00%) high severe
|
||||
|
||||
conv_f32_u32 assembly (aarch64 unix)
|
||||
time: [479.37 ns 480.97 ns 483.32 ns]
|
||||
Found 13 outliers among 100 measurements (13.00%)
|
||||
6 (6.00%) high mild
|
||||
7 (7.00%) high severe
|
||||
|
||||
conv_f32_u64 compiler-builtins
|
||||
time: [801.95 ns 803.64 ns 805.75 ns]
|
||||
|
||||
conv_f32_u64 system time: [638.20 ns 638.56 ns 639.07 ns]
|
||||
Found 10 outliers among 100 measurements (10.00%)
|
||||
1 (1.00%) high mild
|
||||
9 (9.00%) high severe
|
||||
|
||||
conv_f32_u64 assembly (aarch64 unix)
|
||||
time: [480.07 ns 480.47 ns 480.86 ns]
|
||||
Found 2 outliers among 100 measurements (2.00%)
|
||||
1 (1.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_f32_u128 compiler-builtins
|
||||
time: [1.1579 µs 1.1623 µs 1.1657 µs]
|
||||
Found 14 outliers among 100 measurements (14.00%)
|
||||
2 (2.00%) low severe
|
||||
7 (7.00%) high mild
|
||||
5 (5.00%) high severe
|
||||
|
||||
conv_f32_u128 system time: [1.0344 µs 1.0394 µs 1.0450 µs]
|
||||
|
||||
conv_f32_i32 compiler-builtins
|
||||
time: [800.14 ns 801.52 ns 803.26 ns]
|
||||
Found 10 outliers among 100 measurements (10.00%)
|
||||
8 (8.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
conv_f32_i32 system time: [741.36 ns 741.74 ns 742.13 ns]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
2 (2.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
conv_f32_i32 assembly (aarch64 unix)
|
||||
time: [484.35 ns 486.08 ns 488.11 ns]
|
||||
Found 17 outliers among 100 measurements (17.00%)
|
||||
9 (9.00%) high mild
|
||||
8 (8.00%) high severe
|
||||
|
||||
conv_f32_i64 compiler-builtins
|
||||
time: [800.94 ns 802.68 ns 804.74 ns]
|
||||
|
||||
conv_f32_i64 system time: [748.60 ns 750.68 ns 753.16 ns]
|
||||
Found 9 outliers among 100 measurements (9.00%)
|
||||
4 (4.00%) high mild
|
||||
5 (5.00%) high severe
|
||||
|
||||
conv_f32_i64 assembly (aarch64 unix)
|
||||
time: [480.70 ns 481.23 ns 481.82 ns]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
2 (2.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
conv_f32_i128 compiler-builtins
|
||||
time: [1.1774 µs 1.1829 µs 1.1887 µs]
|
||||
Found 11 outliers among 100 measurements (11.00%)
|
||||
1 (1.00%) low severe
|
||||
7 (7.00%) low mild
|
||||
1 (1.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
conv_f32_i128 system time: [1.1785 µs 1.1853 µs 1.1941 µs]
|
||||
Found 7 outliers among 100 measurements (7.00%)
|
||||
2 (2.00%) high mild
|
||||
5 (5.00%) high severe
|
||||
|
||||
div_f32 compiler-builtins
|
||||
time: [38.852 µs 39.011 µs 39.178 µs]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
3 (3.00%) high mild
|
||||
|
||||
div_f32 system time: [41.846 µs 41.920 µs 42.005 µs]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
1 (1.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
div_f32 assembly (aarch64 unix)
|
||||
time: [8.1309 µs 8.1627 µs 8.2005 µs]
|
||||
Found 2 outliers among 100 measurements (2.00%)
|
||||
2 (2.00%) high mild
|
||||
|
||||
div_f64 compiler-builtins
|
||||
time: [50.369 µs 50.605 µs 50.857 µs]
|
||||
Found 15 outliers among 100 measurements (15.00%)
|
||||
11 (11.00%) high mild
|
||||
4 (4.00%) high severe
|
||||
|
||||
div_f64 system time: [53.506 µs 53.582 µs 53.676 µs]
|
||||
Found 8 outliers among 100 measurements (8.00%)
|
||||
4 (4.00%) high mild
|
||||
4 (4.00%) high severe
|
||||
|
||||
div_f64 assembly (aarch64 unix)
|
||||
time: [8.0695 µs 8.0807 µs 8.0948 µs]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
2 (2.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
|
|
@ -0,0 +1,699 @@
|
|||
|
||||
running 0 tests
|
||||
|
||||
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s
|
||||
|
||||
add_f32/compiler-builtins
|
||||
time: [36.813 µs 37.048 µs 37.303 µs]
|
||||
Found 5 outliers among 100 measurements (5.00%)
|
||||
5 (5.00%) high mild
|
||||
add_f32/system time: [39.103 µs 39.142 µs 39.189 µs]
|
||||
Found 8 outliers among 100 measurements (8.00%)
|
||||
2 (2.00%) high mild
|
||||
6 (6.00%) high severe
|
||||
add_f32/assembly (aarch64 unix)
|
||||
time: [8.3786 µs 8.4680 µs 8.5570 µs]
|
||||
|
||||
add_f64/compiler-builtins
|
||||
time: [35.784 µs 35.819 µs 35.863 µs]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
1 (1.00%) high mild
|
||||
3 (3.00%) high severe
|
||||
add_f64/system time: [39.634 µs 39.689 µs 39.746 µs]
|
||||
Found 16 outliers among 100 measurements (16.00%)
|
||||
4 (4.00%) high mild
|
||||
12 (12.00%) high severe
|
||||
add_f64/assembly (aarch64 unix)
|
||||
time: [8.0533 µs 8.0599 µs 8.0670 µs]
|
||||
Found 14 outliers among 100 measurements (14.00%)
|
||||
6 (6.00%) high mild
|
||||
8 (8.00%) high severe
|
||||
|
||||
add_f128/compiler-builtins
|
||||
time: [41.830 µs 41.920 µs 42.005 µs]
|
||||
|
||||
cmp_f32_gt/compiler-builtins
|
||||
time: [13.405 µs 13.411 µs 13.418 µs]
|
||||
Found 18 outliers among 100 measurements (18.00%)
|
||||
4 (4.00%) high mild
|
||||
14 (14.00%) high severe
|
||||
cmp_f32_gt/system time: [12.348 µs 12.355 µs 12.363 µs]
|
||||
Found 12 outliers among 100 measurements (12.00%)
|
||||
2 (2.00%) high mild
|
||||
10 (10.00%) high severe
|
||||
cmp_f32_gt/assembly (aarch64 unix)
|
||||
time: [8.1233 µs 8.1625 µs 8.2072 µs]
|
||||
Found 12 outliers among 100 measurements (12.00%)
|
||||
7 (7.00%) high mild
|
||||
5 (5.00%) high severe
|
||||
|
||||
cmp_f32_unord/compiler-builtins
|
||||
time: [11.349 µs 11.467 µs 11.584 µs]
|
||||
cmp_f32_unord/system time: [8.0714 µs 8.0792 µs 8.0890 µs]
|
||||
Found 16 outliers among 100 measurements (16.00%)
|
||||
4 (4.00%) high mild
|
||||
12 (12.00%) high severe
|
||||
cmp_f32_unord/assembly (aarch64 unix)
|
||||
time: [8.1121 µs 8.1705 µs 8.2325 µs]
|
||||
Found 20 outliers among 100 measurements (20.00%)
|
||||
3 (3.00%) high mild
|
||||
17 (17.00%) high severe
|
||||
|
||||
cmp_f64_gt/compiler-builtins
|
||||
time: [13.749 µs 13.837 µs 13.934 µs]
|
||||
Found 20 outliers among 100 measurements (20.00%)
|
||||
9 (9.00%) low mild
|
||||
7 (7.00%) high mild
|
||||
4 (4.00%) high severe
|
||||
cmp_f64_gt/system time: [12.475 µs 12.515 µs 12.565 µs]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
4 (4.00%) high mild
|
||||
cmp_f64_gt/assembly (aarch64 unix)
|
||||
time: [8.0456 µs 8.0540 µs 8.0653 µs]
|
||||
Found 12 outliers among 100 measurements (12.00%)
|
||||
3 (3.00%) high mild
|
||||
9 (9.00%) high severe
|
||||
|
||||
cmp_f64_unord/compiler-builtins
|
||||
time: [10.723 µs 10.730 µs 10.739 µs]
|
||||
Found 15 outliers among 100 measurements (15.00%)
|
||||
5 (5.00%) high mild
|
||||
10 (10.00%) high severe
|
||||
cmp_f64_unord/system time: [8.0944 µs 8.1296 µs 8.1683 µs]
|
||||
Found 17 outliers among 100 measurements (17.00%)
|
||||
4 (4.00%) high mild
|
||||
13 (13.00%) high severe
|
||||
cmp_f64_unord/assembly (aarch64 unix)
|
||||
time: [8.1042 µs 8.1337 µs 8.1662 µs]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
3 (3.00%) high mild
|
||||
|
||||
cmp_f128_gt/compiler-builtins
|
||||
time: [20.508 µs 20.558 µs 20.615 µs]
|
||||
Found 8 outliers among 100 measurements (8.00%)
|
||||
2 (2.00%) high mild
|
||||
6 (6.00%) high severe
|
||||
|
||||
cmp_f128_unord/compiler-builtins
|
||||
time: [13.332 µs 13.346 µs 13.360 µs]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
2 (2.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
conv_u32_f32/compiler-builtins
|
||||
time: [621.20 ns 621.89 ns 622.65 ns]
|
||||
Found 7 outliers among 100 measurements (7.00%)
|
||||
4 (4.00%) high mild
|
||||
3 (3.00%) high severe
|
||||
conv_u32_f32/system time: [621.44 ns 622.08 ns 622.74 ns]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
3 (3.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
conv_u32_f32/assembly (aarch64 unix)
|
||||
time: [465.96 ns 466.65 ns 467.45 ns]
|
||||
Found 13 outliers among 100 measurements (13.00%)
|
||||
3 (3.00%) high mild
|
||||
10 (10.00%) high severe
|
||||
|
||||
conv_u32_f64/compiler-builtins
|
||||
time: [619.71 ns 620.51 ns 621.52 ns]
|
||||
Found 5 outliers among 100 measurements (5.00%)
|
||||
4 (4.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
conv_u32_f64/system time: [466.60 ns 467.14 ns 467.77 ns]
|
||||
Found 2 outliers among 100 measurements (2.00%)
|
||||
2 (2.00%) high mild
|
||||
conv_u32_f64/assembly (aarch64 unix)
|
||||
time: [464.02 ns 464.32 ns 464.69 ns]
|
||||
Found 2 outliers among 100 measurements (2.00%)
|
||||
1 (1.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_u64_f32/compiler-builtins
|
||||
time: [851.24 ns 852.98 ns 854.77 ns]
|
||||
Found 5 outliers among 100 measurements (5.00%)
|
||||
5 (5.00%) high mild
|
||||
conv_u64_f32/system time: [724.35 ns 729.43 ns 735.07 ns]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
4 (4.00%) high mild
|
||||
conv_u64_f32/assembly (aarch64 unix)
|
||||
time: [513.30 ns 514.64 ns 516.16 ns]
|
||||
Found 8 outliers among 100 measurements (8.00%)
|
||||
8 (8.00%) high mild
|
||||
|
||||
conv_u64_f64/compiler-builtins
|
||||
time: [850.72 ns 853.26 ns 856.54 ns]
|
||||
Found 15 outliers among 100 measurements (15.00%)
|
||||
2 (2.00%) high mild
|
||||
13 (13.00%) high severe
|
||||
conv_u64_f64/system time: [681.43 ns 682.54 ns 683.79 ns]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
3 (3.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
conv_u64_f64/assembly (aarch64 unix)
|
||||
time: [511.37 ns 511.71 ns 512.02 ns]
|
||||
Found 1 outliers among 100 measurements (1.00%)
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_u128_f32/compiler-builtins
|
||||
time: [1.1395 µs 1.1409 µs 1.1424 µs]
|
||||
Found 10 outliers among 100 measurements (10.00%)
|
||||
6 (6.00%) high mild
|
||||
4 (4.00%) high severe
|
||||
conv_u128_f32/system time: [1.4348 µs 1.4369 µs 1.4390 µs]
|
||||
Found 5 outliers among 100 measurements (5.00%)
|
||||
4 (4.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_u128_f64/compiler-builtins
|
||||
time: [1.0148 µs 1.0157 µs 1.0167 µs]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
3 (3.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
conv_u128_f64/system time: [1.3404 µs 1.3423 µs 1.3442 µs]
|
||||
Found 8 outliers among 100 measurements (8.00%)
|
||||
7 (7.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_i32_f32/compiler-builtins
|
||||
time: [902.89 ns 903.81 ns 904.84 ns]
|
||||
Found 7 outliers among 100 measurements (7.00%)
|
||||
4 (4.00%) high mild
|
||||
3 (3.00%) high severe
|
||||
conv_i32_f32/system time: [942.62 ns 949.04 ns 955.77 ns]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
3 (3.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
conv_i32_f32/assembly (aarch64 unix)
|
||||
time: [466.06 ns 466.60 ns 467.27 ns]
|
||||
Found 1 outliers among 100 measurements (1.00%)
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_i32_f64/compiler-builtins
|
||||
time: [618.98 ns 619.24 ns 619.55 ns]
|
||||
Found 17 outliers among 100 measurements (17.00%)
|
||||
1 (1.00%) low mild
|
||||
3 (3.00%) high mild
|
||||
13 (13.00%) high severe
|
||||
conv_i32_f64/system time: [622.18 ns 623.41 ns 624.85 ns]
|
||||
Found 8 outliers among 100 measurements (8.00%)
|
||||
5 (5.00%) high mild
|
||||
3 (3.00%) high severe
|
||||
conv_i32_f64/assembly (aarch64 unix)
|
||||
time: [466.26 ns 466.76 ns 467.35 ns]
|
||||
Found 9 outliers among 100 measurements (9.00%)
|
||||
5 (5.00%) high mild
|
||||
4 (4.00%) high severe
|
||||
|
||||
conv_i64_f32/compiler-builtins
|
||||
time: [850.11 ns 850.45 ns 850.88 ns]
|
||||
Found 15 outliers among 100 measurements (15.00%)
|
||||
1 (1.00%) low severe
|
||||
1 (1.00%) low mild
|
||||
3 (3.00%) high mild
|
||||
10 (10.00%) high severe
|
||||
conv_i64_f32/system time: [908.36 ns 908.70 ns 909.10 ns]
|
||||
Found 12 outliers among 100 measurements (12.00%)
|
||||
3 (3.00%) high mild
|
||||
9 (9.00%) high severe
|
||||
conv_i64_f32/assembly (aarch64 unix)
|
||||
time: [513.56 ns 514.44 ns 515.38 ns]
|
||||
Found 8 outliers among 100 measurements (8.00%)
|
||||
8 (8.00%) high mild
|
||||
|
||||
conv_i64_f64/compiler-builtins
|
||||
time: [935.39 ns 935.78 ns 936.26 ns]
|
||||
Found 13 outliers among 100 measurements (13.00%)
|
||||
5 (5.00%) high mild
|
||||
8 (8.00%) high severe
|
||||
conv_i64_f64/system time: [946.56 ns 947.33 ns 948.20 ns]
|
||||
Found 8 outliers among 100 measurements (8.00%)
|
||||
6 (6.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
conv_i64_f64/assembly (aarch64 unix)
|
||||
time: [511.55 ns 512.03 ns 512.56 ns]
|
||||
Found 21 outliers among 100 measurements (21.00%)
|
||||
4 (4.00%) high mild
|
||||
17 (17.00%) high severe
|
||||
|
||||
conv_i128_f32/compiler-builtins
|
||||
time: [1.4206 µs 1.4218 µs 1.4232 µs]
|
||||
Found 10 outliers among 100 measurements (10.00%)
|
||||
5 (5.00%) high mild
|
||||
5 (5.00%) high severe
|
||||
conv_i128_f32/system time: [1.6863 µs 1.6891 µs 1.6922 µs]
|
||||
Found 10 outliers among 100 measurements (10.00%)
|
||||
9 (9.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_i128_f64/compiler-builtins
|
||||
time: [1.3110 µs 1.3122 µs 1.3136 µs]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
2 (2.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
conv_i128_f64/system time: [1.6022 µs 1.6048 µs 1.6090 µs]
|
||||
Found 5 outliers among 100 measurements (5.00%)
|
||||
3 (3.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
conv_f64_u32/compiler-builtins
|
||||
time: [798.65 ns 799.42 ns 800.39 ns]
|
||||
Found 15 outliers among 100 measurements (15.00%)
|
||||
6 (6.00%) high mild
|
||||
9 (9.00%) high severe
|
||||
conv_f64_u32/system time: [639.48 ns 639.88 ns 640.40 ns]
|
||||
Found 16 outliers among 100 measurements (16.00%)
|
||||
1 (1.00%) low mild
|
||||
5 (5.00%) high mild
|
||||
10 (10.00%) high severe
|
||||
conv_f64_u32/assembly (aarch64 unix)
|
||||
time: [480.78 ns 481.35 ns 482.17 ns]
|
||||
Found 7 outliers among 100 measurements (7.00%)
|
||||
5 (5.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
conv_f64_u64/compiler-builtins
|
||||
time: [799.56 ns 800.54 ns 801.89 ns]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
2 (2.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
conv_f64_u64/system time: [640.72 ns 641.24 ns 641.81 ns]
|
||||
Found 5 outliers among 100 measurements (5.00%)
|
||||
3 (3.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
conv_f64_u64/assembly (aarch64 unix)
|
||||
time: [481.54 ns 482.48 ns 483.53 ns]
|
||||
Found 6 outliers among 100 measurements (6.00%)
|
||||
1 (1.00%) low severe
|
||||
1 (1.00%) low mild
|
||||
3 (3.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_f64_u128/compiler-builtins
|
||||
time: [1.0510 µs 1.0515 µs 1.0520 µs]
|
||||
Found 13 outliers among 100 measurements (13.00%)
|
||||
1 (1.00%) low mild
|
||||
2 (2.00%) high mild
|
||||
10 (10.00%) high severe
|
||||
conv_f64_u128/system time: [818.45 ns 819.23 ns 820.15 ns]
|
||||
Found 2 outliers among 100 measurements (2.00%)
|
||||
2 (2.00%) high mild
|
||||
|
||||
conv_f64_i32/compiler-builtins
|
||||
time: [800.56 ns 801.31 ns 802.21 ns]
|
||||
Found 5 outliers among 100 measurements (5.00%)
|
||||
3 (3.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
conv_f64_i32/system time: [765.62 ns 766.15 ns 766.80 ns]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
2 (2.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
conv_f64_i32/assembly (aarch64 unix)
|
||||
time: [471.65 ns 472.77 ns 473.89 ns]
|
||||
Found 10 outliers among 100 measurements (10.00%)
|
||||
1 (1.00%) low mild
|
||||
8 (8.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_f64_i64/compiler-builtins
|
||||
time: [801.00 ns 804.55 ns 808.72 ns]
|
||||
Found 18 outliers among 100 measurements (18.00%)
|
||||
6 (6.00%) high mild
|
||||
12 (12.00%) high severe
|
||||
conv_f64_i64/system time: [770.28 ns 772.47 ns 775.21 ns]
|
||||
Found 2 outliers among 100 measurements (2.00%)
|
||||
2 (2.00%) high mild
|
||||
conv_f64_i64/assembly (aarch64 unix)
|
||||
time: [491.56 ns 494.96 ns 499.19 ns]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
2 (2.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_f64_i128/compiler-builtins
|
||||
time: [1.0637 µs 1.0704 µs 1.0762 µs]
|
||||
Found 5 outliers among 100 measurements (5.00%)
|
||||
5 (5.00%) high mild
|
||||
conv_f64_i128/system time: [1.0022 µs 1.0027 µs 1.0033 µs]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
1 (1.00%) low severe
|
||||
3 (3.00%) high severe
|
||||
|
||||
conv_f32_u32/compiler-builtins
|
||||
time: [644.56 ns 647.01 ns 649.95 ns]
|
||||
Found 15 outliers among 100 measurements (15.00%)
|
||||
13 (13.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
conv_f32_u32/system time: [648.12 ns 651.20 ns 654.54 ns]
|
||||
Found 9 outliers among 100 measurements (9.00%)
|
||||
7 (7.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
conv_f32_u32/assembly (aarch64 unix)
|
||||
time: [481.02 ns 482.71 ns 484.60 ns]
|
||||
Found 12 outliers among 100 measurements (12.00%)
|
||||
1 (1.00%) low mild
|
||||
10 (10.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
conv_f32_u64/compiler-builtins
|
||||
time: [644.14 ns 646.61 ns 649.53 ns]
|
||||
Found 11 outliers among 100 measurements (11.00%)
|
||||
6 (6.00%) high mild
|
||||
5 (5.00%) high severe
|
||||
conv_f32_u64/system time: [646.21 ns 650.17 ns 654.55 ns]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
3 (3.00%) high mild
|
||||
conv_f32_u64/assembly (aarch64 unix)
|
||||
time: [473.36 ns 474.60 ns 476.00 ns]
|
||||
Found 9 outliers among 100 measurements (9.00%)
|
||||
2 (2.00%) low mild
|
||||
5 (5.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
conv_f32_u128/compiler-builtins
|
||||
time: [1.0820 µs 1.0828 µs 1.0839 µs]
|
||||
Found 2 outliers among 100 measurements (2.00%)
|
||||
1 (1.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
conv_f32_u128/system time: [1.0003 µs 1.0042 µs 1.0076 µs]
|
||||
Found 21 outliers among 100 measurements (21.00%)
|
||||
1 (1.00%) low mild
|
||||
3 (3.00%) high mild
|
||||
17 (17.00%) high severe
|
||||
|
||||
conv_f32_i32/compiler-builtins
|
||||
time: [801.13 ns 801.82 ns 802.53 ns]
|
||||
Found 2 outliers among 100 measurements (2.00%)
|
||||
2 (2.00%) high severe
|
||||
conv_f32_i32/system time: [745.17 ns 745.97 ns 746.78 ns]
|
||||
Found 2 outliers among 100 measurements (2.00%)
|
||||
2 (2.00%) high severe
|
||||
conv_f32_i32/assembly (aarch64 unix)
|
||||
time: [469.87 ns 470.65 ns 471.57 ns]
|
||||
Found 1 outliers among 100 measurements (1.00%)
|
||||
1 (1.00%) high mild
|
||||
|
||||
conv_f32_i64/compiler-builtins
|
||||
time: [799.44 ns 799.94 ns 800.59 ns]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
1 (1.00%) high mild
|
||||
3 (3.00%) high severe
|
||||
conv_f32_i64/system time: [744.81 ns 745.17 ns 745.62 ns]
|
||||
Found 14 outliers among 100 measurements (14.00%)
|
||||
5 (5.00%) high mild
|
||||
9 (9.00%) high severe
|
||||
conv_f32_i64/assembly (aarch64 unix)
|
||||
time: [465.06 ns 466.01 ns 467.12 ns]
|
||||
Found 13 outliers among 100 measurements (13.00%)
|
||||
2 (2.00%) low severe
|
||||
5 (5.00%) high mild
|
||||
6 (6.00%) high severe
|
||||
|
||||
conv_f32_i128/compiler-builtins
|
||||
time: [1.1390 µs 1.1515 µs 1.1637 µs]
|
||||
conv_f32_i128/system time: [1.1315 µs 1.1330 µs 1.1347 µs]
|
||||
Found 6 outliers among 100 measurements (6.00%)
|
||||
3 (3.00%) low mild
|
||||
2 (2.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
div_f32/compiler-builtins
|
||||
time: [39.408 µs 39.676 µs 39.969 µs]
|
||||
Found 5 outliers among 100 measurements (5.00%)
|
||||
5 (5.00%) high mild
|
||||
div_f32/system time: [42.108 µs 42.248 µs 42.528 µs]
|
||||
Found 11 outliers among 100 measurements (11.00%)
|
||||
4 (4.00%) high mild
|
||||
7 (7.00%) high severe
|
||||
div_f32/assembly (aarch64 unix)
|
||||
time: [8.0724 µs 8.0794 µs 8.0870 µs]
|
||||
Found 7 outliers among 100 measurements (7.00%)
|
||||
5 (5.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
div_f64/compiler-builtins
|
||||
time: [49.992 µs 50.014 µs 50.040 µs]
|
||||
Found 5 outliers among 100 measurements (5.00%)
|
||||
5 (5.00%) high severe
|
||||
div_f64/system time: [53.577 µs 53.651 µs 53.743 µs]
|
||||
Found 6 outliers among 100 measurements (6.00%)
|
||||
4 (4.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
div_f64/assembly (aarch64 unix)
|
||||
time: [8.0976 µs 8.1064 µs 8.1158 µs]
|
||||
Found 6 outliers among 100 measurements (6.00%)
|
||||
3 (3.00%) high mild
|
||||
3 (3.00%) high severe
|
||||
|
||||
extend_f16_f32/compiler-builtins
|
||||
time: [804.09 ns 805.38 ns 807.09 ns]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
1 (1.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
extend_f16_f32/system time: [641.07 ns 641.76 ns 642.60 ns]
|
||||
Found 12 outliers among 100 measurements (12.00%)
|
||||
6 (6.00%) high mild
|
||||
6 (6.00%) high severe
|
||||
extend_f16_f32/assembly (aarch64 unix)
|
||||
time: [456.69 ns 457.14 ns 457.68 ns]
|
||||
Found 8 outliers among 100 measurements (8.00%)
|
||||
4 (4.00%) low mild
|
||||
2 (2.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
extend_f16_f128/compiler-builtins
|
||||
time: [1.1025 µs 1.1035 µs 1.1045 µs]
|
||||
Found 2 outliers among 100 measurements (2.00%)
|
||||
1 (1.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
extend_f32_f64/compiler-builtins
|
||||
time: [799.30 ns 799.68 ns 800.16 ns]
|
||||
Found 13 outliers among 100 measurements (13.00%)
|
||||
3 (3.00%) high mild
|
||||
10 (10.00%) high severe
|
||||
extend_f32_f64/system time: [992.48 ns 993.27 ns 994.32 ns]
|
||||
Found 15 outliers among 100 measurements (15.00%)
|
||||
3 (3.00%) high mild
|
||||
12 (12.00%) high severe
|
||||
extend_f32_f64/assembly (aarch64 unix)
|
||||
time: [457.65 ns 460.39 ns 463.78 ns]
|
||||
|
||||
extend_f32_f128/compiler-builtins
|
||||
time: [1.0295 µs 1.0311 µs 1.0327 µs]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
2 (2.00%) low mild
|
||||
1 (1.00%) high mild
|
||||
|
||||
extend_f64_f128/compiler-builtins
|
||||
time: [1.0400 µs 1.0412 µs 1.0426 µs]
|
||||
Found 2 outliers among 100 measurements (2.00%)
|
||||
2 (2.00%) high mild
|
||||
|
||||
mul_f32/compiler-builtins
|
||||
time: [25.604 µs 25.705 µs 25.818 µs]
|
||||
Found 23 outliers among 100 measurements (23.00%)
|
||||
17 (17.00%) low severe
|
||||
3 (3.00%) high mild
|
||||
3 (3.00%) high severe
|
||||
mul_f32/system time: [29.914 µs 29.977 µs 30.043 µs]
|
||||
Found 5 outliers among 100 measurements (5.00%)
|
||||
5 (5.00%) high mild
|
||||
mul_f32/assembly (aarch64 unix)
|
||||
time: [8.1384 µs 8.1964 µs 8.2603 µs]
|
||||
Found 13 outliers among 100 measurements (13.00%)
|
||||
3 (3.00%) high mild
|
||||
10 (10.00%) high severe
|
||||
|
||||
mul_f64/compiler-builtins
|
||||
time: [25.596 µs 25.615 µs 25.637 µs]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
2 (2.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
mul_f64/system time: [30.931 µs 30.963 µs 31.002 µs]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
3 (3.00%) high mild
|
||||
mul_f64/assembly (aarch64 unix)
|
||||
time: [8.0589 µs 8.0638 µs 8.0695 µs]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
1 (1.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
mul_f128/compiler-builtins
|
||||
time: [54.242 µs 54.306 µs 54.374 µs]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
2 (2.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
powi_f32/compiler-builtins
|
||||
time: [129.91 µs 130.09 µs 130.24 µs]
|
||||
powi_f32/system time: [126.97 µs 127.34 µs 127.82 µs]
|
||||
Found 4 outliers among 100 measurements (4.00%)
|
||||
3 (3.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
powi_f64/compiler-builtins
|
||||
time: [130.08 µs 130.81 µs 131.46 µs]
|
||||
Found 13 outliers among 100 measurements (13.00%)
|
||||
13 (13.00%) high mild
|
||||
powi_f64/system time: [128.51 µs 128.68 µs 128.88 µs]
|
||||
Found 21 outliers among 100 measurements (21.00%)
|
||||
4 (4.00%) high mild
|
||||
17 (17.00%) high severe
|
||||
|
||||
sub_f32/compiler-builtins
|
||||
time: [37.861 µs 38.012 µs 38.158 µs]
|
||||
Found 26 outliers among 100 measurements (26.00%)
|
||||
18 (18.00%) low mild
|
||||
7 (7.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
sub_f32/system time: [39.586 µs 39.628 µs 39.673 µs]
|
||||
Found 2 outliers among 100 measurements (2.00%)
|
||||
1 (1.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
sub_f32/assembly (aarch64 unix)
|
||||
time: [8.0976 µs 8.1584 µs 8.2208 µs]
|
||||
Found 6 outliers among 100 measurements (6.00%)
|
||||
6 (6.00%) high mild
|
||||
|
||||
sub_f64/compiler-builtins
|
||||
time: [37.755 µs 37.838 µs 37.921 µs]
|
||||
Found 25 outliers among 100 measurements (25.00%)
|
||||
7 (7.00%) low severe
|
||||
3 (3.00%) low mild
|
||||
4 (4.00%) high mild
|
||||
11 (11.00%) high severe
|
||||
sub_f64/system time: [39.979 µs 40.019 µs 40.064 µs]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
2 (2.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
sub_f64/assembly (aarch64 unix)
|
||||
time: [8.0669 µs 8.0733 µs 8.0801 µs]
|
||||
Found 7 outliers among 100 measurements (7.00%)
|
||||
3 (3.00%) high mild
|
||||
4 (4.00%) high severe
|
||||
|
||||
sub_f128/compiler-builtins
|
||||
time: [68.618 µs 68.899 µs 69.293 µs]
|
||||
Found 11 outliers among 100 measurements (11.00%)
|
||||
2 (2.00%) high mild
|
||||
9 (9.00%) high severe
|
||||
|
||||
trunc_f32_f16/compiler-builtins
|
||||
time: [1.3343 µs 1.3468 µs 1.3608 µs]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
1 (1.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
trunc_f32_f16/system time: [1.2687 µs 1.2714 µs 1.2738 µs]
|
||||
trunc_f32_f16/assembly (aarch64 unix)
|
||||
time: [470.06 ns 472.96 ns 475.30 ns]
|
||||
|
||||
trunc_f64_f16/compiler-builtins
|
||||
time: [1.2729 µs 1.2738 µs 1.2749 µs]
|
||||
Found 7 outliers among 100 measurements (7.00%)
|
||||
2 (2.00%) high mild
|
||||
5 (5.00%) high severe
|
||||
trunc_f64_f16/assembly (aarch64 unix)
|
||||
time: [455.91 ns 456.61 ns 457.33 ns]
|
||||
Found 12 outliers among 100 measurements (12.00%)
|
||||
1 (1.00%) low severe
|
||||
2 (2.00%) low mild
|
||||
6 (6.00%) high mild
|
||||
3 (3.00%) high severe
|
||||
|
||||
trunc_f64_f32/compiler-builtins
|
||||
time: [1.2240 µs 1.2325 µs 1.2410 µs]
|
||||
Found 17 outliers among 100 measurements (17.00%)
|
||||
4 (4.00%) low mild
|
||||
2 (2.00%) high mild
|
||||
11 (11.00%) high severe
|
||||
trunc_f64_f32/system time: [1.2784 µs 1.2835 µs 1.2884 µs]
|
||||
Found 10 outliers among 100 measurements (10.00%)
|
||||
6 (6.00%) low severe
|
||||
1 (1.00%) low mild
|
||||
2 (2.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
trunc_f64_f32/assembly (aarch64 unix)
|
||||
time: [455.64 ns 456.08 ns 456.58 ns]
|
||||
Found 18 outliers among 100 measurements (18.00%)
|
||||
3 (3.00%) low severe
|
||||
4 (4.00%) low mild
|
||||
8 (8.00%) high mild
|
||||
3 (3.00%) high severe
|
||||
|
||||
trunc_f128_f16/compiler-builtins
|
||||
time: [1.2563 µs 1.2666 µs 1.2776 µs]
|
||||
Found 3 outliers among 100 measurements (3.00%)
|
||||
3 (3.00%) high mild
|
||||
|
||||
trunc_f128_f32/compiler-builtins
|
||||
time: [1.2459 µs 1.2482 µs 1.2507 µs]
|
||||
Found 6 outliers among 100 measurements (6.00%)
|
||||
2 (2.00%) low mild
|
||||
2 (2.00%) high mild
|
||||
2 (2.00%) high severe
|
||||
|
||||
trunc_f128_f64/compiler-builtins
|
||||
time: [1.2821 µs 1.3047 µs 1.3452 µs]
|
||||
Found 8 outliers among 100 measurements (8.00%)
|
||||
4 (4.00%) low severe
|
||||
1 (1.00%) low mild
|
||||
2 (2.00%) high mild
|
||||
1 (1.00%) high severe
|
||||
|
||||
|
||||
running 52 tests
|
||||
test memcmp_builtin_1048576 ... bench: 20,975.52 ns/iter (+/- 239.69) = 49991 MB/s
|
||||
test memcmp_builtin_16 ... bench: 1.60 ns/iter (+/- 0.05) = 16000 MB/s
|
||||
test memcmp_builtin_32 ... bench: 1.61 ns/iter (+/- 0.03) = 32000 MB/s
|
||||
test memcmp_builtin_4096 ... bench: 95.84 ns/iter (+/- 2.82) = 43115 MB/s
|
||||
test memcmp_builtin_64 ... bench: 2.39 ns/iter (+/- 0.09) = 32000 MB/s
|
||||
test memcmp_builtin_8 ... bench: 1.60 ns/iter (+/- 0.04) = 8000 MB/s
|
||||
test memcmp_builtin_unaligned_1048575 ... bench: 22,060.00 ns/iter (+/- 873.55) = 47532 MB/s
|
||||
test memcmp_builtin_unaligned_15 ... bench: 3.19 ns/iter (+/- 0.02) = 5333 MB/s
|
||||
test memcmp_builtin_unaligned_31 ... bench: 1.61 ns/iter (+/- 0.01) = 32000 MB/s
|
||||
test memcmp_builtin_unaligned_4095 ... bench: 96.63 ns/iter (+/- 4.58) = 42666 MB/s
|
||||
test memcmp_builtin_unaligned_63 ... bench: 2.40 ns/iter (+/- 0.11) = 32000 MB/s
|
||||
test memcmp_builtin_unaligned_7 ... bench: 3.37 ns/iter (+/- 0.05) = 2666 MB/s
|
||||
test memcmp_rust_1048576 ... bench: 309,647.23 ns/iter (+/- 6,077.35) = 3386 MB/s
|
||||
test memcmp_rust_16 ... bench: 5.66 ns/iter (+/- 0.30) = 3200 MB/s
|
||||
test memcmp_rust_32 ... bench: 10.47 ns/iter (+/- 0.14) = 3200 MB/s
|
||||
test memcmp_rust_4096 ... bench: 1,124.34 ns/iter (+/- 36.92) = 3644 MB/s
|
||||
test memcmp_rust_64 ... bench: 19.90 ns/iter (+/- 0.36) = 3368 MB/s
|
||||
test memcmp_rust_8 ... bench: 3.46 ns/iter (+/- 0.11) = 2666 MB/s
|
||||
test memcmp_rust_unaligned_1048575 ... bench: 308,613.87 ns/iter (+/- 6,613.18) = 3397 MB/s
|
||||
test memcmp_rust_unaligned_15 ... bench: 5.35 ns/iter (+/- 0.05) = 3200 MB/s
|
||||
test memcmp_rust_unaligned_31 ... bench: 9.94 ns/iter (+/- 0.06) = 3555 MB/s
|
||||
test memcmp_rust_unaligned_4095 ... bench: 1,120.06 ns/iter (+/- 5.03) = 3657 MB/s
|
||||
test memcmp_rust_unaligned_63 ... bench: 19.64 ns/iter (+/- 0.82) = 3368 MB/s
|
||||
test memcmp_rust_unaligned_7 ... bench: 3.22 ns/iter (+/- 0.10) = 2666 MB/s
|
||||
test memcpy_builtin_1048576 ... bench: 12,538.05 ns/iter (+/- 354.79) = 83631 MB/s
|
||||
test memcpy_builtin_1048576_misalign ... bench: 30,092.56 ns/iter (+/- 8,064.04) = 34845 MB/s
|
||||
test memcpy_builtin_1048576_offset ... bench: 12,538.36 ns/iter (+/- 359.04) = 83631 MB/s
|
||||
test memcpy_builtin_4096 ... bench: 44.24 ns/iter (+/- 6.80) = 93090 MB/s
|
||||
test memcpy_builtin_4096_misalign ... bench: 45.34 ns/iter (+/- 2.13) = 91022 MB/s
|
||||
test memcpy_builtin_4096_offset ... bench: 44.71 ns/iter (+/- 0.61) = 93090 MB/s
|
||||
test memcpy_rust_1048576 ... bench: 17,943.33 ns/iter (+/- 243.18) = 58439 MB/s
|
||||
test memcpy_rust_1048576_misalign ... bench: 15,004.68 ns/iter (+/- 3,978.65) = 69886 MB/s
|
||||
test memcpy_rust_1048576_offset ... bench: 14,722.06 ns/iter (+/- 479.54) = 71225 MB/s
|
||||
test memcpy_rust_4096 ... bench: 44.91 ns/iter (+/- 4.62) = 93090 MB/s
|
||||
test memcpy_rust_4096_misalign ... bench: 76.21 ns/iter (+/- 8.21) = 53894 MB/s
|
||||
test memcpy_rust_4096_offset ... bench: 76.27 ns/iter (+/- 4.69) = 53894 MB/s
|
||||
test memmove_builtin_1048576 ... bench: 18,644.50 ns/iter (+/- 379.84) = 56242 MB/s
|
||||
test memmove_builtin_1048576_misalign ... bench: 18,947.70 ns/iter (+/- 1,226.26) = 55342 MB/s
|
||||
test memmove_builtin_4096 ... bench: 44.21 ns/iter (+/- 0.79) = 93090 MB/s
|
||||
test memmove_builtin_4096_misalign ... bench: 47.21 ns/iter (+/- 3.12) = 87148 MB/s
|
||||
test memmove_rust_1048576 ... bench: 34,813.33 ns/iter (+/- 3,637.47) = 30120 MB/s
|
||||
test memmove_rust_1048576_misalign ... bench: 35,067.19 ns/iter (+/- 1,699.63) = 29902 MB/s
|
||||
test memmove_rust_4096 ... bench: 148.69 ns/iter (+/- 1.31) = 27675 MB/s
|
||||
test memmove_rust_4096_misalign ... bench: 153.81 ns/iter (+/- 1.71) = 26771 MB/s
|
||||
test memset_builtin_1048576 ... bench: 15,704.12 ns/iter (+/- 12,113.86) = 66771 MB/s
|
||||
test memset_builtin_1048576_offset ... bench: 17,894.23 ns/iter (+/- 175.12) = 58599 MB/s
|
||||
test memset_builtin_4096 ... bench: 39.95 ns/iter (+/- 0.19) = 105025 MB/s
|
||||
test memset_builtin_4096_offset ... bench: 40.48 ns/iter (+/- 3.11) = 102400 MB/s
|
||||
test memset_rust_1048576 ... bench: 10,600.66 ns/iter (+/- 1,559.93) = 98922 MB/s
|
||||
test memset_rust_1048576_offset ... bench: 14,810.85 ns/iter (+/- 575.27) = 70801 MB/s
|
||||
test memset_rust_4096 ... bench: 37.91 ns/iter (+/- 2.77) = 110702 MB/s
|
||||
test memset_rust_4096_offset ... bench: 59.99 ns/iter (+/- 10.45) = 69423 MB/s
|
||||
|
||||
test result: ok. 0 passed; 0 failed; 0 ignored; 52 measured; 0 filtered out; finished in 97.74s
|
||||
|
||||
81
library/compiler-builtins/testcrate/benches/float_add.rs
Normal file
81
library/compiler-builtins/testcrate/benches/float_add.rs
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
#![feature(f128)]
|
||||
|
||||
use compiler_builtins::float::add;
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use testcrate::float_bench;
|
||||
|
||||
// Benchmark `f32` addition: the crate's `__addsf3`, the system symbol of the
// same name, and a single native add instruction where one is written below.
float_bench! {
    name: add_f32,
    sig: (a: f32, b: f32) -> f32,
    crate_fn: add::__addsf3,
    sys_fn: __addsf3,
    sys_available: all(),
    asm: [
        // Two-operand form: the sum overwrites the first operand.
        #[cfg(target_arch = "x86_64")] {
            asm!(
                "addss {lhs}, {rhs}",
                lhs = inout(xmm_reg) a,
                rhs = in(xmm_reg) b,
                options(pure, nomem, nostack),
            );
            a
        };

        // Three-operand form, with the destination reusing the first input.
        #[cfg(target_arch = "aarch64")] {
            asm!(
                "fadd {lhs:s}, {lhs:s}, {rhs:s}",
                lhs = inout(vreg) a,
                rhs = in(vreg) b,
                options(pure, nomem, nostack),
            );
            a
        };
    ],
}
|
||||
|
||||
// Benchmark `f64` addition: the crate's `__adddf3`, the system symbol of the
// same name, and a single native add instruction where one is written below.
float_bench! {
    name: add_f64,
    sig: (a: f64, b: f64) -> f64,
    crate_fn: add::__adddf3,
    sys_fn: __adddf3,
    sys_available: all(),
    asm: [
        // Two-operand form: the sum overwrites the first operand.
        #[cfg(target_arch = "x86_64")] {
            asm!(
                "addsd {lhs}, {rhs}",
                lhs = inout(xmm_reg) a,
                rhs = in(xmm_reg) b,
                options(pure, nomem, nostack),
            );
            a
        };

        // Three-operand form, with the destination reusing the first input.
        #[cfg(target_arch = "aarch64")] {
            asm!(
                "fadd {lhs:d}, {lhs:d}, {rhs:d}",
                lhs = inout(vreg) a,
                rhs = in(vreg) b,
                options(pure, nomem, nostack),
            );
            a
        };
    ],
}
|
||||
|
||||
// Benchmark `f128` addition. No assembly variant is provided; the system
// symbol is only used when the `no-sys-f128` feature is not enabled.
// NOTE(review): the `*_ppc` entries look like the PowerPC symbol names
// (`kf` instead of `tf`) -- confirm against the `float_bench!` definition.
float_bench! {
    name: add_f128,
    sig: (a: f128, b: f128) -> f128,
    crate_fn: add::__addtf3,
    crate_fn_ppc: add::__addkf3,
    sys_fn: __addtf3,
    sys_fn_ppc: __addkf3,
    sys_available: not(feature = "no-sys-f128"),
    asm: []
}
|
||||
|
||||
criterion_group!(float_add, add_f32, add_f64, add_f128);
|
||||
criterion_main!(float_add);
|
||||
202
library/compiler-builtins/testcrate/benches/float_cmp.rs
Normal file
202
library/compiler-builtins/testcrate/benches/float_cmp.rs
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
#![feature(f128)]
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use testcrate::float_bench;
|
||||
|
||||
use compiler_builtins::float::cmp;
|
||||
|
||||
/// Decide whether two return values of a `gt`-style comparison symbol agree.
///
/// Such symbols may return differing integers for the same inputs, because
/// callers only ever compare the result against 0. Two results therefore count
/// as equal when both are positive or both are non-positive.
fn gt_res_eq(a: i32, b: i32) -> bool {
    // Only the "greater than zero" classification carries meaning.
    (a > 0) == (b > 0)
}
|
||||
|
||||
// Benchmark the `f32` greater-than comparison: the crate's `__gtsf2`, the
// system symbol of the same name, and native compare instructions.
// Results are compared with `gt_res_eq` rather than exact equality.
float_bench! {
    name: cmp_f32_gt,
    sig: (a: f32, b: f32) -> i32,
    crate_fn: cmp::__gtsf2,
    sys_fn: __gtsf2,
    sys_available: all(),
    output_eq: gt_res_eq,
    asm: [
        #[cfg(target_arch = "x86_64")] {
            let ret: i32;
            asm!(
                // Clear the whole register first; `seta` only writes the low byte.
                "xor {ret:e}, {ret:e}",
                "ucomiss {a}, {b}",
                "seta {ret:l}",
                a = in(xmm_reg) a,
                b = in(xmm_reg) b,
                ret = out(reg) ret,
                options(nomem, nostack, pure)
            );

            ret
        };

        #[cfg(target_arch = "aarch64")] {
            let ret: i32;
            asm!(
                "fcmp {a:s}, {b:s}",
                "cset {ret:w}, gt",
                a = in(vreg) a,
                b = in(vreg) b,
                ret = out(reg) ret,
                // Same option set as the other comparison benchmarks in this file.
                options(nomem, nostack, pure)
            );

            ret
        };
    ],
}
|
||||
|
||||
// Benchmark the `f32` unordered check: the crate's `__unordsf2`, the system
// symbol of the same name, and native compare instructions.
float_bench! {
    name: cmp_f32_unord,
    sig: (a: f32, b: f32) -> i32,
    crate_fn: cmp::__unordsf2,
    sys_fn: __unordsf2,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "x86_64")] {
            let flag: i32;
            asm!(
                // Zero the full register; `setp` only writes the low byte.
                "xor {dst:e}, {dst:e}",
                "ucomiss {lhs}, {rhs}",
                "setp {dst:l}",
                lhs = in(xmm_reg) a,
                rhs = in(xmm_reg) b,
                dst = out(reg) flag,
                options(pure, nomem, nostack),
            );
            flag
        };

        #[cfg(target_arch = "aarch64")] {
            let flag: i32;
            asm!(
                "fcmp {lhs:s}, {rhs:s}",
                "cset {dst:w}, vs",
                lhs = in(vreg) a,
                rhs = in(vreg) b,
                dst = out(reg) flag,
                options(pure, nomem, nostack),
            );
            flag
        };
    ],
}
|
||||
|
||||
// Benchmark the `f64` greater-than comparison: the crate's `__gtdf2`, the
// system symbol of the same name, and native compare instructions.
// Results are compared with `gt_res_eq` rather than exact equality.
float_bench! {
    name: cmp_f64_gt,
    sig: (a: f64, b: f64) -> i32,
    crate_fn: cmp::__gtdf2,
    sys_fn: __gtdf2,
    sys_available: all(),
    output_eq: gt_res_eq,
    asm: [
        #[cfg(target_arch = "x86_64")] {
            let flag: i32;
            asm!(
                // Zero the full register; `seta` only writes the low byte.
                "xor {dst:e}, {dst:e}",
                "ucomisd {lhs}, {rhs}",
                "seta {dst:l}",
                lhs = in(xmm_reg) a,
                rhs = in(xmm_reg) b,
                dst = out(reg) flag,
                options(pure, nomem, nostack),
            );
            flag
        };

        #[cfg(target_arch = "aarch64")] {
            let flag: i32;
            asm!(
                "fcmp {lhs:d}, {rhs:d}",
                "cset {dst:w}, gt",
                lhs = in(vreg) a,
                rhs = in(vreg) b,
                dst = out(reg) flag,
                options(pure, nomem, nostack),
            );
            flag
        };
    ],
}
|
||||
|
||||
// Benchmark the `f64` unordered check: the crate's `__unorddf2`, the system
// symbol of the same name, and native compare instructions.
float_bench! {
    name: cmp_f64_unord,
    sig: (a: f64, b: f64) -> i32,
    crate_fn: cmp::__unorddf2,
    sys_fn: __unorddf2,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "x86_64")] {
            let flag: i32;
            asm!(
                // Zero the full register; `setp` only writes the low byte.
                "xor {dst:e}, {dst:e}",
                "ucomisd {lhs}, {rhs}",
                "setp {dst:l}",
                lhs = in(xmm_reg) a,
                rhs = in(xmm_reg) b,
                dst = out(reg) flag,
                options(pure, nomem, nostack),
            );
            flag
        };

        #[cfg(target_arch = "aarch64")] {
            let flag: i32;
            asm!(
                "fcmp {lhs:d}, {rhs:d}",
                "cset {dst:w}, vs",
                lhs = in(vreg) a,
                rhs = in(vreg) b,
                dst = out(reg) flag,
                options(pure, nomem, nostack),
            );
            flag
        };
    ],
}
|
||||
|
||||
// Benchmark the `f128` greater-than comparison. No assembly variant is
// provided; the system symbol is only used when the `no-sys-f128` feature is
// not enabled. Results are compared with `gt_res_eq`.
// NOTE(review): the `*_ppc` entries look like the PowerPC symbol names
// (`kf` instead of `tf`) -- confirm against the `float_bench!` definition.
float_bench! {
    name: cmp_f128_gt,
    sig: (a: f128, b: f128) -> i32,
    crate_fn: cmp::__gttf2,
    crate_fn_ppc: cmp::__gtkf2,
    sys_fn: __gttf2,
    sys_fn_ppc: __gtkf2,
    sys_available: not(feature = "no-sys-f128"),
    output_eq: gt_res_eq,
    asm: []
}
|
||||
|
||||
// Benchmark the `f128` unordered check. No assembly variant is provided; the
// system symbol is only used when the `no-sys-f128` feature is not enabled.
// NOTE(review): the `*_ppc` entries look like the PowerPC symbol names
// (`kf` instead of `tf`) -- confirm against the `float_bench!` definition.
float_bench! {
    name: cmp_f128_unord,
    sig: (a: f128, b: f128) -> i32,
    crate_fn: cmp::__unordtf2,
    crate_fn_ppc: cmp::__unordkf2,
    sys_fn: __unordtf2,
    sys_fn_ppc: __unordkf2,
    sys_available: not(feature = "no-sys-f128"),
    asm: []
}
|
||||
|
||||
criterion_group!(
|
||||
float_cmp,
|
||||
cmp_f32_gt,
|
||||
cmp_f32_unord,
|
||||
cmp_f64_gt,
|
||||
cmp_f64_unord,
|
||||
cmp_f128_gt,
|
||||
cmp_f128_unord
|
||||
);
|
||||
criterion_main!(float_cmp);
|
||||
547
library/compiler-builtins/testcrate/benches/float_conv.rs
Normal file
547
library/compiler-builtins/testcrate/benches/float_conv.rs
Normal file
|
|
@ -0,0 +1,547 @@
|
|||
#![feature(f128)]
|
||||
#![allow(improper_ctypes)]
|
||||
|
||||
use compiler_builtins::float::conv;
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use testcrate::float_bench;
|
||||
|
||||
/* unsigned int -> float */
|
||||
|
||||
// Benchmark `u32 -> f32`: the crate's `__floatunsisf`, the system symbol of
// the same name, and native conversion instructions.
float_bench! {
    name: conv_u32_f32,
    sig: (a: u32) -> f32,
    crate_fn: conv::__floatunsisf,
    sys_fn: __floatunsisf,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "x86_64")] {
            let res: f32;
            asm!(
                // Copy through a 32-bit move into a scratch register, then
                // convert from the full-width scratch register.
                "mov {scratch:e}, {src:e}",
                "cvtsi2ss {dst}, {scratch}",
                src = in(reg) a,
                scratch = out(reg) _,
                dst = lateout(xmm_reg) res,
                options(pure, nomem, nostack),
            );
            res
        };

        #[cfg(target_arch = "aarch64")] {
            let res: f32;
            asm!(
                "ucvtf {dst:s}, {src:w}",
                src = in(reg) a,
                dst = lateout(vreg) res,
                options(pure, nomem, nostack),
            );
            res
        };
    ],
}
|
||||
|
||||
// Benchmark `u32 -> f64`: the crate's `__floatunsidf`, the system symbol of
// the same name, and native conversion instructions.
float_bench! {
    name: conv_u32_f64,
    sig: (a: u32) -> f64,
    crate_fn: conv::__floatunsidf,
    sys_fn: __floatunsidf,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "x86_64")] {
            let res: f64;
            asm!(
                // Copy through a 32-bit move into a scratch register, then
                // convert from the full-width scratch register.
                "mov {scratch:e}, {src:e}",
                "cvtsi2sd {dst}, {scratch}",
                src = in(reg) a,
                scratch = out(reg) _,
                dst = lateout(xmm_reg) res,
                options(pure, nomem, nostack),
            );
            res
        };

        #[cfg(target_arch = "aarch64")] {
            let res: f64;
            asm!(
                "ucvtf {dst:d}, {src:w}",
                src = in(reg) a,
                dst = lateout(vreg) res,
                options(pure, nomem, nostack),
            );
            res
        };
    ],
}
|
||||
|
||||
// Benchmark `u64 -> f32`: the crate's `__floatundisf`, the system symbol of
// the same name, and (aarch64 only) a native conversion instruction.
float_bench! {
    name: conv_u64_f32,
    sig: (a: u64) -> f32,
    crate_fn: conv::__floatundisf,
    sys_fn: __floatundisf,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "aarch64")] {
            let res: f32;
            asm!(
                "ucvtf {dst:s}, {src:x}",
                src = in(reg) a,
                dst = lateout(vreg) res,
                options(pure, nomem, nostack),
            );
            res
        };
    ],
}
|
||||
|
||||
// Benchmark `u64 -> f64`: the crate's `__floatundidf`, the system symbol of
// the same name, and (aarch64 only) a native conversion instruction.
float_bench! {
    name: conv_u64_f64,
    sig: (a: u64) -> f64,
    crate_fn: conv::__floatundidf,
    sys_fn: __floatundidf,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "aarch64")] {
            let res: f64;
            asm!(
                "ucvtf {dst:d}, {src:x}",
                src = in(reg) a,
                dst = lateout(vreg) res,
                options(pure, nomem, nostack),
            );
            res
        };
    ],
}
|
||||
|
||||
// Benchmark `u128 -> f32`: the crate's `__floatuntisf` against the system
// symbol of the same name. No assembly variant is provided.
float_bench! {
    name: conv_u128_f32,
    sig: (a: u128) -> f32,
    crate_fn: conv::__floatuntisf,
    sys_fn: __floatuntisf,
    sys_available: all(),
    asm: []
}
|
||||
|
||||
// Benchmark `u128 -> f64`: the crate's `__floatuntidf` against the system
// symbol of the same name. No assembly variant is provided.
float_bench! {
    name: conv_u128_f64,
    sig: (a: u128) -> f64,
    crate_fn: conv::__floatuntidf,
    sys_fn: __floatuntidf,
    sys_available: all(),
    asm: []
}
|
||||
|
||||
/* signed int -> float */
|
||||
|
||||
// Benchmark `i32 -> f32`: the crate's `__floatsisf`, the system symbol of the
// same name, and native conversion instructions.
float_bench! {
    name: conv_i32_f32,
    sig: (a: i32) -> f32,
    crate_fn: conv::__floatsisf,
    sys_fn: __floatsisf,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "x86_64")] {
            let res: f32;
            asm!(
                "cvtsi2ss {dst}, {src:e}",
                src = in(reg) a,
                dst = lateout(xmm_reg) res,
                options(pure, nomem, nostack),
            );
            res
        };

        #[cfg(target_arch = "aarch64")] {
            let res: f32;
            asm!(
                "scvtf {dst:s}, {src:w}",
                src = in(reg) a,
                dst = lateout(vreg) res,
                options(pure, nomem, nostack),
            );
            res
        };
    ],
}
|
||||
|
||||
// Benchmark `i32 -> f64`: the crate's `__floatsidf`, the system symbol of the
// same name, and native conversion instructions.
float_bench! {
    name: conv_i32_f64,
    sig: (a: i32) -> f64,
    crate_fn: conv::__floatsidf,
    sys_fn: __floatsidf,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "x86_64")] {
            let res: f64;
            asm!(
                "cvtsi2sd {dst}, {src:e}",
                src = in(reg) a,
                dst = lateout(xmm_reg) res,
                options(pure, nomem, nostack),
            );
            res
        };

        #[cfg(target_arch = "aarch64")] {
            let res: f64;
            asm!(
                "scvtf {dst:d}, {src:w}",
                src = in(reg) a,
                dst = lateout(vreg) res,
                options(pure, nomem, nostack),
            );
            res
        };
    ],
}
|
||||
|
||||
// Benchmark `i64 -> f32`: the crate's `__floatdisf`, the system symbol of the
// same name, and native conversion instructions.
float_bench! {
    name: conv_i64_f32,
    sig: (a: i64) -> f32,
    crate_fn: conv::__floatdisf,
    sys_fn: __floatdisf,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "x86_64")] {
            let res: f32;
            asm!(
                "cvtsi2ss {dst}, {src:r}",
                src = in(reg) a,
                dst = lateout(xmm_reg) res,
                options(pure, nomem, nostack),
            );
            res
        };

        #[cfg(target_arch = "aarch64")] {
            let res: f32;
            asm!(
                "scvtf {dst:s}, {src:x}",
                src = in(reg) a,
                dst = lateout(vreg) res,
                options(pure, nomem, nostack),
            );
            res
        };
    ],
}
|
||||
|
||||
// Benchmark `i64 -> f64`: the crate's `__floatdidf`, the system symbol of the
// same name, and native conversion instructions.
float_bench! {
    name: conv_i64_f64,
    sig: (a: i64) -> f64,
    crate_fn: conv::__floatdidf,
    sys_fn: __floatdidf,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "x86_64")] {
            let res: f64;
            asm!(
                "cvtsi2sd {dst}, {src:r}",
                src = in(reg) a,
                dst = lateout(xmm_reg) res,
                options(pure, nomem, nostack),
            );
            res
        };

        #[cfg(target_arch = "aarch64")] {
            let res: f64;
            asm!(
                "scvtf {dst:d}, {src:x}",
                src = in(reg) a,
                dst = lateout(vreg) res,
                options(pure, nomem, nostack),
            );
            res
        };
    ],
}
|
||||
|
||||
// Benchmark `i128 -> f32`: the crate's `__floattisf` against the system
// symbol of the same name. No assembly variant is provided.
float_bench! {
    name: conv_i128_f32,
    sig: (a: i128) -> f32,
    crate_fn: conv::__floattisf,
    sys_fn: __floattisf,
    sys_available: all(),
    asm: []
}
|
||||
|
||||
// Benchmark `i128 -> f64`: the crate's `__floattidf` against the system
// symbol of the same name. No assembly variant is provided.
float_bench! {
    name: conv_i128_f64,
    sig: (a: i128) -> f64,
    crate_fn: conv::__floattidf,
    sys_fn: __floattidf,
    sys_available: all(),
    asm: []
}
|
||||
|
||||
/* float -> unsigned int */
|
||||
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
float_bench! {
|
||||
name: conv_f32_u32,
|
||||
sig: (a: f32) -> u32,
|
||||
crate_fn: conv::__fixunssfsi,
|
||||
sys_fn: __fixunssfsi,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: u32;
|
||||
asm!(
|
||||
"fcvtzu {ret:w}, {a:s}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
float_bench! {
|
||||
name: conv_f32_u64,
|
||||
sig: (a: f32) -> u64,
|
||||
crate_fn: conv::__fixunssfdi,
|
||||
sys_fn: __fixunssfdi,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: u64;
|
||||
asm!(
|
||||
"fcvtzu {ret:x}, {a:s}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
float_bench! {
|
||||
name: conv_f32_u128,
|
||||
sig: (a: f32) -> u128,
|
||||
crate_fn: conv::__fixunssfti,
|
||||
sys_fn: __fixunssfti,
|
||||
sys_available: all(),
|
||||
asm: []
|
||||
}
|
||||
|
||||
// Benchmark `f64 -> u32`: the crate's `__fixunsdfsi`, the system symbol of
// the same name, and (aarch64 only) a native conversion instruction.
float_bench! {
    name: conv_f64_u32,
    sig: (a: f64) -> u32,
    crate_fn: conv::__fixunsdfsi,
    sys_fn: __fixunsdfsi,
    sys_available: all(),
    asm: [
        #[cfg(target_arch = "aarch64")] {
            let res: u32;
            asm!(
                "fcvtzu {dst:w}, {src:d}",
                src = in(vreg) a,
                dst = lateout(reg) res,
                options(pure, nomem, nostack),
            );
            res
        };
    ],
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_f64_u64,
|
||||
sig: (a: f64) -> u64,
|
||||
crate_fn: conv::__fixunsdfdi,
|
||||
sys_fn: __fixunsdfdi,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: u64;
|
||||
asm!(
|
||||
"fcvtzu {ret:x}, {a:d}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_f64_u128,
|
||||
sig: (a: f64) -> u128,
|
||||
crate_fn: conv::__fixunsdfti,
|
||||
sys_fn: __fixunsdfti,
|
||||
sys_available: all(),
|
||||
asm: []
|
||||
}
|
||||
|
||||
/* float -> signed int */
|
||||
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
float_bench! {
|
||||
name: conv_f32_i32,
|
||||
sig: (a: f32) -> i32,
|
||||
crate_fn: conv::__fixsfsi,
|
||||
sys_fn: __fixsfsi,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: i32;
|
||||
asm!(
|
||||
"fcvtzs {ret:w}, {a:s}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
float_bench! {
|
||||
name: conv_f32_i64,
|
||||
sig: (a: f32) -> i64,
|
||||
crate_fn: conv::__fixsfdi,
|
||||
sys_fn: __fixsfdi,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: i64;
|
||||
asm!(
|
||||
"fcvtzs {ret:x}, {a:s}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
float_bench! {
|
||||
name: conv_f32_i128,
|
||||
sig: (a: f32) -> i128,
|
||||
crate_fn: conv::__fixsfti,
|
||||
sys_fn: __fixsfti,
|
||||
sys_available: all(),
|
||||
asm: []
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_f64_i32,
|
||||
sig: (a: f64) -> i32,
|
||||
crate_fn: conv::__fixdfsi,
|
||||
sys_fn: __fixdfsi,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: i32;
|
||||
asm!(
|
||||
"fcvtzs {ret:w}, {a:d}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_f64_i64,
|
||||
sig: (a: f64) -> i64,
|
||||
crate_fn: conv::__fixdfdi,
|
||||
sys_fn: __fixdfdi,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: i64;
|
||||
asm!(
|
||||
"fcvtzs {ret:x}, {a:d}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: conv_f64_i128,
|
||||
sig: (a: f64) -> i128,
|
||||
crate_fn: conv::__fixdfti,
|
||||
sys_fn: __fixdfti,
|
||||
sys_available: all(),
|
||||
asm: []
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
float_conv,
|
||||
conv_u32_f32,
|
||||
conv_u32_f64,
|
||||
conv_u64_f32,
|
||||
conv_u64_f64,
|
||||
conv_u128_f32,
|
||||
conv_u128_f64,
|
||||
conv_i32_f32,
|
||||
conv_i32_f64,
|
||||
conv_i64_f32,
|
||||
conv_i64_f64,
|
||||
conv_i128_f32,
|
||||
conv_i128_f64,
|
||||
conv_f64_u32,
|
||||
conv_f64_u64,
|
||||
conv_f64_u128,
|
||||
conv_f64_i32,
|
||||
conv_f64_i64,
|
||||
conv_f64_i128,
|
||||
);
|
||||
|
||||
// FIXME: ppc64le has a sporadic overflow panic in the crate functions
|
||||
// <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
criterion_group!(
|
||||
float_conv_not_ppc64le,
|
||||
conv_f32_u32,
|
||||
conv_f32_u64,
|
||||
conv_f32_u128,
|
||||
conv_f32_i32,
|
||||
conv_f32_i64,
|
||||
conv_f32_i128,
|
||||
);
|
||||
|
||||
#[cfg(all(target_arch = "powerpc64", target_endian = "little"))]
|
||||
criterion_main!(float_conv);
|
||||
|
||||
#[cfg(not(all(target_arch = "powerpc64", target_endian = "little")))]
|
||||
criterion_main!(float_conv, float_conv_not_ppc64le);
|
||||
70
library/compiler-builtins/testcrate/benches/float_div.rs
Normal file
70
library/compiler-builtins/testcrate/benches/float_div.rs
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
#![feature(f128)]
|
||||
|
||||
use compiler_builtins::float::div;
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use testcrate::float_bench;
|
||||
|
||||
float_bench! {
|
||||
name: div_f32,
|
||||
sig: (a: f32, b: f32) -> f32,
|
||||
crate_fn: div::__divsf3,
|
||||
sys_fn: __divsf3,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
asm!(
|
||||
"divss {a}, {b}",
|
||||
a = inout(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
asm!(
|
||||
"fdiv {a:s}, {a:s}, {b:s}",
|
||||
a = inout(vreg) a,
|
||||
b = in(vreg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: div_f64,
|
||||
sig: (a: f64, b: f64) -> f64,
|
||||
crate_fn: div::__divdf3,
|
||||
sys_fn: __divdf3,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
asm!(
|
||||
"divsd {a}, {b}",
|
||||
a = inout(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
asm!(
|
||||
"fdiv {a:d}, {a:d}, {b:d}",
|
||||
a = inout(vreg) a,
|
||||
b = in(vreg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
criterion_group!(float_div, div_f32, div_f64);
|
||||
criterion_main!(float_div);
|
||||
93
library/compiler-builtins/testcrate/benches/float_extend.rs
Normal file
93
library/compiler-builtins/testcrate/benches/float_extend.rs
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
#![allow(unused_variables)] // "unused" f16 registers
|
||||
#![feature(f128)]
|
||||
#![feature(f16)]
|
||||
|
||||
use compiler_builtins::float::extend;
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use testcrate::float_bench;
|
||||
|
||||
float_bench! {
|
||||
name: extend_f16_f32,
|
||||
sig: (a: f16) -> f32,
|
||||
crate_fn: extend::__extendhfsf2,
|
||||
sys_fn: __extendhfsf2,
|
||||
sys_available: not(feature = "no-sys-f16"),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
// FIXME(f16_f128): remove `to_bits()` after f16 asm support (rust-lang/rust/#116909)
|
||||
let ret: f32;
|
||||
asm!(
|
||||
"fcvt {ret:s}, {a:h}",
|
||||
a = in(vreg) a.to_bits(),
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: extend_f16_f128,
|
||||
sig: (a: f16) -> f128,
|
||||
crate_fn: extend::__extendhftf2,
|
||||
crate_fn_ppc: extend::__extendhfkf2,
|
||||
sys_fn: __extendhftf2,
|
||||
sys_fn_ppc: __extendhfkf2,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: [],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: extend_f32_f64,
|
||||
sig: (a: f32) -> f64,
|
||||
crate_fn: extend::__extendsfdf2,
|
||||
sys_fn: __extendsfdf2,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f64;
|
||||
asm!(
|
||||
"fcvt {ret:d}, {a:s}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: extend_f32_f128,
|
||||
sig: (a: f32) -> f128,
|
||||
crate_fn: extend::__extendsftf2,
|
||||
crate_fn_ppc: extend::__extendsfkf2,
|
||||
sys_fn: __extendsftf2,
|
||||
sys_fn_ppc: __extendsfkf2,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
asm: [],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: extend_f64_f128,
|
||||
sig: (a: f64) -> f128,
|
||||
crate_fn: extend::__extenddftf2,
|
||||
crate_fn_ppc: extend::__extenddfkf2,
|
||||
sys_fn: __extenddftf2,
|
||||
sys_fn_ppc: __extenddfkf2,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
asm: [],
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
float_extend,
|
||||
extend_f16_f32,
|
||||
extend_f16_f128,
|
||||
extend_f32_f64,
|
||||
extend_f32_f128,
|
||||
extend_f64_f128,
|
||||
);
|
||||
criterion_main!(float_extend);
|
||||
81
library/compiler-builtins/testcrate/benches/float_mul.rs
Normal file
81
library/compiler-builtins/testcrate/benches/float_mul.rs
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
#![feature(f128)]
|
||||
|
||||
use compiler_builtins::float::mul;
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use testcrate::float_bench;
|
||||
|
||||
float_bench! {
|
||||
name: mul_f32,
|
||||
sig: (a: f32, b: f32) -> f32,
|
||||
crate_fn: mul::__mulsf3,
|
||||
sys_fn: __mulsf3,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
asm!(
|
||||
"mulss {a}, {b}",
|
||||
a = inout(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
asm!(
|
||||
"fmul {a:s}, {a:s}, {b:s}",
|
||||
a = inout(vreg) a,
|
||||
b = in(vreg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: mul_f64,
|
||||
sig: (a: f64, b: f64) -> f64,
|
||||
crate_fn: mul::__muldf3,
|
||||
sys_fn: __muldf3,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
asm!(
|
||||
"mulsd {a}, {b}",
|
||||
a = inout(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
asm!(
|
||||
"fmul {a:d}, {a:d}, {b:d}",
|
||||
a = inout(vreg) a,
|
||||
b = in(vreg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: mul_f128,
|
||||
sig: (a: f128, b: f128) -> f128,
|
||||
crate_fn: mul::__multf3,
|
||||
crate_fn_ppc: mul::__mulkf3,
|
||||
sys_fn: __multf3,
|
||||
sys_fn_ppc: __mulkf3,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
criterion_group!(float_mul, mul_f32, mul_f64, mul_f128);
|
||||
criterion_main!(float_mul);
|
||||
24
library/compiler-builtins/testcrate/benches/float_pow.rs
Normal file
24
library/compiler-builtins/testcrate/benches/float_pow.rs
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
use compiler_builtins::float::pow;
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use testcrate::float_bench;
|
||||
|
||||
float_bench! {
|
||||
name: powi_f32,
|
||||
sig: (a: f32, b: i32) -> f32,
|
||||
crate_fn: pow::__powisf2,
|
||||
sys_fn: __powisf2,
|
||||
sys_available: all(),
|
||||
asm: [],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: powi_f64,
|
||||
sig: (a: f64, b: i32) -> f64,
|
||||
crate_fn: pow::__powidf2,
|
||||
sys_fn: __powidf2,
|
||||
sys_available: all(),
|
||||
asm: [],
|
||||
}
|
||||
|
||||
criterion_group!(float_add, powi_f32, powi_f64);
|
||||
criterion_main!(float_add);
|
||||
81
library/compiler-builtins/testcrate/benches/float_sub.rs
Normal file
81
library/compiler-builtins/testcrate/benches/float_sub.rs
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
#![feature(f128)]
|
||||
|
||||
use compiler_builtins::float::sub;
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use testcrate::float_bench;
|
||||
|
||||
float_bench! {
|
||||
name: sub_f32,
|
||||
sig: (a: f32, b: f32) -> f32,
|
||||
crate_fn: sub::__subsf3,
|
||||
sys_fn: __subsf3,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
asm!(
|
||||
"subss {a}, {b}",
|
||||
a = inout(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
asm!(
|
||||
"fsub {a:s}, {a:s}, {b:s}",
|
||||
a = inout(vreg) a,
|
||||
b = in(vreg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: sub_f64,
|
||||
sig: (a: f64, b: f64) -> f64,
|
||||
crate_fn: sub::__subdf3,
|
||||
sys_fn: __subdf3,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
asm!(
|
||||
"subsd {a}, {b}",
|
||||
a = inout(xmm_reg) a,
|
||||
b = in(xmm_reg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
asm!(
|
||||
"fsub {a:d}, {a:d}, {b:d}",
|
||||
a = inout(vreg) a,
|
||||
b = in(vreg) b,
|
||||
options(nomem, nostack, pure)
|
||||
);
|
||||
|
||||
a
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: sub_f128,
|
||||
sig: (a: f128, b: f128) -> f128,
|
||||
crate_fn: sub::__subtf3,
|
||||
crate_fn_ppc: sub::__subkf3,
|
||||
sys_fn: __subtf3,
|
||||
sys_fn_ppc: __subkf3,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
asm: []
|
||||
}
|
||||
|
||||
criterion_group!(float_sub, sub_f32, sub_f64, sub_f128);
|
||||
criterion_main!(float_sub);
|
||||
127
library/compiler-builtins/testcrate/benches/float_trunc.rs
Normal file
127
library/compiler-builtins/testcrate/benches/float_trunc.rs
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
#![feature(f128)]
|
||||
#![feature(f16)]
|
||||
|
||||
use compiler_builtins::float::trunc;
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use testcrate::float_bench;
|
||||
|
||||
float_bench! {
|
||||
name: trunc_f32_f16,
|
||||
sig: (a: f32) -> f16,
|
||||
crate_fn: trunc::__truncsfhf2,
|
||||
sys_fn: __truncsfhf2,
|
||||
sys_available: not(feature = "no-sys-f16"),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
// FIXME(f16_f128): remove `from_bits()` after f16 asm support (rust-lang/rust/#116909)
|
||||
let ret: u16;
|
||||
asm!(
|
||||
"fcvt {ret:h}, {a:s}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
f16::from_bits(ret)
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
// Benchmark and cross-check `f64 -> f16` truncation.
float_bench! {
    name: trunc_f64_f16,
    sig: (a: f64) -> f16,
    crate_fn: trunc::__truncdfhf2,
    sys_fn: __truncdfhf2,
    // `__truncdfhf2` is an f16 routine and does not involve any f128 symbols, so the system
    // comparison is gated on f16 availability, consistent with `trunc_f32_f16`.
    sys_available: not(feature = "no-sys-f16"),
    asm: [
        #[cfg(target_arch = "aarch64")] {
            // FIXME(f16_f128): remove `from_bits()` after f16 asm support (rust-lang/rust/#116909)
            let ret: u16;
            asm!(
                "fcvt {ret:h}, {a:d}",
                a = in(vreg) a,
                ret = lateout(vreg) ret,
                options(nomem, nostack, pure),
            );

            f16::from_bits(ret)
        };
    ],
}
|
||||
|
||||
float_bench! {
|
||||
name: trunc_f64_f32,
|
||||
sig: (a: f64) -> f32,
|
||||
crate_fn: trunc::__truncdfsf2,
|
||||
sys_fn: __truncdfsf2,
|
||||
sys_available: all(),
|
||||
asm: [
|
||||
#[cfg(target_arch = "x86_64")] {
|
||||
let ret: f32;
|
||||
asm!(
|
||||
"cvtsd2ss {ret}, {a}",
|
||||
a = in(xmm_reg) a,
|
||||
ret = lateout(xmm_reg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f32;
|
||||
asm!(
|
||||
"fcvt {ret:s}, {a:d}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: trunc_f128_f16,
|
||||
sig: (a: f128) -> f16,
|
||||
crate_fn: trunc::__trunctfhf2,
|
||||
crate_fn_ppc: trunc::__trunckfhf2,
|
||||
sys_fn: __trunctfhf2,
|
||||
sys_fn_ppc: __trunckfhf2,
|
||||
sys_available: not(feature = "no-sys-f16-f128-convert"),
|
||||
asm: [],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: trunc_f128_f32,
|
||||
sig: (a: f128) -> f32,
|
||||
crate_fn: trunc::__trunctfsf2,
|
||||
crate_fn_ppc: trunc::__trunckfsf2,
|
||||
sys_fn: __trunctfsf2,
|
||||
sys_fn_ppc: __trunckfsf2,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
asm: [],
|
||||
}
|
||||
|
||||
float_bench! {
|
||||
name: trunc_f128_f64,
|
||||
sig: (a: f128) -> f64,
|
||||
crate_fn: trunc::__trunctfdf2,
|
||||
crate_fn_ppc: trunc::__trunckfdf2,
|
||||
sys_fn: __trunctfdf2,
|
||||
sys_fn_ppc: __trunckfdf2,
|
||||
sys_available: not(feature = "no-sys-f128"),
|
||||
asm: [],
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
float_trunc,
|
||||
trunc_f32_f16,
|
||||
trunc_f64_f16,
|
||||
trunc_f64_f32,
|
||||
trunc_f128_f16,
|
||||
trunc_f128_f32,
|
||||
trunc_f128_f64,
|
||||
);
|
||||
criterion_main!(float_trunc);
|
||||
|
|
@ -5,6 +5,8 @@ use std::{collections::HashSet, env};
|
|||
enum Feature {
|
||||
NoSysF128,
|
||||
NoSysF128IntConvert,
|
||||
NoSysF16,
|
||||
NoSysF16F128Convert,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
|
|
@ -31,6 +33,7 @@ fn main() {
|
|||
{
|
||||
features.insert(Feature::NoSysF128);
|
||||
features.insert(Feature::NoSysF128IntConvert);
|
||||
features.insert(Feature::NoSysF16F128Convert);
|
||||
}
|
||||
|
||||
if target.starts_with("i586") || target.starts_with("i686") {
|
||||
|
|
@ -38,6 +41,17 @@ fn main() {
|
|||
features.insert(Feature::NoSysF128IntConvert);
|
||||
}
|
||||
|
||||
if target.contains("-unknown-linux-") {
|
||||
// No `__extendhftf2` on x86, no `__trunctfhf2` on aarch64
|
||||
features.insert(Feature::NoSysF16F128Convert);
|
||||
}
|
||||
|
||||
if target.starts_with("wasm32-") {
|
||||
// Linking says "error: function signature mismatch: __extendhfsf2" and seems to
|
||||
// think the signature is either `(i32) -> f32` or `(f32) -> f32`
|
||||
features.insert(Feature::NoSysF16);
|
||||
}
|
||||
|
||||
for feature in features {
|
||||
let (name, warning) = match feature {
|
||||
Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"),
|
||||
|
|
@ -45,6 +59,11 @@ fn main() {
|
|||
"no-sys-f128-int-convert",
|
||||
"using apfloat fallback for f128 to int conversions",
|
||||
),
|
||||
Feature::NoSysF16F128Convert => (
|
||||
"no-sys-f16-f128-convert",
|
||||
"skipping using apfloat fallback for f16 <-> f128 conversions",
|
||||
),
|
||||
Feature::NoSysF16 => ("no-sys-f16", "using apfloat fallback for f16"),
|
||||
};
|
||||
println!("cargo:warning={warning}");
|
||||
println!("cargo:rustc-cfg=feature=\"{name}\"");
|
||||
|
|
|
|||
348
library/compiler-builtins/testcrate/src/bench.rs
Normal file
348
library/compiler-builtins/testcrate/src/bench.rs
Normal file
|
|
@ -0,0 +1,348 @@
|
|||
use core::cell::RefCell;
|
||||
|
||||
use alloc::vec::Vec;
|
||||
use compiler_builtins::float::Float;
|
||||
|
||||
/// Fuzz with this many items to ensure the functions are equal
|
||||
pub const CHECK_ITER_ITEMS: u32 = 10_000;
|
||||
/// Benchmark with this many items to get a variety
|
||||
pub const BENCH_ITER_ITEMS: u32 = 500;
|
||||
|
||||
/// Reports whether the comparison between compiler-builtins and the system-provided symbol
/// should be skipped for `test_name`.
///
/// The benchmark and the crate function still run when this returns `true`; only the
/// equality assertion against the system result is omitted.
pub fn skip_sys_checks(test_name: &str) -> bool {
    // FIXME(f16_f128): system symbols have incorrect results
    // <https://github.com/rust-lang/compiler-builtins/issues/617>
    //
    // `mul_f128` is listed because of a rounding error (FIXME(f16_f128)):
    // <https://github.com/rust-lang/compiler-builtins/issues/616>
    const ALWAYS_SKIPPED: &[&str] = &[
        "extend_f16_f32",
        "trunc_f32_f16",
        "trunc_f64_f16",
        "mul_f128",
    ];

    // FIXME(f16_f128): error on LE ppc64. There are more tests that are cfg-ed out completely
    // in their benchmark modules due to runtime panics.
    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
    const PPC64LE_SKIPPED: &[&str] = &["extend_f32_f128"];

    // FIXME(f16_f128): system symbols have incorrect results
    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2125914639>
    const X86_NO_SSE_SKIPPED: &[&str] = &["add_f128", "sub_f128", "powi_f32", "powi_f64"];

    // FIXME(llvm): system symbols have incorrect results on Windows
    // <https://github.com/rust-lang/compiler-builtins/issues/617#issuecomment-2121359807>
    const WINDOWS_SKIPPED: &[&str] = &[
        "conv_f32_u128",
        "conv_f32_i128",
        "conv_f64_u128",
        "conv_f64_i128",
    ];

    // Membership test against one of the skip lists above.
    let listed_in = |names: &[&str]| names.iter().any(|&name| name == test_name);

    let is_arm = cfg!(target_arch = "arm");
    let is_ppc64le = cfg!(all(target_arch = "powerpc64", target_endian = "little"));
    let is_x86_no_sse = cfg!(all(target_arch = "x86", not(target_feature = "sse")));
    let is_windows = cfg!(target_family = "windows");

    // The Arm symbols need a different ABI that our macro doesn't handle, so every check is
    // skipped there regardless of the test name.
    is_arm
        || listed_in(ALWAYS_SKIPPED)
        || (is_ppc64le && listed_in(PPC64LE_SKIPPED))
        || (is_x86_no_sse && listed_in(X86_NO_SSE_SKIPPED))
        || (is_windows && listed_in(WINDOWS_SKIPPED))
}
|
||||
|
||||
/// Reports whether the comparison between compiler-builtins and the handwritten assembly
/// implementation should be skipped for `test_name`.
///
/// The benchmark still runs when this returns `true`; only the equality assertion is omitted.
pub fn skip_asm_checks(test_name: &str) -> bool {
    // FIXME(f16_f128): rounding error
    // <https://github.com/rust-lang/compiler-builtins/issues/616>
    matches!(test_name, "mul_f32" | "mul_f64")
}
|
||||
|
||||
/// Create a comparison of the system symbol, compiler_builtins, and optionally handwritten
|
||||
/// assembly.
|
||||
#[macro_export]
|
||||
macro_rules! float_bench {
|
||||
(
|
||||
// Name of this benchmark
|
||||
name: $name:ident,
|
||||
// The function signature to be tested
|
||||
sig: ($($arg:ident: $arg_ty:ty),*) -> $ret_ty:ty,
|
||||
// Path to the function in compiler_builtins
|
||||
crate_fn: $crate_fn:path,
|
||||
// Optional alias on ppc
|
||||
$( crate_fn_ppc: $crate_fn_ppc:path, )?
|
||||
// Name of the system symbol
|
||||
sys_fn: $sys_fn:ident,
|
||||
// Optional alias on ppc
|
||||
$( sys_fn_ppc: $sys_fn_ppc:path, )?
|
||||
// Meta saying whether the system symbol is available
|
||||
sys_available: $sys_available:meta,
|
||||
// An optional function to validate the results of two functions are equal, if not
|
||||
// just `$ret_ty::check_eq`
|
||||
$( output_eq: $output_eq:expr, )?
|
||||
// Assembly implementations, if any.
|
||||
asm: [
|
||||
$(
|
||||
#[cfg($asm_meta:meta)] {
|
||||
$($asm_tt:tt)*
|
||||
}
|
||||
);*
|
||||
$(;)?
|
||||
]
|
||||
$(,)?
|
||||
) => {paste::paste! {
|
||||
#[cfg($sys_available)]
|
||||
extern "C" {
|
||||
/// Binding for the system function
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty;
|
||||
|
||||
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
float_bench! { @coalesce_fn $($sys_fn_ppc)? =>
|
||||
fn $sys_fn($($arg: $arg_ty),*) -> $ret_ty;
|
||||
}
|
||||
}
|
||||
|
||||
fn $name(c: &mut Criterion) {
|
||||
use core::hint::black_box;
|
||||
use compiler_builtins::float::Float;
|
||||
use $crate::bench::TestIO;
|
||||
|
||||
#[inline(never)] // equalize with external calls
|
||||
fn crate_fn($($arg: $arg_ty),*) -> $ret_ty {
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
let target_crate_fn = $crate_fn;
|
||||
|
||||
// On PPC, use an alias if specified
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
let target_crate_fn = float_bench!(@coalesce $($crate_fn_ppc)?, $crate_fn);
|
||||
|
||||
target_crate_fn( $($arg),* )
|
||||
}
|
||||
|
||||
#[inline(always)] // already a branch
|
||||
#[cfg($sys_available)]
|
||||
fn sys_fn($($arg: $arg_ty),*) -> $ret_ty {
|
||||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
let target_sys_fn = $sys_fn;
|
||||
|
||||
// On PPC, use an alias if specified
|
||||
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
|
||||
let target_sys_fn = float_bench!(@coalesce $($sys_fn_ppc)?, $sys_fn);
|
||||
|
||||
unsafe { target_sys_fn( $($arg),* ) }
|
||||
}
|
||||
|
||||
#[inline(never)] // equalize with external calls
|
||||
#[cfg(any( $($asm_meta),* ))]
|
||||
fn asm_fn($(mut $arg: $arg_ty),*) -> $ret_ty {
|
||||
use core::arch::asm;
|
||||
$(
|
||||
#[cfg($asm_meta)]
|
||||
unsafe { $($asm_tt)* }
|
||||
)*
|
||||
}
|
||||
|
||||
let testvec = <($($arg_ty),*)>::make_testvec($crate::bench::CHECK_ITER_ITEMS);
|
||||
let benchvec = <($($arg_ty),*)>::make_testvec($crate::bench::BENCH_ITER_ITEMS);
|
||||
let test_name = stringify!($name);
|
||||
let check_eq = float_bench!(@coalesce $($output_eq)?, $ret_ty::check_eq);
|
||||
|
||||
// Verify math lines up. We run the crate functions even if we don't validate the
|
||||
// output here to make sure there are no panics or crashes.
|
||||
|
||||
#[cfg($sys_available)]
|
||||
for ($($arg),*) in testvec.iter().copied() {
|
||||
let crate_res = crate_fn($($arg),*);
|
||||
let sys_res = sys_fn($($arg),*);
|
||||
|
||||
if $crate::bench::skip_sys_checks(test_name) {
|
||||
continue;
|
||||
}
|
||||
|
||||
assert!(
|
||||
check_eq(crate_res, sys_res),
|
||||
"{test_name}{:?}: crate: {crate_res:?}, sys: {sys_res:?}",
|
||||
($($arg),* ,)
|
||||
);
|
||||
}
|
||||
|
||||
#[cfg(any( $($asm_meta),* ))]
|
||||
{
|
||||
for ($($arg),*) in testvec.iter().copied() {
|
||||
let crate_res = crate_fn($($arg),*);
|
||||
let asm_res = asm_fn($($arg),*);
|
||||
|
||||
if $crate::bench::skip_asm_checks(test_name) {
|
||||
continue;
|
||||
}
|
||||
|
||||
assert!(
|
||||
check_eq(crate_res, asm_res),
|
||||
"{test_name}{:?}: crate: {crate_res:?}, asm: {asm_res:?}",
|
||||
($($arg),* ,)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let mut group = c.benchmark_group(test_name);
|
||||
group.bench_function("compiler-builtins", |b| b.iter(|| {
|
||||
for ($($arg),*) in benchvec.iter().copied() {
|
||||
black_box(crate_fn( $(black_box($arg)),* ));
|
||||
}
|
||||
}));
|
||||
|
||||
#[cfg($sys_available)]
|
||||
group.bench_function("system", |b| b.iter(|| {
|
||||
for ($($arg),*) in benchvec.iter().copied() {
|
||||
black_box(sys_fn( $(black_box($arg)),* ));
|
||||
}
|
||||
}));
|
||||
|
||||
#[cfg(any( $($asm_meta),* ))]
|
||||
group.bench_function(&format!(
|
||||
"assembly ({} {})", std::env::consts::ARCH, std::env::consts::FAMILY
|
||||
), |b| b.iter(|| {
|
||||
for ($($arg),*) in benchvec.iter().copied() {
|
||||
black_box(asm_fn( $(black_box($arg)),* ));
|
||||
}
|
||||
}));
|
||||
|
||||
group.finish();
|
||||
}
|
||||
}};
|
||||
|
||||
// Allow overriding a default
|
||||
(@coalesce $specified:expr, $default:expr) => { $specified };
|
||||
(@coalesce, $default:expr) => { $default };
|
||||
|
||||
// Allow overriding a function name
|
||||
(@coalesce_fn $specified:ident => fn $default_name:ident $($tt:tt)+) => {
|
||||
fn $specified $($tt)+
|
||||
};
|
||||
(@coalesce_fn => fn $default_name:ident $($tt:tt)+) => {
|
||||
fn $default_name $($tt)+
|
||||
};
|
||||
}
|
||||
|
||||
/// A type used as either an input or output to/from a benchmark function.
|
||||
pub trait TestIO: Sized {
|
||||
fn make_testvec(len: u32) -> Vec<Self>;
|
||||
fn check_eq(a: Self, b: Self) -> bool;
|
||||
}
|
||||
|
||||
macro_rules! impl_testio {
|
||||
(float $($f_ty:ty),+) => {$(
|
||||
impl TestIO for $f_ty {
|
||||
fn make_testvec(len: u32) -> Vec<Self> {
|
||||
// refcell because fuzz_* takes a `Fn`
|
||||
let ret = RefCell::new(Vec::new());
|
||||
crate::fuzz_float(len, |a| ret.borrow_mut().push(a));
|
||||
ret.into_inner()
|
||||
}
|
||||
|
||||
fn check_eq(a: Self, b: Self) -> bool {
|
||||
Float::eq_repr(a, b)
|
||||
}
|
||||
}
|
||||
|
||||
impl TestIO for ($f_ty, $f_ty) {
|
||||
fn make_testvec(len: u32) -> Vec<Self> {
|
||||
// refcell because fuzz_* takes a `Fn`
|
||||
let ret = RefCell::new(Vec::new());
|
||||
crate::fuzz_float_2(len, |a, b| ret.borrow_mut().push((a, b)));
|
||||
ret.into_inner()
|
||||
}
|
||||
|
||||
fn check_eq(_a: Self, _b: Self) -> bool {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
)*};
|
||||
|
||||
(int $($i_ty:ty),+) => {$(
|
||||
impl TestIO for $i_ty {
|
||||
fn make_testvec(len: u32) -> Vec<Self> {
|
||||
// refcell because fuzz_* takes a `Fn`
|
||||
let ret = RefCell::new(Vec::new());
|
||||
crate::fuzz(len, |a| ret.borrow_mut().push(a));
|
||||
ret.into_inner()
|
||||
}
|
||||
|
||||
fn check_eq(a: Self, b: Self) -> bool {
|
||||
a == b
|
||||
}
|
||||
}
|
||||
|
||||
impl TestIO for ($i_ty, $i_ty) {
|
||||
fn make_testvec(len: u32) -> Vec<Self> {
|
||||
// refcell because fuzz_* takes a `Fn`
|
||||
let ret = RefCell::new(Vec::new());
|
||||
crate::fuzz_2(len, |a, b| ret.borrow_mut().push((a, b)));
|
||||
ret.into_inner()
|
||||
}
|
||||
|
||||
fn check_eq(_a: Self, _b: Self) -> bool {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
)*};
|
||||
|
||||
((float, int) ($f_ty:ty, $i_ty:ty)) => {
|
||||
impl TestIO for ($f_ty, $i_ty) {
|
||||
fn make_testvec(len: u32) -> Vec<Self> {
|
||||
// refcell because fuzz_* takes a `Fn`
|
||||
let ivec = RefCell::new(Vec::new());
|
||||
let fvec = RefCell::new(Vec::new());
|
||||
|
||||
crate::fuzz(len.isqrt(), |a| ivec.borrow_mut().push(a));
|
||||
crate::fuzz_float(len.isqrt(), |a| fvec.borrow_mut().push(a));
|
||||
|
||||
let mut ret = Vec::new();
|
||||
let ivec = ivec.into_inner();
|
||||
let fvec = fvec.into_inner();
|
||||
|
||||
for f in fvec {
|
||||
for i in &ivec {
|
||||
ret.push((f, *i));
|
||||
}
|
||||
}
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
fn check_eq(_a: Self, _b: Self) -> bool {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "no-f16-f128"))]
|
||||
impl_testio!(float f16, f128);
|
||||
impl_testio!(float f32, f64);
|
||||
impl_testio!(int i16, i32, i64, i128);
|
||||
impl_testio!(int u16, u32, u64, u128);
|
||||
impl_testio!((float, int)(f32, i32));
|
||||
impl_testio!((float, int)(f64, i32));
|
||||
|
|
@ -13,6 +13,12 @@
|
|||
//! Some floating point tests are disabled for specific architectures, because they do not have
|
||||
//! correct rounding.
|
||||
#![no_std]
|
||||
#![cfg_attr(not(feature = "no-f16-f128"), feature(f128))]
|
||||
#![cfg_attr(not(feature = "no-f16-f128"), feature(f16))]
|
||||
#![feature(isqrt)]
|
||||
|
||||
pub mod bench;
|
||||
extern crate alloc;
|
||||
|
||||
use compiler_builtins::float::Float;
|
||||
use compiler_builtins::int::{Int, MinInt};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue