Add builtins for f16/f128 float conversions

This commit is contained in:
beetrees 2024-04-14 08:05:18 +01:00 committed by Amanieu d'Antras
parent 0b4ddb1738
commit 229babb1d0
11 changed files with 283 additions and 115 deletions

View file

@ -49,6 +49,10 @@ c = ["cc"]
# which use inline assembly and fall back to pure Rust versions (if available).
no-asm = []
# Workaround for codegen backends which haven't yet implemented `f16` and
# `f128` support. Disables any intrinsics which use those types.
no-f16-f128 = []
# Flag this library as the unstable compiler-builtins lib
compiler-builtins = []

View file

@ -162,7 +162,6 @@ rely on CI.
- [x] divmodsi4.c
- [x] divsf3.c
- [x] divsi3.c
- [ ] extendhfsf2.c
- [x] extendsfdf2.c
- [x] fixdfdi.c
- [x] fixdfsi.c
@ -201,9 +200,7 @@ rely on CI.
- [x] powisf2.c
- [x] subdf3.c
- [x] subsf3.c
- [ ] truncdfhf2.c
- [x] truncdfsf2.c
- [ ] truncsfhf2.c
- [x] udivdi3.c
- [x] udivmoddi4.c
- [x] udivmodsi4.c
@ -233,60 +230,68 @@ These builtins are needed to support 128-bit integers.
- [x] udivti3.c
- [x] umodti3.c
These builtins are needed to support `f16` and `f128`, which are in the process of being added to Rust.
- [ ] addtf3.c
- [ ] comparetf2.c
- [ ] divtf3.c
- [x] extenddftf2.c
- [x] extendhfsf2.c
- [x] extendhftf2.c
- [x] extendsftf2.c
- [ ] fixtfdi.c
- [ ] fixtfsi.c
- [ ] fixtfti.c
- [ ] fixunstfdi.c
- [ ] fixunstfsi.c
- [ ] fixunstfti.c
- [ ] floatditf.c
- [ ] floatsitf.c
- [ ] floatunditf.c
- [ ] floatunsitf.c
- [ ] multf3.c
- [ ] powitf2.c
- [ ] ppc/fixtfdi.c
- [ ] ppc/fixunstfdi.c
- [ ] ppc/floatditf.c
- [ ] ppc/floatunditf.c
- [ ] subtf3.c
- [x] truncdfhf2.c
- [x] truncsfhf2.c
- [x] trunctfdf2.c
- [x] trunctfhf2.c
- [x] trunctfsf2.c
## Unimplemented functions
These builtins involve floating-point types ("`f128`", "`f80`" and complex numbers) that are not supported by Rust.
These builtins involve floating-point types ("`f80`" and complex numbers) that are not supported by Rust.
- ~~addtf3.c~~
- ~~comparetf2.c~~
- ~~divdc3.c~~
- ~~divsc3.c~~
- ~~divtc3.c~~
- ~~divtf3.c~~
- ~~divxc3.c~~
- ~~extenddftf2.c~~
- ~~extendsftf2.c~~
- ~~fixtfdi.c~~
- ~~fixtfsi.c~~
- ~~fixtfti.c~~
- ~~fixunstfdi.c~~
- ~~fixunstfsi.c~~
- ~~fixunstfti.c~~
- ~~fixunsxfdi.c~~
- ~~fixunsxfsi.c~~
- ~~fixunsxfti.c~~
- ~~fixxfdi.c~~
- ~~fixxfti.c~~
- ~~floatditf.c~~
- ~~floatdixf.c~~
- ~~floatsitf.c~~
- ~~floattixf.c~~
- ~~floatunditf.c~~
- ~~floatundixf.c~~
- ~~floatunsitf.c~~
- ~~floatuntixf.c~~
- ~~i386/floatdixf.S~~
- ~~i386/floatundixf.S~~
- ~~muldc3.c~~
- ~~mulsc3.c~~
- ~~multc3.c~~
- ~~multf3.c~~
- ~~mulxc3.c~~
- ~~powitf2.c~~
- ~~powixf2.c~~
- ~~ppc/divtc3.c~~
- ~~ppc/fixtfdi.c~~
- ~~ppc/fixunstfdi.c~~
- ~~ppc/floatditf.c~~
- ~~ppc/floatunditf.c~~
- ~~ppc/gcc_qadd.c~~
- ~~ppc/gcc_qdiv.c~~
- ~~ppc/gcc_qmul.c~~
- ~~ppc/gcc_qsub.c~~
- ~~ppc/multc3.c~~
- ~~subtf3.c~~
- ~~trunctfdf2.c~~
- ~~trunctfsf2.c~~
- ~~x86_64/floatdixf.c~~
- ~~x86_64/floatundixf.S~~

View file

@ -217,6 +217,14 @@ mod c {
}
}
// `compiler-rt` requires `COMPILER_RT_HAS_FLOAT16` to be defined to make it use the
// `_Float16` type for `f16` intrinsics. This shouldn't matter as all existing `f16`
// intrinsics have been ported to Rust in `compiler-builtins` as C compilers don't
// support `_Float16` on all targets (whereas Rust does). However, define the macro
// anyway to prevent issues like rust#118813 and rust#123885 silently reoccurring if more
// `f16` intrinsics get accidentally added here in the future.
cfg.define("COMPILER_RT_HAS_FLOAT16", None);
cfg.warnings(false);
if target_env == "msvc" {
@ -288,13 +296,10 @@ mod c {
sources.extend(&[
("__divdc3", "divdc3.c"),
("__divsc3", "divsc3.c"),
("__extendhfsf2", "extendhfsf2.c"),
("__muldc3", "muldc3.c"),
("__mulsc3", "mulsc3.c"),
("__negdf2", "negdf2.c"),
("__negsf2", "negsf2.c"),
("__truncdfhf2", "truncdfhf2.c"),
("__truncsfhf2", "truncsfhf2.c"),
]);
}
@ -464,8 +469,6 @@ mod c {
if (target_arch == "aarch64" || target_arch == "arm64ec") && consider_float_intrinsics {
sources.extend(&[
("__comparetf2", "comparetf2.c"),
("__extenddftf2", "extenddftf2.c"),
("__extendsftf2", "extendsftf2.c"),
("__fixtfdi", "fixtfdi.c"),
("__fixtfsi", "fixtfsi.c"),
("__fixtfti", "fixtfti.c"),
@ -476,8 +479,6 @@ mod c {
("__floatsitf", "floatsitf.c"),
("__floatunditf", "floatunditf.c"),
("__floatunsitf", "floatunsitf.c"),
("__trunctfdf2", "trunctfdf2.c"),
("__trunctfsf2", "trunctfsf2.c"),
("__addtf3", "addtf3.c"),
("__multf3", "multf3.c"),
("__subtf3", "subtf3.c"),
@ -498,7 +499,6 @@ mod c {
if target_arch == "mips64" {
sources.extend(&[
("__extenddftf2", "extenddftf2.c"),
("__netf2", "comparetf2.c"),
("__addtf3", "addtf3.c"),
("__multf3", "multf3.c"),
@ -509,14 +509,11 @@ mod c {
("__floatunsitf", "floatunsitf.c"),
("__fe_getround", "fp_mode.c"),
("__divtf3", "divtf3.c"),
("__trunctfdf2", "trunctfdf2.c"),
("__trunctfsf2", "trunctfsf2.c"),
]);
}
if target_arch == "loongarch64" {
sources.extend(&[
("__extenddftf2", "extenddftf2.c"),
("__netf2", "comparetf2.c"),
("__addtf3", "addtf3.c"),
("__multf3", "multf3.c"),
@ -527,8 +524,6 @@ mod c {
("__floatunsitf", "floatunsitf.c"),
("__fe_getround", "fp_mode.c"),
("__divtf3", "divtf3.c"),
("__trunctfdf2", "trunctfdf2.c"),
("__trunctfsf2", "trunctfsf2.c"),
]);
}

View file

@ -28,6 +28,8 @@ else
$run --features c --release
$run --features no-asm
$run --features no-asm --release
$run --features no-f16-f128
$run --features no-f16-f128 --release
fi
if [ -d /builtins-target ]; then
@ -45,6 +47,8 @@ cargo build --target "$target" --features c
cargo build --target "$target" --release --features c
cargo build --target "$target" --features no-asm
cargo build --target "$target" --release --features no-asm
cargo build --target "$target" --features no-f16-f128
cargo build --target "$target" --release --features no-f16-f128
PREFIX=${target//unknown-/}-
case "$target" in

View file

@ -82,3 +82,37 @@ intrinsics! {
a as f64 // LLVM generate 'fcvtds'
}
}
// Widening conversions that involve `f16` or `f128`. The whole block is compiled out when the
// `no-f16-f128` feature is enabled. Every function here simply forwards to `extend`.
// Plain `//` comments are used on purpose: a `///` comment would turn into an attribute that
// the `intrinsics!` macro would have to parse.
#[cfg(not(feature = "no-f16-f128"))]
intrinsics! {
// `f16` -> `f32`.
// NOTE(review): `arm_aeabi_alias` presumably also exposes this as `__aeabi_h2f` on ARM —
// confirm against the `intrinsics!` macro definition.
#[avr_skip]
#[aapcs_on_arm]
#[arm_aeabi_alias = __aeabi_h2f]
pub extern "C" fn __extendhfsf2(a: f16) -> f32 {
extend(a)
}
// `f16` -> `f32` under a second symbol name; same signature and body as `__extendhfsf2`.
#[avr_skip]
#[aapcs_on_arm]
pub extern "C" fn __gnu_h2f_ieee(a: f16) -> f32 {
extend(a)
}
// `f16` -> `f128`.
#[avr_skip]
#[aapcs_on_arm]
pub extern "C" fn __extendhftf2(a: f16) -> f128 {
extend(a)
}
// `f32` -> `f128`.
#[avr_skip]
#[aapcs_on_arm]
pub extern "C" fn __extendsftf2(a: f32) -> f128 {
extend(a)
}
// `f64` -> `f128`.
#[avr_skip]
#[aapcs_on_arm]
pub extern "C" fn __extenddftf2(a: f64) -> f128 {
extend(a)
}
}

View file

@ -127,7 +127,20 @@ macro_rules! float_impl {
self.to_bits() as Self::SignedInt
}
fn eq_repr(self, rhs: Self) -> bool {
if self.is_nan() && rhs.is_nan() {
#[cfg(feature = "mangled-names")]
fn is_nan(x: $ty) -> bool {
// When using mangled-names, the "real" compiler-builtins might not have the
// necessary builtin (__unordtf2) to test whether `f128` is NaN.
// FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin
// x is NaN if all the bits of the exponent are set and the significand is non-0
x.repr() & $ty::EXPONENT_MASK == $ty::EXPONENT_MASK
&& x.repr() & $ty::SIGNIFICAND_MASK != 0
}
#[cfg(not(feature = "mangled-names"))]
fn is_nan(x: $ty) -> bool {
x.is_nan()
}
if is_nan(self) && is_nan(rhs) {
true
} else {
self.repr() == rhs.repr()
@ -171,5 +184,9 @@ macro_rules! float_impl {
};
}
// Instantiate `float_impl!` once per supported float type. The `f16` and `f128` instantiations
// are compiled out when the `no-f16-f128` feature is enabled.
// NOTE(review): the last two arguments look like the total bit width and the number of
// significand bits — confirm against the `float_impl!` parameter list.
#[cfg(not(feature = "no-f16-f128"))]
float_impl!(f16, u16, i16, i8, 16, 10);
float_impl!(f32, u32, i32, i16, 32, 23);
float_impl!(f64, u64, i64, i16, 64, 52);
#[cfg(not(feature = "no-f16-f128"))]
float_impl!(f128, u128, i128, i16, 128, 112);

View file

@ -52,8 +52,10 @@ where
// destination format. We can convert by simply right-shifting with
// rounding and adjusting the exponent.
abs_result = (a_abs >> sign_bits_delta).cast();
let tmp = src_exp_bias.wrapping_sub(dst_exp_bias) << R::SIGNIFICAND_BITS;
abs_result = abs_result.wrapping_sub(tmp.cast());
// Cast before shifting to prevent overflow.
let bias_diff: R::Int = src_exp_bias.wrapping_sub(dst_exp_bias).cast();
let tmp = bias_diff << R::SIGNIFICAND_BITS;
abs_result = abs_result.wrapping_sub(tmp);
let round_bits = a_abs & round_mask;
if round_bits > halfway {
@ -67,13 +69,17 @@ where
// a is NaN.
// Conjure the result by beginning with infinity, setting the qNaN
// bit and inserting the (truncated) trailing NaN field.
abs_result = (dst_inf_exp << R::SIGNIFICAND_BITS).cast();
// Cast before shifting to prevent overflow.
let dst_inf_exp: R::Int = dst_inf_exp.cast();
abs_result = dst_inf_exp << R::SIGNIFICAND_BITS;
abs_result |= dst_qnan;
abs_result |= dst_nan_code
& ((a_abs & src_nan_code) >> (F::SIGNIFICAND_BITS - R::SIGNIFICAND_BITS)).cast();
} else if a_abs >= overflow {
// a overflows to infinity.
abs_result = (dst_inf_exp << R::SIGNIFICAND_BITS).cast();
// Cast before shifting to prevent overflow.
let dst_inf_exp: R::Int = dst_inf_exp.cast();
abs_result = dst_inf_exp << R::SIGNIFICAND_BITS;
} else {
// a underflows on conversion to the destination type or is an exact
// zero. The result may be a denormal or zero. Extract the exponent
@ -124,3 +130,44 @@ intrinsics! {
a as f32
}
}
// Narrowing conversions that involve `f16` or `f128`. The whole block is compiled out when the
// `no-f16-f128` feature is enabled. Every function here simply forwards to `trunc`.
// Plain `//` comments are used on purpose: a `///` comment would turn into an attribute that
// the `intrinsics!` macro would have to parse.
#[cfg(not(feature = "no-f16-f128"))]
intrinsics! {
// `f32` -> `f16`.
// NOTE(review): `arm_aeabi_alias` presumably also exposes this as `__aeabi_f2h` on ARM —
// confirm against the `intrinsics!` macro definition.
#[avr_skip]
#[aapcs_on_arm]
#[arm_aeabi_alias = __aeabi_f2h]
pub extern "C" fn __truncsfhf2(a: f32) -> f16 {
trunc(a)
}
// `f32` -> `f16` under a second symbol name; same signature and body as `__truncsfhf2`.
#[avr_skip]
#[aapcs_on_arm]
pub extern "C" fn __gnu_f2h_ieee(a: f32) -> f16 {
trunc(a)
}
// `f64` -> `f16`.
// NOTE(review): presumably also exposed as `__aeabi_d2h` on ARM — confirm as above.
#[avr_skip]
#[aapcs_on_arm]
#[arm_aeabi_alias = __aeabi_d2h]
pub extern "C" fn __truncdfhf2(a: f64) -> f16 {
trunc(a)
}
// `f128` -> `f16`.
#[avr_skip]
#[aapcs_on_arm]
pub extern "C" fn __trunctfhf2(a: f128) -> f16 {
trunc(a)
}
// `f128` -> `f32`.
#[avr_skip]
#[aapcs_on_arm]
pub extern "C" fn __trunctfsf2(a: f128) -> f32 {
trunc(a)
}
// `f128` -> `f64`.
#[avr_skip]
#[aapcs_on_arm]
pub extern "C" fn __trunctfdf2(a: f128) -> f64 {
trunc(a)
}
}

View file

@ -13,6 +13,8 @@
#![feature(naked_functions)]
#![feature(repr_simd)]
#![feature(c_unwind)]
#![cfg_attr(not(feature = "no-f16-f128"), feature(f16))]
#![cfg_attr(not(feature = "no-f16-f128"), feature(f128))]
#![no_builtins]
#![no_std]
#![allow(unused_features)]

View file

@ -2,7 +2,7 @@
name = "testcrate"
version = "0.1.0"
authors = ["Alex Crichton <alex@alexcrichton.com>"]
edition = "2018"
edition = "2021"
[lib]
test = false
@ -13,6 +13,8 @@ doctest = false
# problems with system RNGs on the variety of platforms this crate is tested on.
# `xoshiro128**` is used for its quality, size, and speed at generating `u32` shift amounts.
rand_xoshiro = "0.6"
# To compare float builtins against
rustc_apfloat = "0.2.0"
[dependencies.compiler_builtins]
path = ".."
@ -28,5 +30,6 @@ utest-macros = { git = "https://github.com/japaric/utest" }
default = ["mangled-names"]
c = ["compiler_builtins/c"]
no-asm = ["compiler_builtins/no-asm"]
no-f16-f128 = ["compiler_builtins/no-f16-f128"]
mem = ["compiler_builtins/mem"]
mangled-names = ["compiler_builtins/mangled-names"]

View file

@ -1,3 +1,13 @@
#![cfg_attr(not(feature = "no-f16-f128"), feature(f16))]
#![cfg_attr(not(feature = "no-f16-f128"), feature(f128))]
// makes configuration easier
#![allow(unused_macros)]
use compiler_builtins::float::Float;
use rustc_apfloat::ieee::{Double, Single};
#[cfg(not(feature = "no-f16-f128"))]
use rustc_apfloat::ieee::{Half, Quad};
use rustc_apfloat::{Float as _, FloatConvert as _};
use testcrate::*;
macro_rules! i_to_f {
@ -130,3 +140,118 @@ fn float_to_int() {
);
});
}
// Fuzzes the conversion builtin `$fn` (`$fX` -> `$fD`) against `rustc_apfloat`.
//
// Each fuzzed input is converted twice: once through the `$apfloatX` -> `$apfloatD` soft-float
// conversion and once through the builtin. The two results are compared with
// `Float::eq_repr`; any disagreement panics with both values.
macro_rules! conv {
    ($fX:ident, $fD:ident, $fn:ident, $apfloatX:ident, $apfloatD:ident) => {
        fuzz_float(N, |x: $fX| {
            // Reference result, computed entirely in `rustc_apfloat`.
            let source = $apfloatX::from_bits(x.to_bits().into());
            let converted: $apfloatD = source.convert(&mut false).value;
            let tmp0 = $fD::from_bits(converted.to_bits().try_into().unwrap());

            // Result produced by the builtin under test.
            let tmp1: $fD = $fn(x);

            if Float::eq_repr(tmp0, tmp1) {
                return;
            }
            // `x`, `tmp0` and `tmp1` are captured by name in the format string.
            panic!(
                "{}({x:?}): apfloat: {tmp0:?}, builtins: {tmp1:?}",
                stringify!($fn)
            );
        });
    };
}
// Fuzzes the widening builtin `$fn` (`$fX` -> `$fD`) against the native `as` cast.
//
// The two results are compared with `Float::eq_repr`; a mismatch panics with the input, the
// cast result and the builtin result.
macro_rules! extend {
    ($fX:ident, $fD:ident, $fn:ident) => {
        fuzz_float(N, |x: $fX| {
            let expected = x as $fD;
            let actual: $fD = $fn(x);
            if Float::eq_repr(expected, actual) {
                return;
            }
            panic!(
                "{}({}): std: {}, builtins: {}",
                stringify!($fn),
                x,
                expected,
                actual
            );
        });
    };
}
// Skipped on PowerPC, where these tests are failing on LLVM 13:
// https://github.com/rust-lang/rust/issues/88520
#[cfg(not(target_arch = "powerpc64"))]
#[test]
fn float_extend() {
    use compiler_builtins::float::extend::__extendsfdf2;

    // `f32` -> `f64` is checked against both the native cast and `rustc_apfloat`.
    extend!(f32, f64, __extendsfdf2);
    conv!(f32, f64, __extendsfdf2, Single, Double);

    // The `f16`/`f128` builtins only exist when `no-f16-f128` is not enabled.
    #[cfg(not(feature = "no-f16-f128"))]
    {
        use compiler_builtins::float::extend::__extenddftf2;
        use compiler_builtins::float::extend::__extendhfsf2;
        use compiler_builtins::float::extend::__extendhftf2;
        use compiler_builtins::float::extend::__extendsftf2;
        use compiler_builtins::float::extend::__gnu_h2f_ieee;

        // FIXME(f16_f128): Also do extend!() for `f16` and `f128` when builtins are in nightly
        conv!(f16, f32, __extendhfsf2, Half, Single);
        conv!(f16, f32, __gnu_h2f_ieee, Half, Single);
        conv!(f16, f128, __extendhftf2, Half, Quad);
        conv!(f32, f128, __extendsftf2, Single, Quad);
        conv!(f64, f128, __extenddftf2, Double, Quad);
    }
}
// ARM-only: checks the `__extendsfdf2vfp` builtin (`f32` -> `f64`) against both the native
// `as` cast (`extend!`) and the `rustc_apfloat` conversion (`conv!`).
#[cfg(target_arch = "arm")]
#[test]
fn float_extend_arm() {
use compiler_builtins::float::extend::__extendsfdf2vfp;
extend!(f32, f64, __extendsfdf2vfp);
conv!(f32, f64, __extendsfdf2vfp, Single, Double);
}
// Fuzzes the narrowing builtin `$fn` (`$fX` -> `$fD`) against the native `as` cast.
//
// The two results are compared with `Float::eq_repr`; a mismatch panics with the input, the
// cast result and the builtin result.
macro_rules! trunc {
    ($fX:ident, $fD:ident, $fn:ident) => {
        fuzz_float(N, |x: $fX| {
            let expected = x as $fD;
            let actual: $fD = $fn(x);
            if Float::eq_repr(expected, actual) {
                return;
            }
            panic!(
                "{}({}): std: {}, builtins: {}",
                stringify!($fn),
                x,
                expected,
                actual
            );
        });
    };
}
// Exercises every truncation builtin: `f64` -> `f32` always, plus the `f16`/`f128` variants
// unless the `no-f16-f128` feature is enabled.
#[test]
fn float_trunc() {
    use compiler_builtins::float::trunc::__truncdfsf2;

    // `f64` -> `f32` is checked against both the native cast and `rustc_apfloat`.
    trunc!(f64, f32, __truncdfsf2);
    conv!(f64, f32, __truncdfsf2, Double, Single);

    #[cfg(not(feature = "no-f16-f128"))]
    {
        use compiler_builtins::float::trunc::__gnu_f2h_ieee;
        use compiler_builtins::float::trunc::__truncdfhf2;
        use compiler_builtins::float::trunc::__truncsfhf2;
        use compiler_builtins::float::trunc::__trunctfdf2;
        use compiler_builtins::float::trunc::__trunctfhf2;
        use compiler_builtins::float::trunc::__trunctfsf2;

        // FIXME(f16_f128): Also do trunc!() for `f16` and `f128` when builtins are in nightly
        conv!(f32, f16, __truncsfhf2, Single, Half);
        conv!(f32, f16, __gnu_f2h_ieee, Single, Half);
        conv!(f64, f16, __truncdfhf2, Double, Half);
        conv!(f128, f16, __trunctfhf2, Quad, Half);
        conv!(f128, f32, __trunctfsf2, Quad, Single);
        conv!(f128, f64, __trunctfdf2, Quad, Double);
    }
}
// ARM-only: checks the `__truncdfsf2vfp` builtin (`f64` -> `f32`) against both the native
// `as` cast (`trunc!`) and the `rustc_apfloat` conversion (`conv!`).
#[cfg(target_arch = "arm")]
#[test]
fn float_trunc_arm() {
    use compiler_builtins::float::trunc::__truncdfsf2vfp;

    trunc!(f64, f32, __truncdfsf2vfp);
    // The trailing `;` is required: without it the macro is expanded in expression position,
    // and because `conv!` itself ends in `;` that trips the
    // `semicolon_in_expressions_from_macros` future-incompatibility lint.
    conv!(f64, f32, __truncdfsf2vfp, Double, Single);
}

View file

@ -93,41 +93,6 @@ fn leading_zeros() {
})
}
// Fuzzes the widening builtin `$fn` (`$fX` -> `$fD`) against the native `as` cast.
//
// The two results are compared with `Float::eq_repr`; a mismatch panics with the input, the
// cast result and the builtin result.
macro_rules! extend {
    ($fX:ident, $fD:ident, $fn:ident) => {
        fuzz_float(N, |x: $fX| {
            let expected = x as $fD;
            let actual: $fD = $fn(x);
            if Float::eq_repr(expected, actual) {
                return;
            }
            panic!(
                "{}({}): std: {}, builtins: {}",
                stringify!($fn),
                x,
                expected,
                actual
            );
        });
    };
}
// Checks the `__extendsfdf2` builtin (`f32` -> `f64`) against the native `as` cast.
// PowerPC tests are failing on LLVM 13: https://github.com/rust-lang/rust/issues/88520
#[cfg(not(target_arch = "powerpc64"))]
#[test]
fn float_extend() {
use compiler_builtins::float::extend::__extendsfdf2;
extend!(f32, f64, __extendsfdf2);
}
// ARM-only: checks the `__extendsfdf2vfp` builtin (`f32` -> `f64`) against the native `as` cast.
#[cfg(target_arch = "arm")]
#[test]
fn float_extend_arm() {
use compiler_builtins::float::extend::__extendsfdf2vfp;
extend!(f32, f64, __extendsfdf2vfp);
}
// This is approximate because of issues related to
// https://github.com/rust-lang/rust/issues/73920.
// TODO how do we resolve this indeterminacy?
@ -179,36 +144,3 @@ fn float_pow() {
f64, 1e-12, __powidf2;
);
}
// Fuzzes the narrowing builtin `$fn` (`$fX` -> `$fD`) against the native `as` cast.
//
// The two results are compared with `Float::eq_repr`; a mismatch panics with the input, the
// cast result and the builtin result.
macro_rules! trunc {
    ($fX:ident, $fD:ident, $fn:ident) => {
        fuzz_float(N, |x: $fX| {
            let expected = x as $fD;
            let actual: $fD = $fn(x);
            if Float::eq_repr(expected, actual) {
                return;
            }
            panic!(
                "{}({}): std: {}, builtins: {}",
                stringify!($fn),
                x,
                expected,
                actual
            );
        });
    };
}
// Checks the `__truncdfsf2` builtin (`f64` -> `f32`) against the native `as` cast.
#[test]
fn float_trunc() {
use compiler_builtins::float::trunc::__truncdfsf2;
trunc!(f64, f32, __truncdfsf2);
}
// ARM-only: checks the `__truncdfsf2vfp` builtin (`f64` -> `f32`) against the native `as` cast.
#[cfg(target_arch = "arm")]
#[test]
fn float_trunc_arm() {
use compiler_builtins::float::trunc::__truncdfsf2vfp;
trunc!(f64, f32, __truncdfsf2vfp);
}