Add __extendhfdf2 and add __truncdfhf2 test
LLVM doesn't seem to emit this intrinsic, but it probably should: in some cases it lowers f16->f64 conversions as f16->f32->f64 with two libcalls [1]. GCC provides this intrinsic, so it is good to have anyway. Additionally, add a test for f64->f16, which was missing. [1]: https://rust.godbolt.org/z/xezM9PEnz
This commit is contained in:
parent
41f1ad2732
commit
2de09ac46a
6 changed files with 47 additions and 2 deletions
|
|
@ -96,6 +96,14 @@ intrinsics! {
|
|||
extend(a)
|
||||
}
|
||||
|
||||
// `__extendhfdf2`: takes an `f16` and returns an `f64`, delegating the
// conversion to `extend`. Only compiled when the `f16_enabled` cfg is set.
// Per the commit message, GCC provides a symbol of this name, while LLVM may
// instead lower f16->f64 as f16->f32->f64 with two libcalls.
#[avr_skip]
|
||||
#[aapcs_on_arm]
|
||||
#[apple_f16_arg_abi]
|
||||
#[cfg(f16_enabled)]
|
||||
pub extern "C" fn __extendhfdf2(a: f16) -> f64 {
|
||||
extend(a)
|
||||
}
|
||||
|
||||
#[avr_skip]
|
||||
#[aapcs_on_arm]
|
||||
#[ppc_alias = __extendhfkf2]
|
||||
|
|
|
|||
|
|
@ -43,8 +43,9 @@ no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"]
|
|||
# Some platforms have some f128 functions but everything except integer conversions
|
||||
no-sys-f128-int-convert = []
|
||||
no-sys-f16-f128-convert = []
|
||||
no-sys-f16-f64-convert = []
|
||||
# Skip tests that rely on f16 symbols being available on the system
|
||||
no-sys-f16 = []
|
||||
no-sys-f16 = ["no-sys-f16-f64-convert"]
|
||||
|
||||
# Enable report generation without bringing in more dependencies by default
|
||||
benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]
|
||||
|
|
|
|||
|
|
@ -28,6 +28,28 @@ float_bench! {
|
|||
],
|
||||
}
|
||||
|
||||
// Declares the `extend_f16_f64` benchmark via `float_bench!`, pairing
// `extend::__extendhfdf2` (`crate_fn`) with the `__extendhfdf2` symbol
// (`sys_fn`). Only compiled when the `f16_enabled` cfg is set.
#[cfg(f16_enabled)]
|
||||
float_bench! {
|
||||
name: extend_f16_f64,
|
||||
sig: (a: f16) -> f64,
|
||||
crate_fn: extend::__extendhfdf2,
|
||||
sys_fn: __extendhfdf2,
|
||||
// The `sys_fn` side is marked available unless the
// `no-sys-f16-f64-convert` feature is enabled.
sys_available: not(feature = "no-sys-f16-f64-convert"),
|
||||
asm: [
|
||||
// aarch64 only: a single `fcvt` that reads `a` through the `h` register
// modifier and writes `ret` through the `d` register modifier.
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f64;
|
||||
asm!(
|
||||
"fcvt {ret:d}, {a:h}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack, pure),
|
||||
);
|
||||
|
||||
ret
|
||||
};
|
||||
],
|
||||
}
|
||||
|
||||
#[cfg(all(f16_enabled, f128_enabled))]
|
||||
float_bench! {
|
||||
name: extend_f16_f128,
|
||||
|
|
@ -93,6 +115,7 @@ pub fn float_extend() {
|
|||
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
|
||||
{
|
||||
extend_f16_f32(&mut criterion);
|
||||
extend_f16_f64(&mut criterion);
|
||||
|
||||
#[cfg(f128_enabled)]
|
||||
extend_f16_f128(&mut criterion);
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ float_bench! {
|
|||
sig: (a: f64) -> f16,
|
||||
crate_fn: trunc::__truncdfhf2,
|
||||
sys_fn: __truncdfhf2,
|
||||
sys_available: not(feature = "no-sys-f16"),
|
||||
sys_available: not(feature = "no-sys-f16-f64-convert"),
|
||||
asm: [
|
||||
#[cfg(target_arch = "aarch64")] {
|
||||
let ret: f16;
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ enum Feature {
|
|||
NoSysF128,
|
||||
NoSysF128IntConvert,
|
||||
NoSysF16,
|
||||
NoSysF16F64Convert,
|
||||
NoSysF16F128Convert,
|
||||
}
|
||||
|
||||
|
|
@ -66,9 +67,15 @@ fn main() {
|
|||
|| target.arch == "wasm64"
|
||||
{
|
||||
features.insert(Feature::NoSysF16);
|
||||
features.insert(Feature::NoSysF16F64Convert);
|
||||
features.insert(Feature::NoSysF16F128Convert);
|
||||
}
|
||||
|
||||
// These platforms are missing either `__extendhfdf2` or `__truncdfhf2`.
|
||||
if target.vendor == "apple" || target.os == "windows" {
|
||||
features.insert(Feature::NoSysF16F64Convert);
|
||||
}
|
||||
|
||||
for feature in features {
|
||||
let (name, warning) = match feature {
|
||||
Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"),
|
||||
|
|
@ -76,6 +83,10 @@ fn main() {
|
|||
"no-sys-f128-int-convert",
|
||||
"using apfloat fallback for f128 <-> int conversions",
|
||||
),
|
||||
Feature::NoSysF16F64Convert => (
|
||||
"no-sys-f16-f64-convert",
|
||||
"using apfloat fallback for f16 <-> f64 conversions",
|
||||
),
|
||||
Feature::NoSysF16F128Convert => (
|
||||
"no-sys-f16-f128-convert",
|
||||
"using apfloat fallback for f16 <-> f128 conversions",
|
||||
|
|
|
|||
|
|
@ -311,6 +311,7 @@ mod extend {
|
|||
extend,
|
||||
f16 => f32, Half => Single, __extendhfsf2, not(feature = "no-sys-f16");
|
||||
f16 => f32, Half => Single, __gnu_h2f_ieee, not(feature = "no-sys-f16");
|
||||
f16 => f64, Half => Double, __extendhfdf2, not(feature = "no-sys-f16-f64-convert");
|
||||
f16 => f128, Half => Quad, __extendhftf2, not(feature = "no-sys-f16-f128-convert");
|
||||
f32 => f128, Single => Quad, __extendsftf2, not(feature = "no-sys-f128");
|
||||
f64 => f128, Double => Quad, __extenddftf2, not(feature = "no-sys-f128");
|
||||
|
|
@ -340,6 +341,7 @@ mod trunc {
|
|||
trunc,
|
||||
f32 => f16, Single => Half, __truncsfhf2, not(feature = "no-sys-f16");
|
||||
f32 => f16, Single => Half, __gnu_f2h_ieee, not(feature = "no-sys-f16");
|
||||
f64 => f16, Double => Half, __truncdfhf2, not(feature = "no-sys-f16-f64-convert");
|
||||
f128 => f16, Quad => Half, __trunctfhf2, not(feature = "no-sys-f16-f128-convert");
|
||||
f128 => f32, Quad => Single, __trunctfsf2, not(feature = "no-sys-f128");
|
||||
f128 => f64, Quad => Double, __trunctfdf2, not(feature = "no-sys-f128");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue