Add __extendhfdf2 and add __truncdfhf2 test

LLVM doesn't seem to emit this intrinsic but it probably should; in some
cases it lowers f16->f64 conversions as f16->f32->f64 with two libcalls [1].
GCC provides this intrinsic so it is good to have anyway.

Additionally, add a test for f64->f16 which was missing.

[1]: https://rust.godbolt.org/z/xezM9PEnz
This commit is contained in:
Trevor Gross 2025-03-04 16:21:15 -05:00 committed by Trevor Gross
parent 41f1ad2732
commit 2de09ac46a
6 changed files with 47 additions and 2 deletions

View file

@ -96,6 +96,14 @@ intrinsics! {
extend(a)
}
// f16 -> f64 widening conversion exposed under the symbol name `__extendhfdf2`.
// The body only forwards to the shared `extend` helper, which is defined
// elsewhere in this file. Compiled only when the `f16_enabled` cfg is set.
// NOTE(review): the remaining attributes are consumed by the surrounding
// `intrinsics!` macro; their exact effect is not visible from here.
#[avr_skip]
#[aapcs_on_arm]
#[apple_f16_arg_abi]
#[cfg(f16_enabled)]
pub extern "C" fn __extendhfdf2(a: f16) -> f64 {
extend(a)
}
#[avr_skip]
#[aapcs_on_arm]
#[ppc_alias = __extendhfkf2]

View file

@ -43,8 +43,9 @@ no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"]
# Some platforms have f128 functions for everything except integer conversions
no-sys-f128-int-convert = []
no-sys-f16-f128-convert = []
no-sys-f16-f64-convert = []
# Skip tests that rely on f16 symbols being available on the system
no-sys-f16 = []
no-sys-f16 = ["no-sys-f16-f64-convert"]
# Enable report generation without bringing in more dependencies by default
benchmarking-reports = ["criterion/plotters", "criterion/html_reports"]

View file

@ -28,6 +28,28 @@ float_bench! {
],
}
// Benchmark definition for the f16 -> f64 extension, built only when the
// `f16_enabled` cfg is set. It names the crate implementation
// (`extend::__extendhfdf2`) and the system symbol `__extendhfdf2`; the system
// symbol is marked available unless the `no-sys-f16-f64-convert` feature is on.
// NOTE(review): how `float_bench!` uses each field is defined by the macro,
// which is not visible here.
#[cfg(f16_enabled)]
float_bench! {
name: extend_f16_f64,
sig: (a: f16) -> f64,
crate_fn: extend::__extendhfdf2,
sys_fn: __extendhfdf2,
sys_available: not(feature = "no-sys-f16-f64-convert"),
asm: [
// aarch64-only variant: performs the conversion with a single `fcvt`
// instruction on vector registers.
#[cfg(target_arch = "aarch64")] {
let ret: f64;
asm!(
"fcvt {ret:d}, {a:h}",
a = in(vreg) a,
ret = lateout(vreg) ret,
options(nomem, nostack, pure),
);
ret
};
],
}
#[cfg(all(f16_enabled, f128_enabled))]
float_bench! {
name: extend_f16_f128,
@ -93,6 +115,7 @@ pub fn float_extend() {
#[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
{
extend_f16_f32(&mut criterion);
extend_f16_f64(&mut criterion);
#[cfg(f128_enabled)]
extend_f16_f128(&mut criterion);

View file

@ -33,7 +33,7 @@ float_bench! {
sig: (a: f64) -> f16,
crate_fn: trunc::__truncdfhf2,
sys_fn: __truncdfhf2,
sys_available: not(feature = "no-sys-f16"),
sys_available: not(feature = "no-sys-f16-f64-convert"),
asm: [
#[cfg(target_arch = "aarch64")] {
let ret: f16;

View file

@ -6,6 +6,7 @@ enum Feature {
NoSysF128,
NoSysF128IntConvert,
NoSysF16,
NoSysF16F64Convert,
NoSysF16F128Convert,
}
@ -66,9 +67,15 @@ fn main() {
|| target.arch == "wasm64"
{
features.insert(Feature::NoSysF16);
features.insert(Feature::NoSysF16F64Convert);
features.insert(Feature::NoSysF16F128Convert);
}
// These platforms are missing either `__extendhfdf2` or `__truncdfhf2`.
if target.vendor == "apple" || target.os == "windows" {
features.insert(Feature::NoSysF16F64Convert);
}
for feature in features {
let (name, warning) = match feature {
Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"),
@ -76,6 +83,10 @@ fn main() {
"no-sys-f128-int-convert",
"using apfloat fallback for f128 <-> int conversions",
),
Feature::NoSysF16F64Convert => (
"no-sys-f16-f64-convert",
"using apfloat fallback for f16 <-> f64 conversions",
),
Feature::NoSysF16F128Convert => (
"no-sys-f16-f128-convert",
"using apfloat fallback for f16 <-> f128 conversions",

View file

@ -311,6 +311,7 @@ mod extend {
extend,
f16 => f32, Half => Single, __extendhfsf2, not(feature = "no-sys-f16");
f16 => f32, Half => Single, __gnu_h2f_ieee, not(feature = "no-sys-f16");
f16 => f64, Half => Double, __extendhfdf2, not(feature = "no-sys-f16-f64-convert");
f16 => f128, Half => Quad, __extendhftf2, not(feature = "no-sys-f16-f128-convert");
f32 => f128, Single => Quad, __extendsftf2, not(feature = "no-sys-f128");
f64 => f128, Double => Quad, __extenddftf2, not(feature = "no-sys-f128");
@ -340,6 +341,7 @@ mod trunc {
trunc,
f32 => f16, Single => Half, __truncsfhf2, not(feature = "no-sys-f16");
f32 => f16, Single => Half, __gnu_f2h_ieee, not(feature = "no-sys-f16");
f64 => f16, Double => Half, __truncdfhf2, not(feature = "no-sys-f16-f64-convert");
f128 => f16, Quad => Half, __trunctfhf2, not(feature = "no-sys-f16-f128-convert");
f128 => f32, Quad => Single, __trunctfsf2, not(feature = "no-sys-f128");
f128 => f64, Quad => Double, __trunctfdf2, not(feature = "no-sys-f128");