From e46186f9944cf10c35b228e38ba6312903587896 Mon Sep 17 00:00:00 2001
From: beetrees <b@beetr.ee>
Date: Fri, 23 May 2025 15:51:51 +0100
Subject: [PATCH] Add support for casting to and from `f16`/`f128`

---
 src/cast.rs              |  21 ++++++-
 src/codegen_f16_f128.rs  | 123 +++++++++++++++++++++++++++++++++++++++
 src/compiler_builtins.rs |  19 ++++++
 3 files changed, 161 insertions(+), 2 deletions(-)

diff --git a/src/cast.rs b/src/cast.rs
index e2346324232f..8a725680e705 100644
--- a/src/cast.rs
+++ b/src/cast.rs
@@ -1,5 +1,6 @@
 //! Various number casting functions
 
+use crate::codegen_f16_f128;
 use crate::prelude::*;
 
 pub(crate) fn clif_intcast(
@@ -36,6 +37,14 @@ pub(crate) fn clif_int_or_float_cast(
 ) -> Value {
     let from_ty = fx.bcx.func.dfg.value_type(from);
 
+    // FIXME(bytecodealliance/wasmtime#8312): Remove in favour of native
+    // Cranelift operations once Cranelift backends have lowerings for them.
+    if (matches!(from_ty, types::F16 | types::F128) || matches!(to_ty, types::F16 | types::F128))
+        && from_ty != to_ty
+    {
+        return codegen_f16_f128::codegen_cast(fx, from, from_signed, to_ty, to_signed);
+    }
+
     if from_ty.is_int() && to_ty.is_int() {
         // int-like -> int-like
         clif_intcast(
@@ -58,8 +67,10 @@ pub(crate) fn clif_int_or_float_cast(
                 "__float{sign}ti{flt}f",
                 sign = if from_signed { "" } else { "un" },
                 flt = match to_ty {
+                    types::F16 => "h",
                     types::F32 => "s",
                     types::F64 => "d",
+                    types::F128 => "t",
                     _ => unreachable!("{:?}", to_ty),
                 },
             );
@@ -90,8 +101,10 @@ pub(crate) fn clif_int_or_float_cast(
                 "__fix{sign}{flt}fti",
                 sign = if to_signed { "" } else { "uns" },
                 flt = match from_ty {
+                    types::F16 => "h",
                     types::F32 => "s",
                     types::F64 => "d",
+                    types::F128 => "t",
                     _ => unreachable!("{:?}", to_ty),
                 },
             );
@@ -145,8 +158,12 @@ pub(crate) fn clif_int_or_float_cast(
     } else if from_ty.is_float() && to_ty.is_float() {
         // float -> float
         match (from_ty, to_ty) {
-            (types::F32, types::F64) => fx.bcx.ins().fpromote(types::F64, from),
-            (types::F64, types::F32) => fx.bcx.ins().fdemote(types::F32, from),
+            (types::F16, types::F32 | types::F64 | types::F128)
+            | (types::F32, types::F64 | types::F128)
+            | (types::F64, types::F128) => fx.bcx.ins().fpromote(to_ty, from),
+            (types::F128, types::F64 | types::F32 | types::F16)
+            | (types::F64, types::F32 | types::F16)
+            | (types::F32, types::F16) => fx.bcx.ins().fdemote(to_ty, from),
             _ => from,
         }
     } else {
diff --git a/src/codegen_f16_f128.rs b/src/codegen_f16_f128.rs
index c570fbbd993d..a887341cec43 100644
--- a/src/codegen_f16_f128.rs
+++ b/src/codegen_f16_f128.rs
@@ -13,6 +13,11 @@ pub(crate) fn f16_to_f32(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value
     fx.lib_call("__extendhfsf2", vec![arg_ty], vec![AbiParam::new(types::F32)], &[value])[0]
 }
 
+fn f16_to_f64(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value {
+    let as_f32 = f16_to_f32(fx, value); // f16 -> f32 via the `__extendhfsf2` libcall
+    fx.bcx.ins().fpromote(types::F64, as_f32) // then widen f32 -> f64 with `fpromote`
+}
+
 pub(crate) fn f32_to_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value {
     let ret_ty = if fx.tcx.sess.target.vendor == "apple" && fx.tcx.sess.target.arch == "x86_64" {
         types::I16
@@ -28,6 +33,21 @@ pub(crate) fn f32_to_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value
     if ret_ty == types::I16 { fx.bcx.ins().bitcast(types::F16, MemFlags::new(), ret) } else { ret }
 }
 
+fn f64_to_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value {
+    // On x86_64 Apple targets the libcall is declared as returning the half as `i16`.
+    let int_abi = fx.tcx.sess.target.vendor == "apple" && fx.tcx.sess.target.arch == "x86_64";
+    let abi_ret_ty = if int_abi { types::I16 } else { types::F16 };
+    let params = vec![AbiParam::new(types::F64)];
+    let returns = vec![AbiParam::new(abi_ret_ty)];
+    let raw = fx.lib_call("__truncdfhf2", params, returns, &[value])[0];
+    if !int_abi {
+        // The libcall result already has type `f16`.
+        return raw;
+    }
+    // Reinterpret the returned `i16` as an `f16` value.
+    fx.bcx.ins().bitcast(types::F16, MemFlags::new(), raw)
+}
+
 pub(crate) fn fcmp(fx: &mut FunctionCx<'_, '_, '_>, cc: FloatCC, lhs: Value, rhs: Value) -> Value {
     let ty = fx.bcx.func.dfg.value_type(lhs);
     match ty {
@@ -99,6 +119,109 @@ pub(crate) fn neg_f128(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value {
     fx.bcx.ins().bitcast(types::F128, MemFlags::new(), bits)
 }
 
+/// Lowers a cast whose source or target type is `f16` or `f128`.
+pub(crate) fn codegen_cast(
+    fx: &mut FunctionCx<'_, '_, '_>,
+    from: Value,
+    from_signed: bool,
+    to_ty: Type,
+    to_signed: bool,
+) -> Value {
+    let from_ty = fx.bcx.func.dfg.value_type(from);
+    if from_ty.is_float() && to_ty.is_float() {
+        let name = match (from_ty, to_ty) {
+            (types::F16, types::F32) => return f16_to_f32(fx, from),
+            (types::F16, types::F64) => return f16_to_f64(fx, from),
+            (types::F16, types::F128) => "__extendhftf2",
+            (types::F32, types::F128) => "__extendsftf2",
+            (types::F64, types::F128) => "__extenddftf2",
+            (types::F128, types::F64) => "__trunctfdf2",
+            (types::F128, types::F32) => "__trunctfsf2",
+            (types::F128, types::F16) => "__trunctfhf2",
+            (types::F64, types::F16) => return f64_to_f16(fx, from),
+            (types::F32, types::F16) => return f32_to_f16(fx, from),
+            _ => unreachable!("{from_ty:?} -> {to_ty:?}"),
+        };
+        fx.lib_call(name, vec![AbiParam::new(from_ty)], vec![AbiParam::new(to_ty)], &[from])[0]
+    } else if from_ty.is_int() && to_ty == types::F16 {
+        let res = clif_int_or_float_cast(fx, from, from_signed, types::F32, false);
+        f32_to_f16(fx, res)
+    } else if from_ty == types::F16 && to_ty.is_int() {
+        let from = f16_to_f32(fx, from);
+        clif_int_or_float_cast(fx, from, false, to_ty, to_signed)
+    } else if from_ty.is_int() && to_ty == types::F128 {
+        let (from, from_ty) = if from_ty.bits() < 32 {
+            (clif_int_or_float_cast(fx, from, from_signed, types::I32, from_signed), types::I32)
+        } else {
+            (from, from_ty)
+        };
+        let name = format!(
+            "__float{sign}{size}itf",
+            sign = if from_signed { "" } else { "un" },
+            size = match from_ty {
+                types::I32 => 's',
+                types::I64 => 'd',
+                types::I128 => 't',
+                _ => unreachable!("{from_ty:?}"),
+            },
+        );
+        fx.lib_call(
+            &name,
+            vec![lib_call_arg_param(fx.tcx, from_ty, from_signed)],
+            vec![AbiParam::new(to_ty)],
+            &[from],
+        )[0]
+    } else if from_ty == types::F128 && to_ty.is_int() {
+        // Targets narrower than 32 bits are converted to `i32` first, then clamped and reduced.
+        let ret_ty = if to_ty.bits() < 32 { types::I32 } else { to_ty };
+        let name = format!(
+            "__fix{sign}tf{size}i",
+            sign = if to_signed { "" } else { "uns" },
+            size = match ret_ty {
+                types::I32 => 's',
+                types::I64 => 'd',
+                types::I128 => 't',
+                _ => unreachable!("{ret_ty:?}"),
+            },
+        );
+        let returns = vec![AbiParam::new(ret_ty)];
+        let ret = fx.lib_call(&name, vec![AbiParam::new(from_ty)], returns, &[from])[0];
+        let val = if ret_ty == to_ty {
+            ret
+        } else {
+            let (min, max) = match (to_ty, to_signed) {
+                (types::I8, false) => (0, i64::from(u8::MAX)),
+                (types::I16, false) => (0, i64::from(u16::MAX)),
+                (types::I8, true) => (i64::from(i8::MIN as u32), i64::from(i8::MAX as u32)),
+                (types::I16, true) => (i64::from(i16::MIN as u32), i64::from(i16::MAX as u32)),
+                _ => unreachable!("{to_ty:?}"),
+            };
+            let min_val = fx.bcx.ins().iconst(types::I32, min);
+            let max_val = fx.bcx.ins().iconst(types::I32, max);
+            let val = if to_signed {
+                let has_underflow = fx.bcx.ins().icmp_imm(IntCC::SignedLessThan, ret, min);
+                let has_overflow = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThan, ret, max);
+                let bottom_capped = fx.bcx.ins().select(has_underflow, min_val, ret);
+                fx.bcx.ins().select(has_overflow, max_val, bottom_capped)
+            } else {
+                let has_overflow = fx.bcx.ins().icmp_imm(IntCC::UnsignedGreaterThan, ret, max);
+                fx.bcx.ins().select(has_overflow, max_val, ret)
+            };
+            fx.bcx.ins().ireduce(to_ty, val)
+        };
+
+        if let Some(false) = fx.tcx.sess.opts.unstable_opts.saturating_float_casts {
+            return val;
+        }
+        // `from == from` is false only for NaN; a NaN input is mapped to zero.
+        let is_not_nan = fcmp(fx, FloatCC::Equal, from, from);
+        let zero = type_zero_value(&mut fx.bcx, to_ty);
+        fx.bcx.ins().select(is_not_nan, val, zero)
+    } else {
+        unreachable!("{from_ty:?} -> {to_ty:?}");
+    }
+}
+
 pub(crate) fn fmin_f128(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value {
     fx.lib_call(
         "fminimumf128",
diff --git a/src/compiler_builtins.rs b/src/compiler_builtins.rs
index 017a1370abd7..d9cbffd7ae01 100644
--- a/src/compiler_builtins.rs
+++ b/src/compiler_builtins.rs
@@ -49,15 +49,34 @@ builtin_functions! {
     // integer -> float
     fn __floattisf(i: i128) -> f32;
     fn __floattidf(i: i128) -> f64;
+    fn __floatsitf(i: i32) -> f128;
+    fn __floatditf(i: i64) -> f128;
+    fn __floattitf(i: i128) -> f128;
     fn __floatuntisf(i: u128) -> f32;
     fn __floatuntidf(i: u128) -> f64;
+    fn __floatunsitf(i: u32) -> f128;
+    fn __floatunditf(i: u64) -> f128;
+    fn __floatuntitf(i: u128) -> f128;
     // float -> integer
     fn __fixsfti(f: f32) -> i128;
     fn __fixdfti(f: f64) -> i128;
+    fn __fixtfsi(f: f128) -> i32;
+    fn __fixtfdi(f: f128) -> i64;
+    fn __fixtfti(f: f128) -> i128;
     fn __fixunssfti(f: f32) -> u128;
     fn __fixunsdfti(f: f64) -> u128;
+    fn __fixunstfsi(f: f128) -> u32;
+    fn __fixunstfdi(f: f128) -> u64;
+    fn __fixunstfti(f: f128) -> u128;
     // float -> float
     fn __extendhfsf2(f: f16) -> f32;
+    fn __extendhftf2(f: f16) -> f128;
+    fn __extendsftf2(f: f32) -> f128;
+    fn __extenddftf2(f: f64) -> f128;
+    fn __trunctfdf2(f: f128) -> f64;
+    fn __trunctfsf2(f: f128) -> f32;
+    fn __trunctfhf2(f: f128) -> f16;
+    fn __truncdfhf2(f: f64) -> f16;
     fn __truncsfhf2(f: f32) -> f16;
     // float binops
     fn __addtf3(a: f128, b: f128) -> f128;