From e46186f9944cf10c35b228e38ba6312903587896 Mon Sep 17 00:00:00 2001 From: beetrees Date: Fri, 23 May 2025 15:51:51 +0100 Subject: [PATCH] Add support for casting to and from `f16`/`f128` --- src/cast.rs | 21 ++++++- src/codegen_f16_f128.rs | 123 +++++++++++++++++++++++++++++++++++++++ src/compiler_builtins.rs | 19 ++++++ 3 files changed, 161 insertions(+), 2 deletions(-) diff --git a/src/cast.rs b/src/cast.rs index e2346324232f..8a725680e705 100644 --- a/src/cast.rs +++ b/src/cast.rs @@ -1,5 +1,6 @@ //! Various number casting functions +use crate::codegen_f16_f128; use crate::prelude::*; pub(crate) fn clif_intcast( @@ -36,6 +37,14 @@ pub(crate) fn clif_int_or_float_cast( ) -> Value { let from_ty = fx.bcx.func.dfg.value_type(from); + // FIXME(bytecodealliance/wasmtime#8312): Remove in favour of native + // Cranelift operations once Cranelift backends have lowerings for them. + if matches!(from_ty, types::F16 | types::F128) + || matches!(to_ty, types::F16 | types::F128) && from_ty != to_ty + { + return codegen_f16_f128::codegen_cast(fx, from, from_signed, to_ty, to_signed); + } + if from_ty.is_int() && to_ty.is_int() { // int-like -> int-like clif_intcast( @@ -58,8 +67,10 @@ pub(crate) fn clif_int_or_float_cast( "__float{sign}ti{flt}f", sign = if from_signed { "" } else { "un" }, flt = match to_ty { + types::F16 => "h", types::F32 => "s", types::F64 => "d", + types::F128 => "t", _ => unreachable!("{:?}", to_ty), }, ); @@ -90,8 +101,10 @@ pub(crate) fn clif_int_or_float_cast( "__fix{sign}{flt}fti", sign = if to_signed { "" } else { "uns" }, flt = match from_ty { + types::F16 => "h", types::F32 => "s", types::F64 => "d", + types::F128 => "t", _ => unreachable!("{:?}", to_ty), }, ); @@ -145,8 +158,12 @@ pub(crate) fn clif_int_or_float_cast( } else if from_ty.is_float() && to_ty.is_float() { // float -> float match (from_ty, to_ty) { - (types::F32, types::F64) => fx.bcx.ins().fpromote(types::F64, from), - (types::F64, types::F32) => fx.bcx.ins().fdemote(types::F32, from), + (types::F16, types::F32 | types::F64 | types::F128) + | (types::F32, types::F64 | types::F128) + | (types::F64, types::F128) => fx.bcx.ins().fpromote(to_ty, from), + (types::F128, types::F64 | types::F32 | types::F16) + | (types::F64, types::F32 | types::F16) + | (types::F32, types::F16) => fx.bcx.ins().fdemote(to_ty, from), _ => from, } } else { diff --git a/src/codegen_f16_f128.rs b/src/codegen_f16_f128.rs index c570fbbd993d..a887341cec43 100644 --- a/src/codegen_f16_f128.rs +++ b/src/codegen_f16_f128.rs @@ -13,6 +13,11 @@ pub(crate) fn f16_to_f32(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value fx.lib_call("__extendhfsf2", vec![arg_ty], vec![AbiParam::new(types::F32)], &[value])[0] } +fn f16_to_f64(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let ret = f16_to_f32(fx, value); + fx.bcx.ins().fpromote(types::F64, ret) +} + pub(crate) fn f32_to_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { let ret_ty = if fx.tcx.sess.target.vendor == "apple" && fx.tcx.sess.target.arch == "x86_64" { types::I16 @@ -28,6 +33,21 @@ pub(crate) fn f32_to_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value if ret_ty == types::I16 { fx.bcx.ins().bitcast(types::F16, MemFlags::new(), ret) } else { ret } } +fn f64_to_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { + let ret_ty = if fx.tcx.sess.target.vendor == "apple" && fx.tcx.sess.target.arch == "x86_64" { + types::I16 + } else { + types::F16 + }; + let ret = fx.lib_call( + "__truncdfhf2", + vec![AbiParam::new(types::F64)], + vec![AbiParam::new(ret_ty)], + &[value], + )[0]; + if ret_ty == types::I16 { fx.bcx.ins().bitcast(types::F16, MemFlags::new(), ret) } else { ret } +} + pub(crate) fn fcmp(fx: &mut FunctionCx<'_, '_, '_>, cc: FloatCC, lhs: Value, rhs: Value) -> Value { let ty = fx.bcx.func.dfg.value_type(lhs); match ty { @@ -99,6 +119,109 @@ pub(crate) fn neg_f128(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { fx.bcx.ins().bitcast(types::F128, MemFlags::new(), bits) } +pub(crate) fn codegen_cast( + fx: &mut FunctionCx<'_, '_, '_>, + from: Value, + from_signed: bool, + to_ty: Type, + to_signed: bool, +) -> Value { + let from_ty = fx.bcx.func.dfg.value_type(from); + if from_ty.is_float() && to_ty.is_float() { + let name = match (from_ty, to_ty) { + (types::F16, types::F32) => return f16_to_f32(fx, from), + (types::F16, types::F64) => return f16_to_f64(fx, from), + (types::F16, types::F128) => "__extendhftf2", + (types::F32, types::F128) => "__extendsftf2", + (types::F64, types::F128) => "__extenddftf2", + (types::F128, types::F64) => "__trunctfdf2", + (types::F128, types::F32) => "__trunctfsf2", + (types::F128, types::F16) => "__trunctfhf2", + (types::F64, types::F16) => return f64_to_f16(fx, from), + (types::F32, types::F16) => return f32_to_f16(fx, from), + _ => unreachable!("{from_ty:?} -> {to_ty:?}"), + }; + fx.lib_call(name, vec![AbiParam::new(from_ty)], vec![AbiParam::new(to_ty)], &[from])[0] + } else if from_ty.is_int() && to_ty == types::F16 { + let res = clif_int_or_float_cast(fx, from, from_signed, types::F32, false); + f32_to_f16(fx, res) + } else if from_ty == types::F16 && to_ty.is_int() { + let from = f16_to_f32(fx, from); + clif_int_or_float_cast(fx, from, false, to_ty, to_signed) + } else if from_ty.is_int() && to_ty == types::F128 { + let (from, from_ty) = if from_ty.bits() < 32 { + (clif_int_or_float_cast(fx, from, from_signed, types::I32, from_signed), types::I32) + } else { + (from, from_ty) + }; + let name = format!( + "__float{sign}{size}itf", + sign = if from_signed { "" } else { "un" }, + size = match from_ty { + types::I32 => 's', + types::I64 => 'd', + types::I128 => 't', + _ => unreachable!("{from_ty:?}"), + }, + ); + fx.lib_call( + &name, + vec![lib_call_arg_param(fx.tcx, from_ty, from_signed)], + vec![AbiParam::new(to_ty)], + &[from], + )[0] + } else if from_ty == types::F128 && to_ty.is_int() { + let ret_ty = if to_ty.bits() < 32 { types::I32 } else { to_ty }; + let name = format!( + "__fix{sign}tf{size}i", + sign = if from_signed { "" } else { "un" }, + size = match ret_ty { + types::I32 => 's', + types::I64 => 'd', + types::I128 => 't', + _ => unreachable!("{from_ty:?}"), + }, + ); + let ret = + fx.lib_call(&name, vec![AbiParam::new(from_ty)], vec![AbiParam::new(to_ty)], &[from]) + [0]; + let val = if ret_ty == to_ty { + ret + } else { + let (min, max) = match (to_ty, to_signed) { + (types::I8, false) => (0, i64::from(u8::MAX)), + (types::I16, false) => (0, i64::from(u16::MAX)), + (types::I8, true) => (i64::from(i8::MIN as u32), i64::from(i8::MAX as u32)), + (types::I16, true) => (i64::from(i16::MIN as u32), i64::from(i16::MAX as u32)), + _ => unreachable!("{to_ty:?}"), + }; + let min_val = fx.bcx.ins().iconst(types::I32, min); + let max_val = fx.bcx.ins().iconst(types::I32, max); + + let val = if to_signed { + let has_underflow = fx.bcx.ins().icmp_imm(IntCC::SignedLessThan, ret, min); + let has_overflow = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThan, ret, max); + let bottom_capped = fx.bcx.ins().select(has_underflow, min_val, ret); + fx.bcx.ins().select(has_overflow, max_val, bottom_capped) + } else { + let has_overflow = fx.bcx.ins().icmp_imm(IntCC::UnsignedGreaterThan, ret, max); + fx.bcx.ins().select(has_overflow, max_val, ret) + }; + fx.bcx.ins().ireduce(to_ty, val) + }; + + if let Some(false) = fx.tcx.sess.opts.unstable_opts.saturating_float_casts { + return val; + } + + let is_not_nan = fcmp(fx, FloatCC::Equal, from, from); + let zero = type_zero_value(&mut fx.bcx, to_ty); + fx.bcx.ins().select(is_not_nan, val, zero) + } else { + unreachable!("{from_ty:?} -> {to_ty:?}"); + } +} + pub(crate) fn fmin_f128(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value { fx.lib_call( "fminimumf128", diff --git a/src/compiler_builtins.rs b/src/compiler_builtins.rs index 017a1370abd7..d9cbffd7ae01 100644 --- a/src/compiler_builtins.rs +++ b/src/compiler_builtins.rs @@ -49,15 +49,34 @@ builtin_functions! { // integer -> float fn __floattisf(i: i128) -> f32; fn __floattidf(i: i128) -> f64; + fn __floatsitf(i: i32) -> f128; + fn __floatditf(i: i64) -> f128; + fn __floattitf(i: i128) -> f128; fn __floatuntisf(i: u128) -> f32; fn __floatuntidf(i: u128) -> f64; + fn __floatunsitf(i: u32) -> f128; + fn __floatunditf(i: u64) -> f128; + fn __floatuntitf(i: u128) -> f128; // float -> integer fn __fixsfti(f: f32) -> i128; fn __fixdfti(f: f64) -> i128; + fn __fixtfsi(f: f128) -> i32; + fn __fixtfdi(f: f128) -> i64; + fn __fixtfti(f: f128) -> i128; fn __fixunssfti(f: f32) -> u128; fn __fixunsdfti(f: f64) -> u128; + fn __fixunstfsi(f: f128) -> u32; + fn __fixunstfdi(f: f128) -> u64; + fn __fixunstfti(f: f128) -> u128; // float -> float fn __extendhfsf2(f: f16) -> f32; + fn __extendhftf2(f: f16) -> f128; + fn __extendsftf2(f: f32) -> f128; + fn __extenddftf2(f: f64) -> f128; + fn __trunctfdf2(f: f128) -> f64; + fn __trunctfsf2(f: f128) -> f32; + fn __trunctfhf2(f: f128) -> f16; + fn __truncdfhf2(f: f64) -> f16; fn __truncsfhf2(f: f32) -> f16; // float binops fn __addtf3(a: f128, b: f128) -> f128;