Add f16/f128 +/-/*///% support

2025-05-23 15:51:51 +01:00 · 2025-05-23 15:51:51 +01:00 · 27a9590d3d
commit 27a9590d3d
parent 87c425b9e1
6 changed files with 135 additions and 7 deletions
--- a/build_system/build_backend.rs
+++ b/build_system/build_backend.rs
@ -18,7 +18,7 @@ pub(crate) fn build_backend(
    let mut cmd = CG_CLIF.build(&bootstrap_host_compiler, dirs);

    let mut rustflags = rustflags_from_env("RUSTFLAGS");
-    rustflags.push("-Zallow-features=rustc_private".to_owned());
+    rustflags.push("-Zallow-features=rustc_private,f16,f128".to_owned());
    rustflags_to_cmd_env(&mut cmd, "RUSTFLAGS", &rustflags);

    if env::var("CG_CLIF_EXPENSIVE_CHECKS").is_ok() {
--- a/src/base.rs
+++ b/src/base.rs
@ -15,9 +15,9 @@ use rustc_middle::ty::print::with_no_trimmed_paths;

 use crate::constant::ConstantCx;
 use crate::debuginfo::{FunctionDebugContext, TypeDebugContext};
-use crate::enable_verifier;
 use crate::prelude::*;
 use crate::pretty_clif::CommentWriter;
+use crate::{codegen_f16_f128, enable_verifier};

 pub(crate) struct CodegenedFunction {
    symbol_name: String,
@ -635,6 +635,15 @@ fn codegen_stmt<'tcx>(fx: &mut FunctionCx<'_, '_, 'tcx>, cur_block: Block, stmt:
                            let val = operand.load_scalar(fx);
                            match layout.ty.kind() {
                                ty::Int(_) => CValue::by_val(fx.bcx.ins().ineg(val), layout),
+                                // FIXME(bytecodealliance/wasmtime#8312): Remove
+                                // once backend lowerings have been added to
+                                // Cranelift.
+                                ty::Float(FloatTy::F16) => {
+                                    CValue::by_val(codegen_f16_f128::neg_f16(fx, val), layout)
+                                }
+                                ty::Float(FloatTy::F128) => {
+                                    CValue::by_val(codegen_f16_f128::neg_f128(fx, val), layout)
+                                }
                                ty::Float(_) => CValue::by_val(fx.bcx.ins().fneg(val), layout),
                                _ => unreachable!("un op Neg for {:?}", layout.ty),
                            }
--- a/src/codegen_f16_f128.rs
+++ b/src/codegen_f16_f128.rs
@ -0,0 +1,64 @@
+use crate::prelude::*;
+
+pub(crate) fn f16_to_f32(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value {
+    let (value, arg_ty) =
+        if fx.tcx.sess.target.vendor == "apple" && fx.tcx.sess.target.arch == "x86_64" {
+            (
+                fx.bcx.ins().bitcast(types::I16, MemFlags::new(), value),
+                lib_call_arg_param(fx.tcx, types::I16, false),
+            )
+        } else {
+            (value, AbiParam::new(types::F16))
+        };
+    fx.lib_call("__extendhfsf2", vec![arg_ty], vec![AbiParam::new(types::F32)], &[value])[0]
+}
+
+pub(crate) fn f32_to_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value {
+    let ret_ty = if fx.tcx.sess.target.vendor == "apple" && fx.tcx.sess.target.arch == "x86_64" {
+        types::I16
+    } else {
+        types::F16
+    };
+    let ret = fx.lib_call(
+        "__truncsfhf2",
+        vec![AbiParam::new(types::F32)],
+        vec![AbiParam::new(ret_ty)],
+        &[value],
+    )[0];
+    if ret_ty == types::I16 { fx.bcx.ins().bitcast(types::F16, MemFlags::new(), ret) } else { ret }
+}
+
+pub(crate) fn codegen_f128_binop(
+    fx: &mut FunctionCx<'_, '_, '_>,
+    bin_op: BinOp,
+    lhs: Value,
+    rhs: Value,
+) -> Value {
+    let name = match bin_op {
+        BinOp::Add => "__addtf3",
+        BinOp::Sub => "__subtf3",
+        BinOp::Mul => "__multf3",
+        BinOp::Div => "__divtf3",
+        _ => unreachable!("handled in `codegen_float_binop`"),
+    };
+    fx.lib_call(
+        name,
+        vec![AbiParam::new(types::F128), AbiParam::new(types::F128)],
+        vec![AbiParam::new(types::F128)],
+        &[lhs, rhs],
+    )[0]
+}
+
+pub(crate) fn neg_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value {
+    let bits = fx.bcx.ins().bitcast(types::I16, MemFlags::new(), value);
+    let bits = fx.bcx.ins().bxor_imm(bits, 0x8000);
+    fx.bcx.ins().bitcast(types::F16, MemFlags::new(), bits)
+}
+
+pub(crate) fn neg_f128(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value {
+    let bits = fx.bcx.ins().bitcast(types::I128, MemFlags::new(), value);
+    let (low, high) = fx.bcx.ins().isplit(bits);
+    let high = fx.bcx.ins().bxor_imm(high, 0x8000_0000_0000_0000_u64 as i64);
+    let bits = fx.bcx.ins().iconcat(low, high);
+    fx.bcx.ins().bitcast(types::F128, MemFlags::new(), bits)
+}
--- a/src/compiler_builtins.rs
+++ b/src/compiler_builtins.rs
@ -56,9 +56,17 @@ builtin_functions! {
    fn __fixdfti(f: f64) -> i128;
    fn __fixunssfti(f: f32) -> u128;
    fn __fixunsdfti(f: f64) -> u128;
+    // float -> float
+    fn __extendhfsf2(f: f16) -> f32;
+    fn __truncsfhf2(f: f32) -> f16;
    // float binops
+    fn __addtf3(a: f128, b: f128) -> f128;
+    fn __subtf3(a: f128, b: f128) -> f128;
+    fn __multf3(a: f128, b: f128) -> f128;
+    fn __divtf3(a: f128, b: f128) -> f128;
    fn fmodf(a: f32, b: f32) -> f32;
    fn fmod(a: f64, b: f64) -> f64;
+    fn fmodf128(a: f128, b: f128) -> f128;
    // Cranelift float libcalls
    fn fmaf(a: f32, b: f32, c: f32) -> f32;
    fn fma(a: f64, b: f64, c: f64) -> f64;
--- a/src/lib.rs
+++ b/src/lib.rs
@ -6,6 +6,9 @@
 #![cfg_attr(doc, feature(rustdoc_internals))]
 // Note: please avoid adding other feature gates where possible
 #![feature(rustc_private)]
+// Only used to define intrinsics in `compiler_builtins.rs`.
+#![feature(f16)]
+#![feature(f128)]
 // Note: please avoid adding other feature gates where possible
 #![warn(rust_2018_idioms)]
 #![warn(unreachable_pub)]
@ -57,6 +60,7 @@ mod allocator;
 mod analyze;
 mod base;
 mod cast;
+mod codegen_f16_f128;
 mod codegen_i128;
 mod common;
 mod compiler_builtins;
--- a/src/num.rs
+++ b/src/num.rs
@ -1,5 +1,6 @@
 //! Various operations on integer and floating-point numbers

+use crate::codegen_f16_f128;
 use crate::prelude::*;

 fn bin_op_to_intcc(bin_op: BinOp, signed: bool) -> IntCC {
@ -350,25 +351,60 @@ pub(crate) fn codegen_float_binop<'tcx>(
    let lhs = in_lhs.load_scalar(fx);
    let rhs = in_rhs.load_scalar(fx);

+    // FIXME(bytecodealliance/wasmtime#8312): Remove once backend lowerings have
+    // been added to Cranelift.
+    let (lhs, rhs) = if *in_lhs.layout().ty.kind() == ty::Float(FloatTy::F16) {
+        (codegen_f16_f128::f16_to_f32(fx, lhs), codegen_f16_f128::f16_to_f32(fx, rhs))
+    } else {
+        (lhs, rhs)
+    };
    let b = fx.bcx.ins();
    let res = match bin_op {
+        // FIXME(bytecodealliance/wasmtime#8312): Remove once backend lowerings
+        // have been added to Cranelift.
+        BinOp::Add | BinOp::Sub | BinOp::Mul | BinOp::Div
+            if *in_lhs.layout().ty.kind() == ty::Float(FloatTy::F128) =>
+        {
+            codegen_f16_f128::codegen_f128_binop(fx, bin_op, lhs, rhs)
+        }
        BinOp::Add => b.fadd(lhs, rhs),
        BinOp::Sub => b.fsub(lhs, rhs),
        BinOp::Mul => b.fmul(lhs, rhs),
        BinOp::Div => b.fdiv(lhs, rhs),
        BinOp::Rem => {
-            let (name, ty) = match in_lhs.layout().ty.kind() {
-                ty::Float(FloatTy::F32) => ("fmodf", types::F32),
-                ty::Float(FloatTy::F64) => ("fmod", types::F64),
+            let (name, ty, lhs, rhs) = match in_lhs.layout().ty.kind() {
+                ty::Float(FloatTy::F16) => (
+                    "fmodf",
+                    types::F32,
+                    // FIXME(bytecodealliance/wasmtime#8312): Already converted
+                    // by the FIXME above.
+                    // fx.bcx.ins().fpromote(types::F32, lhs),
+                    // fx.bcx.ins().fpromote(types::F32, rhs),
+                    lhs,
+                    rhs,
+                ),
+                ty::Float(FloatTy::F32) => ("fmodf", types::F32, lhs, rhs),
+                ty::Float(FloatTy::F64) => ("fmod", types::F64, lhs, rhs),
+                ty::Float(FloatTy::F128) => ("fmodf128", types::F128, lhs, rhs),
                _ => bug!(),
            };

-            fx.lib_call(
+            let ret_val = fx.lib_call(
                name,
                vec![AbiParam::new(ty), AbiParam::new(ty)],
                vec![AbiParam::new(ty)],
                &[lhs, rhs],
-            )[0]
+            )[0];
+
+            let ret_val = if *in_lhs.layout().ty.kind() == ty::Float(FloatTy::F16) {
+                // FIXME(bytecodealliance/wasmtime#8312): Use native Cranelift
+                // operation once Cranelift backend lowerings have been
+                // implemented.
+                codegen_f16_f128::f32_to_f16(fx, ret_val)
+            } else {
+                ret_val
+            };
+            return CValue::by_val(ret_val, in_lhs.layout());
        }
        BinOp::Eq | BinOp::Lt | BinOp::Le | BinOp::Ne | BinOp::Ge | BinOp::Gt => {
            let fltcc = match bin_op {
@ -386,6 +422,13 @@ pub(crate) fn codegen_float_binop<'tcx>(
        _ => unreachable!("{:?}({:?}, {:?})", bin_op, in_lhs, in_rhs),
    };

+    // FIXME(bytecodealliance/wasmtime#8312): Remove once backend lowerings have
+    // been added to Cranelift.
+    let res = if *in_lhs.layout().ty.kind() == ty::Float(FloatTy::F16) {
+        codegen_f16_f128::f32_to_f16(fx, res)
+    } else {
+        res
+    };
    CValue::by_val(res, in_lhs.layout())
 }