From 38d48dbe083b680b54e39c2ead2a92d76001786c Mon Sep 17 00:00:00 2001 From: beetrees Date: Fri, 23 May 2025 15:51:51 +0100 Subject: [PATCH] Add `f16`/`f128` comparison support --- src/codegen_f16_f128.rs | 54 +++++++++++++++++++++++++ src/compiler_builtins.rs | 9 +++++ src/intrinsics/mod.rs | 87 ++++++++++++++++++++++++++++++++++++++++ src/num.rs | 17 +++++--- 4 files changed, 162 insertions(+), 5 deletions(-) diff --git a/src/codegen_f16_f128.rs b/src/codegen_f16_f128.rs index c6f0779b82db..c570fbbd993d 100644 --- a/src/codegen_f16_f128.rs +++ b/src/codegen_f16_f128.rs @@ -28,6 +28,42 @@ pub(crate) fn f32_to_f16(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value if ret_ty == types::I16 { fx.bcx.ins().bitcast(types::F16, MemFlags::new(), ret) } else { ret } } +pub(crate) fn fcmp(fx: &mut FunctionCx<'_, '_, '_>, cc: FloatCC, lhs: Value, rhs: Value) -> Value { + let ty = fx.bcx.func.dfg.value_type(lhs); + match ty { + types::F32 | types::F64 => fx.bcx.ins().fcmp(cc, lhs, rhs), + types::F16 => { + let lhs = f16_to_f32(fx, lhs); + let rhs = f16_to_f32(fx, rhs); + fx.bcx.ins().fcmp(cc, lhs, rhs) + } + types::F128 => { + let (name, int_cc) = match cc { + FloatCC::Equal => ("__eqtf2", IntCC::Equal), + FloatCC::NotEqual => ("__netf2", IntCC::NotEqual), + FloatCC::LessThan => ("__lttf2", IntCC::SignedLessThan), + FloatCC::LessThanOrEqual => ("__letf2", IntCC::SignedLessThanOrEqual), + FloatCC::GreaterThan => ("__gttf2", IntCC::SignedGreaterThan), + FloatCC::GreaterThanOrEqual => ("__getf2", IntCC::SignedGreaterThanOrEqual), + _ => unreachable!("not currently used in rustc_codegen_cranelift: {cc:?}"), + }; + let res = fx.lib_call( + name, + vec![AbiParam::new(types::F128), AbiParam::new(types::F128)], + // FIXME(rust-lang/compiler-builtins#919): This should be `I64` on non-AArch64 + // architectures, but switching it before compiler-builtins is fixed causes test + // failures. + vec![AbiParam::new(types::I32)], + &[lhs, rhs], + )[0]; + let zero = fx.bcx.ins().iconst(types::I32, 0); + let res = fx.bcx.ins().icmp(int_cc, res, zero); + res + } + _ => unreachable!("{ty:?}"), + } +} + pub(crate) fn codegen_f128_binop( fx: &mut FunctionCx<'_, '_, '_>, bin_op: BinOp, @@ -62,3 +98,21 @@ pub(crate) fn neg_f128(fx: &mut FunctionCx<'_, '_, '_>, value: Value) -> Value { let bits = fx.bcx.ins().iconcat(low, high); fx.bcx.ins().bitcast(types::F128, MemFlags::new(), bits) } + +pub(crate) fn fmin_f128(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value { + fx.lib_call( + "fminimumf128", + vec![AbiParam::new(types::F128), AbiParam::new(types::F128)], + vec![AbiParam::new(types::F128)], + &[a, b], + )[0] +} + +pub(crate) fn fmax_f128(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value { + fx.lib_call( + "fmaximumf128", + vec![AbiParam::new(types::F128), AbiParam::new(types::F128)], + vec![AbiParam::new(types::F128)], + &[a, b], + )[0] +} diff --git a/src/compiler_builtins.rs b/src/compiler_builtins.rs index 0c75df845db0..017a1370abd7 100644 --- a/src/compiler_builtins.rs +++ b/src/compiler_builtins.rs @@ -67,6 +67,15 @@ builtin_functions! { fn fmodf(a: f32, b: f32) -> f32; fn fmod(a: f64, b: f64) -> f64; fn fmodf128(a: f128, b: f128) -> f128; + // float comparison + fn __eqtf2(a: f128, b: f128) -> i32; + fn __netf2(a: f128, b: f128) -> i32; + fn __lttf2(a: f128, b: f128) -> i32; + fn __letf2(a: f128, b: f128) -> i32; + fn __gttf2(a: f128, b: f128) -> i32; + fn __getf2(a: f128, b: f128) -> i32; + fn fminimumf128(a: f128, b: f128) -> f128; + fn fmaximumf128(a: f128, b: f128) -> f128; // Cranelift float libcalls fn fmaf(a: f32, b: f32, c: f32) -> f32; fn fma(a: f64, b: f64, c: f64) -> f64; diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs index 7c18922d93cd..6481d211234e 100644 --- a/src/intrinsics/mod.rs +++ b/src/intrinsics/mod.rs @@ -27,6 +27,7 @@ use rustc_span::{Symbol, sym}; pub(crate) use self::llvm::codegen_llvm_intrinsic_call; use crate::cast::clif_intcast; +use crate::codegen_f16_f128; use crate::prelude::*; fn bug_on_incorrect_arg_count(intrinsic: impl std::fmt::Display) -> ! { @@ -1118,6 +1119,20 @@ fn codegen_regular_intrinsic_call<'tcx>( ret.write_cvalue(fx, old); } + sym::minimumf16 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + // FIXME(bytecodealliance/wasmtime#8312): Use `fmin` directly once + // Cranelift backend lowerings are implemented. + let a = codegen_f16_f128::f16_to_f32(fx, a); + let b = codegen_f16_f128::f16_to_f32(fx, b); + let val = fx.bcx.ins().fmin(a, b); + let val = codegen_f16_f128::f32_to_f16(fx, val); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f16)); + ret.write_cvalue(fx, val); + } sym::minimumf32 => { intrinsic_args!(fx, args => (a, b); intrinsic); let a = a.load_scalar(fx); @@ -1136,6 +1151,31 @@ fn codegen_regular_intrinsic_call<'tcx>( let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64)); ret.write_cvalue(fx, val); } + sym::minimumf128 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + // FIXME(bytecodealliance/wasmtime#8312): Use `fmin` once Cranelift + // backend lowerings are implemented. + let val = codegen_f16_f128::fmin_f128(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f128)); + ret.write_cvalue(fx, val); + } + sym::maximumf16 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + // FIXME(bytecodealliance/wasmtime#8312): Use `fmax` directly once + // Cranelift backend lowerings are implemented. + let a = codegen_f16_f128::f16_to_f32(fx, a); + let b = codegen_f16_f128::f16_to_f32(fx, b); + let val = fx.bcx.ins().fmax(a, b); + let val = codegen_f16_f128::f32_to_f16(fx, val); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f16)); + ret.write_cvalue(fx, val); + } sym::maximumf32 => { intrinsic_args!(fx, args => (a, b); intrinsic); let a = a.load_scalar(fx); @@ -1154,7 +1194,27 @@ fn codegen_regular_intrinsic_call<'tcx>( let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64)); ret.write_cvalue(fx, val); } + sym::maximumf128 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + // FIXME(bytecodealliance/wasmtime#8312): Use `fmax` once Cranelift + // backend lowerings are implemented. + let val = codegen_f16_f128::fmax_f128(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f128)); + ret.write_cvalue(fx, val); + } + + sym::minnumf16 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + let val = crate::num::codegen_float_min(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f16)); + ret.write_cvalue(fx, val); + } sym::minnumf32 => { intrinsic_args!(fx, args => (a, b); intrinsic); let a = a.load_scalar(fx); @@ -1173,6 +1233,24 @@ fn codegen_regular_intrinsic_call<'tcx>( let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64)); ret.write_cvalue(fx, val); } + sym::minnumf128 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + let val = crate::num::codegen_float_min(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f128)); + ret.write_cvalue(fx, val); + } + sym::maxnumf16 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + let val = crate::num::codegen_float_max(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f16)); + ret.write_cvalue(fx, val); + } sym::maxnumf32 => { intrinsic_args!(fx, args => (a, b); intrinsic); let a = a.load_scalar(fx); @@ -1191,6 +1269,15 @@ fn codegen_regular_intrinsic_call<'tcx>( let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64)); ret.write_cvalue(fx, val); } + sym::maxnumf128 => { + intrinsic_args!(fx, args => (a, b); intrinsic); + let a = a.load_scalar(fx); + let b = b.load_scalar(fx); + + let val = crate::num::codegen_float_max(fx, a, b); + let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f128)); + ret.write_cvalue(fx, val); + } sym::catch_unwind => { intrinsic_args!(fx, args => (f, data, catch_fn); intrinsic); diff --git a/src/num.rs b/src/num.rs index 3ed276267de6..f53045df6e79 100644 --- a/src/num.rs +++ b/src/num.rs @@ -416,7 +416,10 @@ pub(crate) fn codegen_float_binop<'tcx>( BinOp::Gt => FloatCC::GreaterThan, _ => unreachable!(), }; - let val = fx.bcx.ins().fcmp(fltcc, lhs, rhs); + // FIXME(bytecodealliance/wasmtime#8312): Replace with Cranelift + // `fcmp` once `f16`/`f128` backend lowerings have been added to + // Cranelift. + let val = codegen_f16_f128::fcmp(fx, fltcc, lhs, rhs); return CValue::by_val(val, fx.layout_of(fx.tcx.types.bool)); } _ => unreachable!("{:?}({:?}, {:?})", bin_op, in_lhs, in_rhs), @@ -500,15 +503,19 @@ fn codegen_ptr_binop<'tcx>( // and `a.is_nan() ? b : (a <= b ? b : a)` for `maxnumf*`. NaN checks are done by comparing // a float against itself. Only in case of NaN is it not equal to itself. pub(crate) fn codegen_float_min(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value { - let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a); - let a_ge_b = fx.bcx.ins().fcmp(FloatCC::GreaterThanOrEqual, a, b); + // FIXME(bytecodealliance/wasmtime#8312): Replace with Cranelift `fcmp` once + // `f16`/`f128` backend lowerings have been added to Cranelift. + let a_is_nan = codegen_f16_f128::fcmp(fx, FloatCC::NotEqual, a, a); + let a_ge_b = codegen_f16_f128::fcmp(fx, FloatCC::GreaterThanOrEqual, a, b); let temp = fx.bcx.ins().select(a_ge_b, b, a); fx.bcx.ins().select(a_is_nan, b, temp) } pub(crate) fn codegen_float_max(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value { - let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a); - let a_le_b = fx.bcx.ins().fcmp(FloatCC::LessThanOrEqual, a, b); + // FIXME(bytecodealliance/wasmtime#8312): Replace with Cranelift `fcmp` once + // `f16`/`f128` backend lowerings have been added to Cranelift. + let a_is_nan = codegen_f16_f128::fcmp(fx, FloatCC::NotEqual, a, a); + let a_le_b = codegen_f16_f128::fcmp(fx, FloatCC::LessThanOrEqual, a, b); let temp = fx.bcx.ins().select(a_le_b, b, a); fx.bcx.ins().select(a_is_nan, b, temp) }