Make fma a trait method on Float
This commit is contained in:
parent
720ba18931
commit
c01153d29b
5 changed files with 30 additions and 25 deletions
|
|
@ -78,6 +78,10 @@ impl Float for f8 {
|
|||
libm::generic::copysign(self, other)
|
||||
}
|
||||
|
||||
/// Fused multiply-add placeholder for `f8`.
///
/// No implementation is provided for this type; calling this method panics.
fn fma(self, _y: Self, _z: Self) -> Self {
    // Present only so that `f8` satisfies the `Float` trait.
    unimplemented!()
}
|
||||
|
||||
/// Significand normalization placeholder for `f8`.
///
/// No implementation is provided for this type; calling this function panics.
fn normalize(_significand: Self::Int) -> (i32, Self::Int) {
    // Present only so that `f8` satisfies the `Float` trait.
    unimplemented!()
}
|
||||
|
|
|
|||
|
|
@ -130,8 +130,7 @@
|
|||
"copysign": {
|
||||
"sources": [
|
||||
"src/math/copysign.rs",
|
||||
"src/math/generic/copysign.rs",
|
||||
"src/math/support/float_traits.rs"
|
||||
"src/math/generic/copysign.rs"
|
||||
],
|
||||
"type": "f64"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ ROOT_DIR = ETC_DIR.parent
|
|||
DIRECTORIES = [".github", "ci", "crates", "etc", "src"]
|
||||
|
||||
# These files do not trigger a retest.
|
||||
IGNORED_SOURCES = ["src/libm_helper.rs"]
|
||||
IGNORED_SOURCES = ["src/libm_helper.rs", "src/math/support/float_traits.rs"]
|
||||
|
||||
IndexTy: TypeAlias = dict[str, dict[str, Any]]
|
||||
"""Type of the `index` item in rustdoc's JSON output"""
|
||||
|
|
|
|||
|
|
@ -103,11 +103,11 @@ pub fn cbrt_round(x: f64, round: Round) -> FpResult<f64> {
|
|||
* and rr an approximation of 1/zz. We now perform another iteration of
|
||||
* Newton-Raphson, this time with a linear approximation only. */
|
||||
y2 = y * y;
|
||||
let mut y2l: f64 = fmaf64(y, y, -y2);
|
||||
let mut y2l: f64 = y.fma(y, -y2);
|
||||
|
||||
/* y2 + y2l = y^2 exactly */
|
||||
let mut y3: f64 = y2 * y;
|
||||
let mut y3l: f64 = fmaf64(y, y2, -y3) + y * y2l;
|
||||
let mut y3l: f64 = y.fma(y2, -y3) + y * y2l;
|
||||
|
||||
/* y3 + y3l approximates y^3 with about 106 bits of accuracy */
|
||||
h = ((y3 - zz) + y3l) * rr;
|
||||
|
|
@ -132,9 +132,9 @@ pub fn cbrt_round(x: f64, round: Round) -> FpResult<f64> {
|
|||
cold_path();
|
||||
|
||||
y2 = y1 * y1;
|
||||
y2l = fmaf64(y1, y1, -y2);
|
||||
y2l = y1.fma(y1, -y2);
|
||||
y3 = y2 * y1;
|
||||
y3l = fmaf64(y1, y2, -y3) + y1 * y2l;
|
||||
y3l = y1.fma(y2, -y3) + y1 * y2l;
|
||||
h = ((y3 - zz) + y3l) * rr;
|
||||
dy = h * (y1 * u0);
|
||||
y = y1 - dy;
|
||||
|
|
@ -198,18 +198,6 @@ pub fn cbrt_round(x: f64, round: Round) -> FpResult<f64> {
|
|||
FpResult::ok(f64::from_bits(cvt3))
|
||||
}
|
||||
|
||||
fn fmaf64(x: f64, y: f64, z: f64) -> f64 {
|
||||
#[cfg(intrinsics_enabled)]
|
||||
{
|
||||
return unsafe { core::intrinsics::fmaf64(x, y, z) };
|
||||
}
|
||||
|
||||
#[cfg(not(intrinsics_enabled))]
|
||||
{
|
||||
return super::fma(x, y, z);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
|
|||
|
|
@ -160,9 +160,11 @@ pub trait Float:
|
|||
fn abs(self) -> Self;
|
||||
|
||||
/// Returns a number composed of the magnitude of `self` and the sign of `other`.
|
||||
#[allow(dead_code)]
|
||||
fn copysign(self, other: Self) -> Self;
|
||||
|
||||
/// Fused multiply add, rounding once.
|
||||
fn fma(self, y: Self, z: Self) -> Self;
|
||||
|
||||
/// Returns (normalized exponent, normalized significand)
|
||||
#[allow(dead_code)]
|
||||
fn normalize(significand: Self::Int) -> (i32, Self::Int);
|
||||
|
|
@ -184,7 +186,9 @@ macro_rules! float_impl {
|
|||
$sity:ident,
|
||||
$bits:expr,
|
||||
$significand_bits:expr,
|
||||
$from_bits:path
|
||||
$from_bits:path,
|
||||
$fma_fn:ident,
|
||||
$fma_intrinsic:ident
|
||||
) => {
|
||||
impl Float for $ty {
|
||||
type Int = $ity;
|
||||
|
|
@ -252,6 +256,16 @@ macro_rules! float_impl {
|
|||
}
|
||||
}
|
||||
}
|
||||
fn fma(self, y: Self, z: Self) -> Self {
|
||||
cfg_if! {
|
||||
// fma is not yet available in `core`
|
||||
if #[cfg(intrinsics_enabled)] {
|
||||
unsafe{ core::intrinsics::$fma_intrinsic(self, y, z) }
|
||||
} else {
|
||||
super::super::$fma_fn(self, y, z)
|
||||
}
|
||||
}
|
||||
}
|
||||
fn normalize(significand: Self::Int) -> (i32, Self::Int) {
|
||||
let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
|
||||
(1i32.wrapping_sub(shift as i32), significand << shift as Self::Int)
|
||||
|
|
@ -261,11 +275,11 @@ macro_rules! float_impl {
|
|||
}
|
||||
|
||||
#[cfg(f16_enabled)]
|
||||
float_impl!(f16, u16, i16, 16, 10, f16::from_bits);
|
||||
float_impl!(f32, u32, i32, 32, 23, f32_from_bits);
|
||||
float_impl!(f64, u64, i64, 64, 52, f64_from_bits);
|
||||
float_impl!(f16, u16, i16, 16, 10, f16::from_bits, fmaf16, fmaf16);
|
||||
float_impl!(f32, u32, i32, 32, 23, f32_from_bits, fmaf, fmaf32);
|
||||
float_impl!(f64, u64, i64, 64, 52, f64_from_bits, fma, fmaf64);
|
||||
#[cfg(f128_enabled)]
|
||||
float_impl!(f128, u128, i128, 128, 112, f128::from_bits);
|
||||
float_impl!(f128, u128, i128, 128, 112, f128::from_bits, fmaf128, fmaf128);
|
||||
|
||||
/* FIXME(msrv): vendor some things that are not const stable at our MSRV */
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue