From c8491ea3639a78e7bd4c243501fa8df62139bdda Mon Sep 17 00:00:00 2001
From: gnzlbg
Date: Mon, 4 Jun 2018 14:17:24 +0200
Subject: [PATCH] add vertical float math: abs, sqrt, sqrte, rsqrte, fma

---
 .../stdarch/coresimd/ppsv/api/float_math.rs   | 132 ++++++++++++++++++
 library/stdarch/coresimd/ppsv/api/mod.rs      |  15 +-
 library/stdarch/coresimd/ppsv/codegen/abs.rs  |  43 ++++++
 library/stdarch/coresimd/ppsv/codegen/fma.rs  |  43 ++++++
 library/stdarch/coresimd/ppsv/codegen/mod.rs  |   4 +
 library/stdarch/coresimd/ppsv/codegen/sqrt.rs |  43 ++++++
 library/stdarch/coresimd/simd_llvm.rs         |   7 +-
 7 files changed, 281 insertions(+), 6 deletions(-)
 create mode 100644 library/stdarch/coresimd/ppsv/api/float_math.rs
 create mode 100644 library/stdarch/coresimd/ppsv/codegen/abs.rs
 create mode 100644 library/stdarch/coresimd/ppsv/codegen/fma.rs
 create mode 100644 library/stdarch/coresimd/ppsv/codegen/sqrt.rs

diff --git a/library/stdarch/coresimd/ppsv/api/float_math.rs b/library/stdarch/coresimd/ppsv/api/float_math.rs
new file mode 100644
index 000000000000..e0178b703cb9
--- /dev/null
+++ b/library/stdarch/coresimd/ppsv/api/float_math.rs
@@ -0,0 +1,132 @@
+//! Float math
+
+macro_rules! impl_float_math {
+    ($id:ident) => {
+        impl $id {
+            /// Lane-wise absolute value.
+            #[inline]
+            pub fn abs(self) -> Self {
+                use coresimd::ppsv::codegen::abs::FloatAbs;
+                FloatAbs::abs(self)
+            }
+
+            /// Lane-wise square root.
+            #[inline]
+            pub fn sqrt(self) -> Self {
+                use coresimd::ppsv::codegen::sqrt::FloatSqrt;
+                FloatSqrt::sqrt(self)
+            }
+
+            /// Lane-wise square-root estimate.
+            #[inline]
+            pub fn sqrte(self) -> Self {
+                use coresimd::simd_llvm::simd_fsqrt;
+                unsafe { simd_fsqrt(self) }
+            }
+
+            /// Lane-wise reciprocal square-root estimate: `1 / sqrt(self)`.
+            #[inline]
+            pub fn rsqrte(self) -> Self {
+                unsafe {
+                    use coresimd::simd_llvm::simd_fsqrt;
+                    $id::splat(1.) / simd_fsqrt(self)
+                }
+            }
+
+            /// Lane-wise fused multiply add: `self * y + z`.
+            #[inline]
+            pub fn fma(self, y: Self, z: Self) -> Self {
+                use coresimd::ppsv::codegen::fma::FloatFma;
+                FloatFma::fma(self, y, z)
+            }
+        }
+    };
+}
+
+macro_rules! test_float_math {
+    ($id:ident, $elem_ty:ident) => {
+
+        fn sqrt2() -> $elem_ty {
+            match ::mem::size_of::<$elem_ty>() {
+                4 => 1.4142135 as $elem_ty,
+                8 => 1.4142135623730951 as $elem_ty,
+                _ => unreachable!(),
+            }
+        }
+
+        #[test]
+        fn abs() {
+            use coresimd::simd::*;
+            let o = $id::splat(1 as $elem_ty);
+            assert_eq!(o, o.abs());
+
+            let mo = $id::splat(-1 as $elem_ty);
+            assert_eq!(o, mo.abs());
+        }
+
+        #[test]
+        fn sqrt() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let o = $id::splat(1 as $elem_ty);
+            assert_eq!(z, z.sqrt());
+            assert_eq!(o, o.sqrt());
+
+            let t = $id::splat(2 as $elem_ty);
+            let e = $id::splat(sqrt2() as $elem_ty);
+            assert_eq!(e, t.sqrt());
+        }
+
+        #[test]
+        fn sqrte() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let o = $id::splat(1 as $elem_ty);
+            assert_eq!(z, z.sqrte());
+            assert_eq!(o, o.sqrte());
+
+            let t = $id::splat(2 as $elem_ty);
+            let e = $id::splat(sqrt2() as $elem_ty);
+            let error = (e - t.sqrte()).abs();
+            let tol = $id::splat(2.4e-4 as $elem_ty);
+
+            assert!(error.le(tol).all());
+        }
+
+        #[test]
+        fn rsqrte() {
+            use coresimd::simd::*;
+            let o = $id::splat(1 as $elem_ty);
+            assert_eq!(o, o.rsqrte());
+
+            let t = $id::splat(2 as $elem_ty);
+            let e = 1. / sqrt2();
+            let error = (e - t.rsqrte()).abs();
+            let tol = $id::splat(2.4e-4 as $elem_ty);
+            assert!(error.le(tol).all());
+        }
+
+        #[test]
+        fn fma() {
+            use coresimd::simd::*;
+            let z = $id::splat(0 as $elem_ty);
+            let o = $id::splat(1 as $elem_ty);
+            let t = $id::splat(2 as $elem_ty);
+            let t3 = $id::splat(3 as $elem_ty);
+            let f = $id::splat(4 as $elem_ty);
+
+            assert_eq!(z, z.fma(z, z));
+            assert_eq!(o, o.fma(o, z));
+            assert_eq!(o, o.fma(z, o));
+            assert_eq!(o, z.fma(o, o));
+
+            assert_eq!(t, o.fma(o, o));
+            assert_eq!(t, o.fma(t, z));
+            assert_eq!(t, t.fma(o, z));
+
+            assert_eq!(f, t.fma(t, z));
+            assert_eq!(f, t.fma(o, t));
+            assert_eq!(t3, t.fma(o, o)); // 2 * 1 + 1 == 3
+        }
+    };
+}
diff --git a/library/stdarch/coresimd/ppsv/api/mod.rs b/library/stdarch/coresimd/ppsv/api/mod.rs
index 1915f6aeb013..1ce658e2d9a4 100644
--- a/library/stdarch/coresimd/ppsv/api/mod.rs
+++ b/library/stdarch/coresimd/ppsv/api/mod.rs
@@ -84,6 +84,8 @@ mod default;
 #[macro_use]
 mod eq;
 #[macro_use]
+mod float_math;
+#[macro_use]
 mod fmt;
 #[macro_use]
 mod from;
@@ -128,7 +130,8 @@ pub trait Lanes {}
 
 /// Defines a portable packed SIMD floating-point vector type.
 macro_rules! simd_f_ty {
-    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
+    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
+     $test_macro:ident |
      $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
         vector_impl!(
             [define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
@@ -142,7 +145,8 @@ macro_rules! simd_f_ty {
             [impl_neg_op, $id, $elem_ty],
             [impl_partial_eq, $id],
             [impl_default, $id, $elem_ty],
-            [impl_float_minmax_ops, $id]
+            [impl_float_minmax_ops, $id],
+            [impl_float_math, $id]
         );
 
         $test_macro!(
@@ -160,6 +164,7 @@ macro_rules! simd_f_ty {
                 test_default!($id, $elem_ty);
                 test_mask_select!($mask_ty, $id, $elem_ty);
                 test_float_minmax_ops!($id, $elem_ty);
+                test_float_math!($id, $elem_ty);
             }
         );
     }
@@ -167,7 +172,8 @@ macro_rules! simd_f_ty {
 
 /// Defines a portable packed SIMD signed-integer vector type.
 macro_rules! simd_i_ty {
-    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
+    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
+     $test_macro:ident |
      $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
         vector_impl!(
             [define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
@@ -221,7 +227,8 @@ macro_rules! simd_i_ty {
 
 /// Defines a portable packed SIMD unsigned-integer vector type.
 macro_rules! simd_u_ty {
-    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
+    ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
+     $test_macro:ident |
      $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
         vector_impl!(
             [define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
diff --git a/library/stdarch/coresimd/ppsv/codegen/abs.rs b/library/stdarch/coresimd/ppsv/codegen/abs.rs
new file mode 100644
index 000000000000..edca549c24af
--- /dev/null
+++ b/library/stdarch/coresimd/ppsv/codegen/abs.rs
@@ -0,0 +1,43 @@
+//! Vector absolute value
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.fabs.v2f32"]
+    fn abs_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.fabs.v4f32"]
+    fn abs_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.fabs.v8f32"]
+    fn abs_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.fabs.v16f32"]
+    fn abs_v16f32(x: f32x16) -> f32x16;
+    #[link_name = "llvm.fabs.v2f64"]
+    fn abs_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.fabs.v4f64"]
+    fn abs_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.fabs.v8f64"]
+    fn abs_v8f64(x: f64x8) -> f64x8;
+}
+
+pub(crate) trait FloatAbs {
+    fn abs(self) -> Self;
+}
+
+macro_rules! impl_fabs {
+    ($id:ident: $fn:ident) => {
+        impl FloatAbs for $id {
+            fn abs(self) -> Self {
+                unsafe { $fn(self) }
+            }
+        }
+    }
+}
+
+impl_fabs!(f32x2: abs_v2f32);
+impl_fabs!(f32x4: abs_v4f32);
+impl_fabs!(f32x8: abs_v8f32);
+impl_fabs!(f32x16: abs_v16f32);
+impl_fabs!(f64x2: abs_v2f64);
+impl_fabs!(f64x4: abs_v4f64);
+impl_fabs!(f64x8: abs_v8f64);
diff --git a/library/stdarch/coresimd/ppsv/codegen/fma.rs b/library/stdarch/coresimd/ppsv/codegen/fma.rs
new file mode 100644
index 000000000000..9d63ac6bee62
--- /dev/null
+++ b/library/stdarch/coresimd/ppsv/codegen/fma.rs
@@ -0,0 +1,43 @@
+//! Vector fused multiply add
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.fma.v2f32"]
+    fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
+    #[link_name = "llvm.fma.v4f32"]
+    fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
+    #[link_name = "llvm.fma.v8f32"]
+    fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
+    #[link_name = "llvm.fma.v16f32"]
+    fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
+    #[link_name = "llvm.fma.v2f64"]
+    fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
+    #[link_name = "llvm.fma.v4f64"]
+    fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
+    #[link_name = "llvm.fma.v8f64"]
+    fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
+}
+
+pub(crate) trait FloatFma {
+    fn fma(self, y: Self, z: Self) -> Self;
+}
+
+macro_rules! impl_fma {
+    ($id:ident: $fn:ident) => {
+        impl FloatFma for $id {
+            fn fma(self, y: Self, z: Self) -> Self {
+                unsafe { $fn(self, y, z) }
+            }
+        }
+    }
+}
+
+impl_fma!(f32x2: fma_v2f32);
+impl_fma!(f32x4: fma_v4f32);
+impl_fma!(f32x8: fma_v8f32);
+impl_fma!(f32x16: fma_v16f32);
+impl_fma!(f64x2: fma_v2f64);
+impl_fma!(f64x4: fma_v4f64);
+impl_fma!(f64x8: fma_v8f64);
diff --git a/library/stdarch/coresimd/ppsv/codegen/mod.rs b/library/stdarch/coresimd/ppsv/codegen/mod.rs
index 448587b7957b..2791e0670e2b 100644
--- a/library/stdarch/coresimd/ppsv/codegen/mod.rs
+++ b/library/stdarch/coresimd/ppsv/codegen/mod.rs
@@ -4,3 +4,7 @@
 pub mod wrapping;
 
 pub mod masks_reductions;
+
+pub mod sqrt;
+pub mod abs;
+pub mod fma;
diff --git a/library/stdarch/coresimd/ppsv/codegen/sqrt.rs b/library/stdarch/coresimd/ppsv/codegen/sqrt.rs
new file mode 100644
index 000000000000..8e86650555dd
--- /dev/null
+++ b/library/stdarch/coresimd/ppsv/codegen/sqrt.rs
@@ -0,0 +1,43 @@
+//! Exact vector square-root
+
+use coresimd::simd::*;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.sqrt.v2f32"]
+    fn sqrt_v2f32(x: f32x2) -> f32x2;
+    #[link_name = "llvm.sqrt.v4f32"]
+    fn sqrt_v4f32(x: f32x4) -> f32x4;
+    #[link_name = "llvm.sqrt.v8f32"]
+    fn sqrt_v8f32(x: f32x8) -> f32x8;
+    #[link_name = "llvm.sqrt.v16f32"]
+    fn sqrt_v16f32(x: f32x16) -> f32x16;
+    #[link_name = "llvm.sqrt.v2f64"]
+    fn sqrt_v2f64(x: f64x2) -> f64x2;
+    #[link_name = "llvm.sqrt.v4f64"]
+    fn sqrt_v4f64(x: f64x4) -> f64x4;
+    #[link_name = "llvm.sqrt.v8f64"]
+    fn sqrt_v8f64(x: f64x8) -> f64x8;
+}
+
+pub(crate) trait FloatSqrt {
+    fn sqrt(self) -> Self;
+}
+
+macro_rules! impl_fsqrt {
+    ($id:ident: $fn:ident) => {
+        impl FloatSqrt for $id {
+            fn sqrt(self) -> Self {
+                unsafe { $fn(self) }
+            }
+        }
+    }
+}
+
+impl_fsqrt!(f32x2: sqrt_v2f32);
+impl_fsqrt!(f32x4: sqrt_v4f32);
+impl_fsqrt!(f32x8: sqrt_v8f32);
+impl_fsqrt!(f32x16: sqrt_v16f32);
+impl_fsqrt!(f64x2: sqrt_v2f64);
+impl_fsqrt!(f64x4: sqrt_v4f64);
+impl_fsqrt!(f64x8: sqrt_v8f64);
diff --git a/library/stdarch/coresimd/simd_llvm.rs b/library/stdarch/coresimd/simd_llvm.rs
index fdcf4f9cb059..c83c2d4b350c 100644
--- a/library/stdarch/coresimd/simd_llvm.rs
+++ b/library/stdarch/coresimd/simd_llvm.rs
@@ -49,6 +49,9 @@ extern "platform-intrinsic" {
     pub fn simd_select<M, T>(m: M, a: T, b: T) -> T;
 
     pub fn simd_fmin<T>(a: T, b: T) -> T;
-// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/416
-// pub fn simd_fmax<T>(a: T, b: T) -> T;
+    // FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/416
+    // pub fn simd_fmax<T>(a: T, b: T) -> T;
+
+    pub fn simd_fsqrt<T>(a: T) -> T;
+    pub fn simd_fma<T>(a: T, b: T, c: T) -> T;
 }