add vertical float math: abs, sqrt, sqrte, rsqrte, fma
This commit is contained in:
parent
b3b668e499
commit
c8491ea363
7 changed files with 281 additions and 6 deletions
132
library/stdarch/coresimd/ppsv/api/float_math.rs
Normal file
132
library/stdarch/coresimd/ppsv/api/float_math.rs
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
//! Float math
|
||||
|
||||
/// Implements the vertical floating-point math methods `abs`, `sqrt`,
/// `sqrte`, `rsqrte` and `fma` as inherent methods of the vector type `$id`.
macro_rules! impl_float_math {
    ($id:ident) => {
        impl $id {
            /// Absolute-value
            #[inline]
            pub fn abs(self) -> Self {
                use coresimd::ppsv::codegen::abs::FloatAbs;
                // Fully qualified on purpose: `self.abs()` would resolve to
                // this inherent method and recurse.
                <Self as FloatAbs>::abs(self)
            }

            /// Square-root
            #[inline]
            pub fn sqrt(self) -> Self {
                use coresimd::ppsv::codegen::sqrt::FloatSqrt;
                // Fully qualified for the same reason as in `abs`.
                <Self as FloatSqrt>::sqrt(self)
            }

            /// Square-root estimate
            #[inline]
            pub fn sqrte(self) -> Self {
                use coresimd::simd_llvm;
                // Forwards to the generic `simd_fsqrt` platform intrinsic.
                unsafe { simd_llvm::simd_fsqrt(self) }
            }

            /// Reciprocal square-root estimate
            #[inline]
            pub fn rsqrte(self) -> Self {
                use coresimd::simd_llvm;
                // Computed as `1 / sqrte(self)`: only the intrinsic call
                // needs `unsafe`, the division is the ordinary vector `/`.
                let root: Self = unsafe { simd_llvm::simd_fsqrt(self) };
                Self::splat(1.) / root
            }

            /// Fused multiply add: `self * y + z`
            #[inline]
            pub fn fma(self, y: Self, z: Self) -> Self {
                use coresimd::ppsv::codegen::fma::FloatFma;
                // Fully qualified for the same reason as in `abs`.
                <Self as FloatFma>::fma(self, y, z)
            }
        }
    };
}
|
||||
|
||||
/// Generates the unit tests for the vertical float math methods (`abs`,
/// `sqrt`, `sqrte`, `rsqrte`, `fma`) of the vector type `$id`, whose lanes
/// have the scalar type `$elem_ty`.
macro_rules! test_float_math {
    ($id:ident, $elem_ty:ident) => {
        /// Returns a `sqrt(2)` literal written at the precision of
        /// `$elem_ty`, chosen by the element size in bytes (4 or 8).
        fn sqrt2() -> $elem_ty {
            match ::mem::size_of::<$elem_ty>() {
                4 => 1.4142135 as $elem_ty,
                8 => 1.4142135623730951 as $elem_ty,
                _ => unreachable!(),
            }
        }

        #[test]
        fn abs() {
            use coresimd::simd::*;
            let o = $id::splat(1 as $elem_ty);
            assert_eq!(o, o.abs());

            let mo = $id::splat(-1 as $elem_ty);
            assert_eq!(o, mo.abs());
        }

        #[test]
        fn sqrt() {
            use coresimd::simd::*;
            let z = $id::splat(0 as $elem_ty);
            let o = $id::splat(1 as $elem_ty);
            assert_eq!(z, z.sqrt());
            assert_eq!(o, o.sqrt());

            let t = $id::splat(2 as $elem_ty);
            let e = $id::splat(sqrt2());
            assert_eq!(e, t.sqrt());
        }

        #[test]
        fn sqrte() {
            use coresimd::simd::*;
            let z = $id::splat(0 as $elem_ty);
            let o = $id::splat(1 as $elem_ty);
            assert_eq!(z, z.sqrte());
            assert_eq!(o, o.sqrte());

            // The estimate is only compared against sqrt(2) up to `tol`.
            let t = $id::splat(2 as $elem_ty);
            let e = $id::splat(sqrt2());
            let error = (e - t.sqrte()).abs();
            let tol = $id::splat(2.4e-4 as $elem_ty);

            assert!(error.le(tol).all());
        }

        #[test]
        fn rsqrte() {
            use coresimd::simd::*;
            let o = $id::splat(1 as $elem_ty);
            assert_eq!(o, o.rsqrte());

            // Expected value is splatted into a vector so the comparison is
            // vector-vs-vector, exactly like in the `sqrte` test.
            let t = $id::splat(2 as $elem_ty);
            let e = $id::splat(1. / sqrt2());
            let error = (e - t.rsqrte()).abs();
            let tol = $id::splat(2.4e-4 as $elem_ty);
            assert!(error.le(tol).all());
        }

        #[test]
        fn fma() {
            use coresimd::simd::*;
            let z = $id::splat(0 as $elem_ty);
            let o = $id::splat(1 as $elem_ty);
            let t = $id::splat(2 as $elem_ty);
            let t3 = $id::splat(3 as $elem_ty);
            let f = $id::splat(4 as $elem_ty);

            // `a.fma(b, c)` is `a * b + c`.
            assert_eq!(z, z.fma(z, z)); // 0 * 0 + 0 == 0
            assert_eq!(o, o.fma(o, z)); // 1 * 1 + 0 == 1
            assert_eq!(o, o.fma(z, o)); // 1 * 0 + 1 == 1
            assert_eq!(o, z.fma(o, o)); // 0 * 1 + 1 == 1

            assert_eq!(t, o.fma(o, o)); // 1 * 1 + 1 == 2
            assert_eq!(t, o.fma(t, z)); // 1 * 2 + 0 == 2
            assert_eq!(t, t.fma(o, z)); // 2 * 1 + 0 == 2

            assert_eq!(f, t.fma(t, z)); // 2 * 2 + 0 == 4
            assert_eq!(f, t.fma(o, t)); // 2 * 1 + 2 == 4
            // Was `t.fma(t, o)`, which is 2 * 2 + 1 == 5 and can never
            // equal 3.
            assert_eq!(t3, t.fma(o, o)); // 2 * 1 + 1 == 3
        }
    };
}
|
||||
|
|
@ -84,6 +84,8 @@ mod default;
|
|||
#[macro_use]
|
||||
mod eq;
|
||||
#[macro_use]
|
||||
mod float_math;
|
||||
#[macro_use]
|
||||
mod fmt;
|
||||
#[macro_use]
|
||||
mod from;
|
||||
|
|
@ -128,7 +130,8 @@ pub trait Lanes<A> {}
|
|||
|
||||
/// Defines a portable packed SIMD floating-point vector type.
|
||||
macro_rules! simd_f_ty {
|
||||
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
|
||||
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
|
||||
$test_macro:ident |
|
||||
$($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
|
||||
vector_impl!(
|
||||
[define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
|
||||
|
|
@ -142,7 +145,8 @@ macro_rules! simd_f_ty {
|
|||
[impl_neg_op, $id, $elem_ty],
|
||||
[impl_partial_eq, $id],
|
||||
[impl_default, $id, $elem_ty],
|
||||
[impl_float_minmax_ops, $id]
|
||||
[impl_float_minmax_ops, $id],
|
||||
[impl_float_math, $id]
|
||||
);
|
||||
|
||||
$test_macro!(
|
||||
|
|
@ -160,6 +164,7 @@ macro_rules! simd_f_ty {
|
|||
test_default!($id, $elem_ty);
|
||||
test_mask_select!($mask_ty, $id, $elem_ty);
|
||||
test_float_minmax_ops!($id, $elem_ty);
|
||||
test_float_math!($id, $elem_ty);
|
||||
}
|
||||
);
|
||||
}
|
||||
|
|
@ -167,7 +172,8 @@ macro_rules! simd_f_ty {
|
|||
|
||||
/// Defines a portable packed SIMD signed-integer vector type.
|
||||
macro_rules! simd_i_ty {
|
||||
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
|
||||
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
|
||||
$test_macro:ident |
|
||||
$($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
|
||||
vector_impl!(
|
||||
[define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
|
||||
|
|
@ -221,7 +227,8 @@ macro_rules! simd_i_ty {
|
|||
|
||||
/// Defines a portable packed SIMD unsigned-integer vector type.
|
||||
macro_rules! simd_u_ty {
|
||||
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident |
|
||||
($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident,
|
||||
$test_macro:ident |
|
||||
$($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
|
||||
vector_impl!(
|
||||
[define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
|
||||
|
|
|
|||
43
library/stdarch/coresimd/ppsv/codegen/abs.rs
Normal file
43
library/stdarch/coresimd/ppsv/codegen/abs.rs
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
//! Vector absolute value
|
||||
|
||||
use coresimd::simd::*;
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.fabs.v2f32"]
|
||||
fn abs_v2f32(x: f32x2) -> f32x2;
|
||||
#[link_name = "llvm.fabs.v4f32"]
|
||||
fn abs_v4f32(x: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.fabs.v8f32"]
|
||||
fn abs_v8f32(x: f32x8) -> f32x8;
|
||||
#[link_name = "llvm.fabs.v16f32"]
|
||||
fn abs_v16f32(x: f32x16) -> f32x16;
|
||||
#[link_name = "llvm.fabs.v2f64"]
|
||||
fn abs_v2f64(x: f64x2) -> f64x2;
|
||||
#[link_name = "llvm.fabs.v4f64"]
|
||||
fn abs_v4f64(x: f64x4) -> f64x4;
|
||||
#[link_name = "llvm.fabs.v8f64"]
|
||||
fn abs_v8f64(x: f64x8) -> f64x8;
|
||||
}
|
||||
|
||||
pub(crate) trait FloatAbs {
|
||||
fn abs(self) -> Self;
|
||||
}
|
||||
|
||||
macro_rules! impl_fabs {
|
||||
($id:ident: $fn:ident) => {
|
||||
impl FloatAbs for $id {
|
||||
fn abs(self) -> Self {
|
||||
unsafe { $fn(self) }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl_fabs!(f32x2: abs_v2f32);
|
||||
impl_fabs!(f32x4: abs_v4f32);
|
||||
impl_fabs!(f32x8: abs_v8f32);
|
||||
impl_fabs!(f32x16: abs_v16f32);
|
||||
impl_fabs!(f64x2: abs_v2f64);
|
||||
impl_fabs!(f64x4: abs_v4f64);
|
||||
impl_fabs!(f64x8: abs_v8f64);
|
||||
43
library/stdarch/coresimd/ppsv/codegen/fma.rs
Normal file
43
library/stdarch/coresimd/ppsv/codegen/fma.rs
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
//! Vector fused multiply add
|
||||
|
||||
use coresimd::simd::*;
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.fma.v2f32"]
|
||||
fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
|
||||
#[link_name = "llvm.fma.v4f32"]
|
||||
fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.fma.v8f32"]
|
||||
fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
|
||||
#[link_name = "llvm.fma.v16f32"]
|
||||
fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
|
||||
#[link_name = "llvm.fma.v2f64"]
|
||||
fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
|
||||
#[link_name = "llvm.fma.v4f64"]
|
||||
fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
|
||||
#[link_name = "llvm.fma.v8f64"]
|
||||
fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
|
||||
}
|
||||
|
||||
pub(crate) trait FloatFma {
|
||||
fn fma(self, y: Self, z: Self) -> Self;
|
||||
}
|
||||
|
||||
macro_rules! impl_fma {
|
||||
($id:ident: $fn:ident) => {
|
||||
impl FloatFma for $id {
|
||||
fn fma(self, y: Self, z: Self) -> Self {
|
||||
unsafe { $fn(self, y, z) }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl_fma!(f32x2: fma_v2f32);
|
||||
impl_fma!(f32x4: fma_v4f32);
|
||||
impl_fma!(f32x8: fma_v8f32);
|
||||
impl_fma!(f32x16: fma_v16f32);
|
||||
impl_fma!(f64x2: fma_v2f64);
|
||||
impl_fma!(f64x4: fma_v4f64);
|
||||
impl_fma!(f64x8: fma_v8f64);
|
||||
|
|
@ -4,3 +4,7 @@
|
|||
pub mod wrapping;
|
||||
|
||||
pub mod masks_reductions;
|
||||
|
||||
pub mod sqrt;
|
||||
pub mod abs;
|
||||
pub mod fma;
|
||||
|
|
|
|||
43
library/stdarch/coresimd/ppsv/codegen/sqrt.rs
Normal file
43
library/stdarch/coresimd/ppsv/codegen/sqrt.rs
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
//! Exact vector square-root
|
||||
|
||||
use coresimd::simd::*;
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.sqrt.v2f32"]
|
||||
fn sqrt_v2f32(x: f32x2) -> f32x2;
|
||||
#[link_name = "llvm.sqrt.v4f32"]
|
||||
fn sqrt_v4f32(x: f32x4) -> f32x4;
|
||||
#[link_name = "llvm.sqrt.v8f32"]
|
||||
fn sqrt_v8f32(x: f32x8) -> f32x8;
|
||||
#[link_name = "llvm.sqrt.v16f32"]
|
||||
fn sqrt_v16f32(x: f32x16) -> f32x16;
|
||||
#[link_name = "llvm.sqrt.v2f64"]
|
||||
fn sqrt_v2f64(x: f64x2) -> f64x2;
|
||||
#[link_name = "llvm.sqrt.v4f64"]
|
||||
fn sqrt_v4f64(x: f64x4) -> f64x4;
|
||||
#[link_name = "llvm.sqrt.v8f64"]
|
||||
fn sqrt_v8f64(x: f64x8) -> f64x8;
|
||||
}
|
||||
|
||||
pub(crate) trait FloatSqrt {
|
||||
fn sqrt(self) -> Self;
|
||||
}
|
||||
|
||||
macro_rules! impl_fsqrt {
|
||||
($id:ident: $fn:ident) => {
|
||||
impl FloatSqrt for $id {
|
||||
fn sqrt(self) -> Self {
|
||||
unsafe { $fn(self) }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl_fsqrt!(f32x2: sqrt_v2f32);
|
||||
impl_fsqrt!(f32x4: sqrt_v4f32);
|
||||
impl_fsqrt!(f32x8: sqrt_v8f32);
|
||||
impl_fsqrt!(f32x16: sqrt_v16f32);
|
||||
impl_fsqrt!(f64x2: sqrt_v2f64);
|
||||
impl_fsqrt!(f64x4: sqrt_v4f64);
|
||||
impl_fsqrt!(f64x8: sqrt_v8f64);
|
||||
|
|
@ -49,6 +49,9 @@ extern "platform-intrinsic" {
|
|||
pub fn simd_select<M, T>(m: M, a: T, b: T) -> T;
|
||||
|
||||
pub fn simd_fmin<T>(a: T, b: T) -> T;
|
||||
// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/416
|
||||
// pub fn simd_fmax<T>(a: T, b: T) -> T;
|
||||
// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/416
|
||||
// pub fn simd_fmax<T>(a: T, b: T) -> T;
|
||||
|
||||
pub fn simd_fsqrt<T>(a: T) -> T;
|
||||
pub fn simd_fma<T>(a: T, b: T, c: T) -> T;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue