rename portable float vector's wrapping_{sum,product} to {sum,product} per the RFC

This commit is contained in:
gnzlbg 2018-06-06 11:40:05 +02:00 committed by gnzlbg
parent d62b7dbc64
commit 8ce74840ce
3 changed files with 247 additions and 171 deletions

View file

@ -1,7 +1,7 @@
//! Implements portable arithmetic vector reductions.
#![allow(unused)]
macro_rules! impl_arithmetic_reductions {
macro_rules! impl_int_arithmetic_reductions {
($id:ident, $elem_ty:ident) => {
impl $id {
/// Horizontal sum of the vector elements.
@ -11,15 +11,8 @@ macro_rules! impl_arithmetic_reductions {
///
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
///
/// # Integer vectors
///
/// If an operation overflows it returns the mathematical result
/// modulo `2^n` where `n` is the number of times it overflows.
///
/// # Floating-point vectors
///
/// If one of the vector element is `NaN` the reduction returns
/// `NaN`.
#[cfg(not(target_arch = "aarch64"))]
#[inline]
pub fn wrapping_sum(self) -> $elem_ty {
@ -33,15 +26,8 @@ macro_rules! impl_arithmetic_reductions {
///
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
///
/// # Integer vectors
///
/// If an operation overflows it returns the mathematical result
/// modulo `2^n` where `n` is the number of times it overflows.
///
/// # Floating-point vectors
///
/// If one of the vector element is `NaN` the reduction returns
/// `NaN`.
#[cfg(target_arch = "aarch64")]
#[inline]
pub fn wrapping_sum(self) -> $elem_ty {
@ -62,15 +48,8 @@ macro_rules! impl_arithmetic_reductions {
///
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
///
/// # Integer vectors
///
/// If an operation overflows it returns the mathematical result
/// modulo `2^n` where `n` is the number of times it overflows.
///
/// # Floating-point vectors
///
/// If one of the vector element is `NaN` the reduction returns
/// `NaN`.
#[cfg(not(target_arch = "aarch64"))]
#[inline]
pub fn wrapping_product(self) -> $elem_ty {
@ -84,15 +63,8 @@ macro_rules! impl_arithmetic_reductions {
///
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
///
/// # Integer vectors
///
/// If an operation overflows it returns the mathematical result
/// modulo `2^n` where `n` is the number of times it overflows.
///
/// # Floating-point vectors
///
/// If one of the vector element is `NaN` the reduction returns
/// `NaN`.
#[cfg(target_arch = "aarch64")]
#[inline]
pub fn wrapping_product(self) -> $elem_ty {
@ -109,8 +81,93 @@ macro_rules! impl_arithmetic_reductions {
};
}
macro_rules! impl_float_arithmetic_reductions {
($id:ident, $elem_ty:ident) => {
impl $id {
/// Horizontal sum of the vector elements.
///
/// The intrinsic performs a tree-reduction of the vector elements.
/// That is, for an 8 element vector:
///
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
///
/// If one of the vector element is `NaN` the reduction returns
/// `NaN`. The resulting `NaN` is not required to be equal to any
/// of the `NaN`s in the vector.
#[cfg(not(target_arch = "aarch64"))]
#[inline]
pub fn sum(self) -> $elem_ty {
use coresimd::simd_llvm::simd_reduce_add_ordered;
unsafe { simd_reduce_add_ordered(self, 0 as $elem_ty) }
}
/// Horizontal sum of the vector elements.
///
/// The intrinsic performs a tree-reduction of the vector elements.
/// That is, for an 8 element vector:
///
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
///
/// If one of the vector element is `NaN` the reduction returns
/// `NaN`. The resulting `NaN` is not required to be equal to any
/// of the `NaN`s in the vector.
#[cfg(target_arch = "aarch64")]
#[inline]
pub fn sum(self) -> $elem_ty {
// FIXME: broken on AArch64
// https://bugs.llvm.org/show_bug.cgi?id=36796
use super::codegen::wrapping::Wrapping;
let mut x = self.extract(0) as $elem_ty;
for i in 1..$id::lanes() {
x = Wrapping::add(x, self.extract(i) as $elem_ty);
}
x
}
/// Horizontal product of the vector elements.
///
/// The intrinsic performs a tree-reduction of the vector elements.
/// That is, for an 8 element vector:
///
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
///
/// If one of the vector element is `NaN` the reduction returns
/// `NaN`. The resulting `NaN` is not required to be equal to any
/// of the `NaN`s in the vector.
#[cfg(not(target_arch = "aarch64"))]
#[inline]
pub fn product(self) -> $elem_ty {
use coresimd::simd_llvm::simd_reduce_mul_ordered;
unsafe { simd_reduce_mul_ordered(self, 1 as $elem_ty) }
}
/// Horizontal product of the vector elements.
///
/// The intrinsic performs a tree-reduction of the vector elements.
/// That is, for an 8 element vector:
///
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
///
/// If one of the vector element is `NaN` the reduction returns
/// `NaN`. The resulting `NaN` is not required to be equal to any
/// of the `NaN`s in the vector.
#[cfg(target_arch = "aarch64")]
#[inline]
pub fn product(self) -> $elem_ty {
// FIXME: broken on AArch64
// https://bugs.llvm.org/show_bug.cgi?id=36796
use super::codegen::wrapping::Wrapping;
let mut x = self.extract(0) as $elem_ty;
for i in 1..$id::lanes() {
x = Wrapping::mul(x, self.extract(i) as $elem_ty);
}
x
}
}
};
}
#[cfg(test)]
macro_rules! test_arithmetic_reductions {
macro_rules! test_int_arithmetic_reductions {
($id:ident, $elem_ty:ident) => {
fn alternating(x: usize) -> ::coresimd::simd::$id {
use coresimd::simd::$id;
@ -157,3 +214,52 @@ macro_rules! test_arithmetic_reductions {
}
};
}
#[cfg(test)]
macro_rules! test_float_arithmetic_reductions {
($id:ident, $elem_ty:ident) => {
fn alternating(x: usize) -> ::coresimd::simd::$id {
use coresimd::simd::$id;
let mut v = $id::splat(1 as $elem_ty);
for i in 0..$id::lanes() {
if i % x == 0 {
v = v.replace(i, 2 as $elem_ty);
}
}
v
}
#[test]
fn sum() {
use coresimd::simd::$id;
let v = $id::splat(0 as $elem_ty);
assert_eq!(v.sum(), 0 as $elem_ty);
let v = $id::splat(1 as $elem_ty);
assert_eq!(v.sum(), $id::lanes() as $elem_ty);
let v = alternating(2);
assert_eq!(
v.sum(),
($id::lanes() / 2 + $id::lanes()) as $elem_ty
);
}
#[test]
fn product() {
use coresimd::simd::$id;
let v = $id::splat(0 as $elem_ty);
assert_eq!(v.product(), 0 as $elem_ty);
let v = $id::splat(1 as $elem_ty);
assert_eq!(v.product(), 1 as $elem_ty);
let f = match $id::lanes() {
64 => 16,
32 => 8,
16 => 4,
_ => 2,
};
let v = alternating(f);
assert_eq!(
v.product(),
(2_usize.pow(($id::lanes() / f) as u32) as $elem_ty)
);
}
};
}

View file

@ -140,7 +140,7 @@ macro_rules! simd_f_ty {
[impl_cmp, $id, $mask_ty],
[impl_arithmetic_ops, $id],
[impl_arithmetic_scalar_ops, $id, $elem_ty],
[impl_arithmetic_reductions, $id, $elem_ty],
[impl_float_arithmetic_reductions, $id, $elem_ty],
[impl_minmax_reductions, $id, $elem_ty],
[impl_neg_op, $id, $elem_ty],
[impl_partial_eq, $id],
@ -157,7 +157,7 @@ macro_rules! simd_f_ty {
test_cmp!($id, $elem_ty, $mask_ty, 1. as $elem_ty, 0. as $elem_ty);
test_arithmetic_ops!($id, $elem_ty);
test_arithmetic_scalar_ops!($id, $elem_ty);
test_arithmetic_reductions!($id, $elem_ty);
test_float_arithmetic_reductions!($id, $elem_ty);
test_minmax_reductions!($id, $elem_ty);
test_neg_op!($id, $elem_ty);
test_partial_eq!($id, 1. as $elem_ty, 0. as $elem_ty);
@ -183,7 +183,7 @@ macro_rules! simd_i_ty {
[impl_hash, $id, $elem_ty],
[impl_arithmetic_ops, $id],
[impl_arithmetic_scalar_ops, $id, $elem_ty],
[impl_arithmetic_reductions, $id, $elem_ty],
[impl_int_arithmetic_reductions, $id, $elem_ty],
[impl_minmax_reductions, $id, $elem_ty],
[impl_neg_op, $id, $elem_ty],
[impl_bitwise_ops, $id, !(0 as $elem_ty)],
@ -207,7 +207,7 @@ macro_rules! simd_i_ty {
test_hash!($id, $elem_ty);
test_arithmetic_ops!($id, $elem_ty);
test_arithmetic_scalar_ops!($id, $elem_ty);
test_arithmetic_reductions!($id, $elem_ty);
test_int_arithmetic_reductions!($id, $elem_ty);
test_minmax_reductions!($id, $elem_ty);
test_neg_op!($id, $elem_ty);
test_int_bitwise_ops!($id, $elem_ty);
@ -238,7 +238,7 @@ macro_rules! simd_u_ty {
[impl_hash, $id, $elem_ty],
[impl_arithmetic_ops, $id],
[impl_arithmetic_scalar_ops, $id, $elem_ty],
[impl_arithmetic_reductions, $id, $elem_ty],
[impl_int_arithmetic_reductions, $id, $elem_ty],
[impl_minmax_reductions, $id, $elem_ty],
[impl_bitwise_scalar_ops, $id, $elem_ty],
[impl_bitwise_ops, $id, !(0 as $elem_ty)],
@ -261,7 +261,7 @@ macro_rules! simd_u_ty {
test_hash!($id, $elem_ty);
test_arithmetic_ops!($id, $elem_ty);
test_arithmetic_scalar_ops!($id, $elem_ty);
test_arithmetic_reductions!($id, $elem_ty);
test_int_arithmetic_reductions!($id, $elem_ty);
test_minmax_reductions!($id, $elem_ty);
test_int_bitwise_ops!($id, $elem_ty);
test_int_bitwise_scalar_ops!($id, $elem_ty);

View file

@ -186,7 +186,7 @@ fn max_nan() {
finvoke!(max_nan_test);
}
macro_rules! wrapping_sum_nan_test {
macro_rules! sum_nan_test {
($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
if $feature_macro!($feature) {
#[target_feature(enable = $feature)]
@ -202,19 +202,19 @@ macro_rules! wrapping_sum_nan_test {
let mut v = v0.replace(i, n0);
// If the vector contains a NaN the result is NaN:
assert!(
v.wrapping_sum().is_nan(),
v.sum().is_nan(),
"nan at {} => {} | {:?}",
i,
v.wrapping_sum(),
v.sum(),
v
);
for j in 0..i {
v = v.replace(j, n0);
assert!(v.wrapping_sum().is_nan());
assert!(v.sum().is_nan());
}
}
let v = $id::splat(n0);
assert!(v.wrapping_sum().is_nan(), "all nans | {:?}", v);
assert!(v.sum().is_nan(), "all nans | {:?}", v);
}
unsafe { test_fn() };
}
@ -222,11 +222,11 @@ macro_rules! wrapping_sum_nan_test {
}
#[test]
fn wrapping_sum_nan() {
finvoke!(wrapping_sum_nan_test);
fn sum_nan() {
finvoke!(sum_nan_test);
}
macro_rules! wrapping_product_nan_test {
macro_rules! product_nan_test {
($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
if $feature_macro!($feature) {
#[target_feature(enable = $feature)]
@ -242,19 +242,19 @@ macro_rules! wrapping_product_nan_test {
let mut v = v0.replace(i, n0);
// If the vector contains a NaN the result is NaN:
assert!(
v.wrapping_product().is_nan(),
v.product().is_nan(),
"nan at {} | {:?}",
i,
v
);
for j in 0..i {
v = v.replace(j, n0);
assert!(v.wrapping_sum().is_nan());
assert!(v.product().is_nan());
}
}
let v = $id::splat(n0);
assert!(
v.wrapping_product().is_nan(),
v.product().is_nan(),
"all nans | {:?}",
v
);
@ -265,8 +265,8 @@ macro_rules! wrapping_product_nan_test {
}
#[test]
fn wrapping_product_nan() {
finvoke!(wrapping_product_nan_test);
fn product_nan() {
finvoke!(product_nan_test);
}
trait AsInt {
@ -304,133 +304,111 @@ as_int!(f64x8, i64x8);
mod offset {
use super::*;
trait TreeReduceAdd {
trait TreeSum {
type R;
fn tree_reduce_add(self) -> Self::R;
fn tree_sum(self) -> Self::R;
}
macro_rules! tree_reduce_add_f {
($elem_ty:ident) => {
impl<'a> TreeReduceAdd for &'a [$elem_ty] {
type R = $elem_ty;
fn tree_reduce_add(self) -> $elem_ty {
if self.len() == 2 {
println!(" lv: {}, rv: {} => {}", self[0], self[1], self[0] + self[1]);
self[0] + self[1]
} else {
let mid = self.len() / 2;
let (left, right) = self.split_at(mid);
println!(" splitting self: {:?} at mid {} into left: {:?}, right: {:?}", self, mid, self[0], self[1]);
Self::tree_reduce_add(left) + Self::tree_reduce_add(right)
macro_rules! tree_sum_f {
($elem_ty:ident) => {
impl<'a> TreeSum for &'a [$elem_ty] {
type R = $elem_ty;
fn tree_sum(self) -> $elem_ty {
if self.len() == 2 {
self[0] + self[1]
} else {
let mid = self.len() / 2;
let (left, right) = self.split_at(mid);
Self::tree_sum(left) + Self::tree_sum(right)
}
}
}
}
};
}
tree_reduce_add_f!(f32);
tree_reduce_add_f!(f64);
};
}
tree_sum_f!(f32);
tree_sum_f!(f64);
macro_rules! wrapping_sum_roundoff_test {
($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
if $feature_macro!($feature) {
#[target_feature(enable = $feature)]
unsafe fn test_fn() {
let mut start = std::$elem_ty::EPSILON;
let mut wrapping_sum = 0. as $elem_ty;
macro_rules! sum_roundoff_test {
($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
if $feature_macro!($feature) {
#[target_feature(enable = $feature)]
unsafe fn test_fn() {
let mut start = std::$elem_ty::EPSILON;
let mut sum = 0. as $elem_ty;
let mut v = $id::splat(0. as $elem_ty);
for i in 0..$id::lanes() {
let c = if i % 2 == 0 { 1e3 } else { -1. };
start *= 3.14 * c;
wrapping_sum += start;
// println!("{} | start: {}", stringify!($id), start);
v = v.replace(i, start);
}
let vwrapping_sum = v.wrapping_sum();
println!(
"{} | lwrapping_sum: {}",
stringify!($id),
wrapping_sum
);
println!(
"{} | vwrapping_sum: {}",
stringify!($id),
vwrapping_sum
);
let r = vwrapping_sum.as_int() == wrapping_sum.as_int();
// This is false in general; the intrinsic performs a
// tree-reduce:
println!("{} | equal: {}", stringify!($id), r);
let mut a = [0. as $elem_ty; $id::lanes()];
v.store_unaligned(&mut a);
let twrapping_sum = a.tree_reduce_add();
println!(
"{} | twrapping_sum: {}",
stringify!($id),
twrapping_sum
);
// tolerate 1 ULP difference:
if vwrapping_sum.as_int() > twrapping_sum.as_int() {
assert!(
vwrapping_sum.as_int() - twrapping_sum.as_int()
< 2,
"v: {:?} | vwrapping_sum: {} | twrapping_sum: {}",
v,
vwrapping_sum,
twrapping_sum
);
} else {
assert!(
twrapping_sum.as_int() - vwrapping_sum.as_int()
< 2,
"v: {:?} | vwrapping_sum: {} | twrapping_sum: {}",
v,
vwrapping_sum,
twrapping_sum
);
let mut v = $id::splat(0. as $elem_ty);
for i in 0..$id::lanes() {
let c = if i % 2 == 0 { 1e3 } else { -1. };
start *= 3.14 * c;
sum += start;
v = v.replace(i, start);
}
let vsum = v.sum();
let r = vsum.as_int() == sum.as_int();
// This is false in general; the intrinsic performs a
// tree-reduce:
let mut a = [0. as $elem_ty; $id::lanes()];
v.store_unaligned(&mut a);
let tsum = a.tree_sum();
// tolerate 1 ULP difference:
if vsum.as_int() > tsum.as_int() {
assert!(
vsum.as_int() - tsum.as_int()
< 2,
"v: {:?} | vsum: {} | tsum: {}",
v,
vsum,
tsum
);
} else {
assert!(
tsum.as_int() - vsum.as_int()
< 2,
"v: {:?} | vsum: {} | tsum: {}",
v,
vsum,
tsum
);
}
}
unsafe { test_fn() };
}
unsafe { test_fn() };
}
};
}
};
}
#[test]
fn wrapping_sum_roundoff_test() {
finvoke!(wrapping_sum_roundoff_test);
fn sum_roundoff_test() {
finvoke!(sum_roundoff_test);
}
trait TreeReduceMul {
trait TreeProduct {
type R;
fn tree_reduce_mul(self) -> Self::R;
fn tree_product(self) -> Self::R;
}
macro_rules! tree_reduce_mul_f {
($elem_ty:ident) => {
impl<'a> TreeReduceMul for &'a [$elem_ty] {
type R = $elem_ty;
fn tree_reduce_mul(self) -> $elem_ty {
if self.len() == 2 {
println!(" lv: {}, rv: {} => {}", self[0], self[1], self[0] * self[1]);
self[0] * self[1]
} else {
let mid = self.len() / 2;
let (left, right) = self.split_at(mid);
println!(" splitting self: {:?} at mid {} into left: {:?}, right: {:?}", self, mid, self[0], self[1]);
Self::tree_reduce_mul(left) * Self::tree_reduce_mul(right)
macro_rules! tree_product_f {
($elem_ty:ident) => {
impl<'a> TreeProduct for &'a [$elem_ty] {
type R = $elem_ty;
fn tree_product(self) -> $elem_ty {
if self.len() == 2 {
self[0] * self[1]
} else {
let mid = self.len() / 2;
let (left, right) = self.split_at(mid);
Self::tree_product(left) * Self::tree_product(right)
}
}
}
}
};
}
};
}
tree_reduce_mul_f!(f32);
tree_reduce_mul_f!(f64);
tree_product_f!(f32);
tree_product_f!(f64);
macro_rules! wrapping_product_roundoff_test {
macro_rules! product_roundoff_test {
($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
if $feature_macro!($feature) {
#[target_feature(enable = $feature)]
@ -443,23 +421,16 @@ mod offset {
let c = if i % 2 == 0 { 1e3 } else { -1. };
start *= 3.14 * c;
mul *= start;
println!("{} | start: {}", stringify!($id), start);
v = v.replace(i, start);
}
let vmul = v.wrapping_product();
println!("{} | lmul: {}", stringify!($id), mul);
println!("{} | vmul: {}", stringify!($id), vmul);
let vmul = v.product();
let r = vmul.as_int() == mul.as_int();
// This is false in general; the intrinsic performs a
// tree-reduce:
println!("{} | equal: {}", stringify!($id), r);
let mut a = [0. as $elem_ty; $id::lanes()];
v.store_unaligned(&mut a);
let tmul = a.tree_reduce_mul();
println!("{} | tmul: {}", stringify!($id), tmul);
let tmul = a.tree_product();
// tolerate 1 ULP difference:
if vmul.as_int() > tmul.as_int() {
assert!(
@ -485,8 +456,8 @@ mod offset {
}
#[test]
fn wrapping_product_roundoff_test() {
finvoke!(wrapping_product_roundoff_test);
fn product_roundoff_test() {
finvoke!(product_roundoff_test);
}
macro_rules! wrapping_sum_overflow_test {
@ -516,7 +487,7 @@ mod offset {
iinvoke!(wrapping_sum_overflow_test);
}
macro_rules! mul_overflow_test {
macro_rules! product_overflow_test {
($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
if $feature_macro!($feature) {
#[target_feature(enable = $feature)]
@ -539,8 +510,7 @@ mod offset {
}
#[test]
fn mul_overflow_test() {
iinvoke!(mul_overflow_test);
fn product_overflow_test() {
iinvoke!(product_overflow_test);
}
}