rename portable float vector's wrapping_{sum,product} to {sum,product} per the RFC
This commit is contained in:
parent
d62b7dbc64
commit
8ce74840ce
3 changed files with 247 additions and 171 deletions
|
|
@ -1,7 +1,7 @@
|
|||
//! Implements portable arithmetic vector reductions.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_arithmetic_reductions {
|
||||
macro_rules! impl_int_arithmetic_reductions {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
impl $id {
|
||||
/// Horizontal sum of the vector elements.
|
||||
|
|
@ -11,15 +11,8 @@ macro_rules! impl_arithmetic_reductions {
|
|||
///
|
||||
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
|
||||
///
|
||||
/// # Integer vectors
|
||||
///
|
||||
/// If an operation overflows it returns the mathematical result
|
||||
/// modulo `2^n` where `n` is the bit width of the element type.
|
||||
///
|
||||
/// # Floating-point vectors
|
||||
///
|
||||
/// If one of the vector element is `NaN` the reduction returns
|
||||
/// `NaN`.
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
pub fn wrapping_sum(self) -> $elem_ty {
|
||||
|
|
@ -33,15 +26,8 @@ macro_rules! impl_arithmetic_reductions {
|
|||
///
|
||||
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
|
||||
///
|
||||
/// # Integer vectors
|
||||
///
|
||||
/// If an operation overflows it returns the mathematical result
|
||||
/// modulo `2^n` where `n` is the bit width of the element type.
|
||||
///
|
||||
/// # Floating-point vectors
|
||||
///
|
||||
/// If one of the vector element is `NaN` the reduction returns
|
||||
/// `NaN`.
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
pub fn wrapping_sum(self) -> $elem_ty {
|
||||
|
|
@ -62,15 +48,8 @@ macro_rules! impl_arithmetic_reductions {
|
|||
///
|
||||
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
|
||||
///
|
||||
/// # Integer vectors
|
||||
///
|
||||
/// If an operation overflows it returns the mathematical result
|
||||
/// modulo `2^n` where `n` is the bit width of the element type.
|
||||
///
|
||||
/// # Floating-point vectors
|
||||
///
|
||||
/// If one of the vector element is `NaN` the reduction returns
|
||||
/// `NaN`.
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
pub fn wrapping_product(self) -> $elem_ty {
|
||||
|
|
@ -84,15 +63,8 @@ macro_rules! impl_arithmetic_reductions {
|
|||
///
|
||||
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
|
||||
///
|
||||
/// # Integer vectors
|
||||
///
|
||||
/// If an operation overflows it returns the mathematical result
|
||||
/// modulo `2^n` where `n` is the bit width of the element type.
|
||||
///
|
||||
/// # Floating-point vectors
|
||||
///
|
||||
/// If one of the vector element is `NaN` the reduction returns
|
||||
/// `NaN`.
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
pub fn wrapping_product(self) -> $elem_ty {
|
||||
|
|
@ -109,8 +81,93 @@ macro_rules! impl_arithmetic_reductions {
|
|||
};
|
||||
}
|
||||
|
||||
/// Implements the horizontal `sum` and `product` reductions for the
/// floating-point vector type `$id`, whose lanes have type `$elem_ty`.
///
/// Each reduction exists in two mutually exclusive variants selected by
/// `cfg`: one that forwards to a `simd_llvm` reduction intrinsic and a scalar
/// fallback that is compiled only for `aarch64`.
macro_rules! impl_float_arithmetic_reductions {
    ($id:ident, $elem_ty:ident) => {
        impl $id {
            /// Adds up all lanes of the vector and returns the scalar total.
            ///
            /// The reduction is specified as a tree-reduction over the
            /// lanes. For a vector with 8 elements that is:
            ///
            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
            ///
            /// If any of the vector elements is `NaN`, the result is `NaN`.
            /// That `NaN` is not required to be equal to any of the `NaN`s
            /// stored in the vector.
            #[cfg(not(target_arch = "aarch64"))]
            #[inline]
            pub fn sum(self) -> $elem_ty {
                use coresimd::simd_llvm::simd_reduce_add_ordered;
                // Additive identity passed as the intrinsic's second argument
                // (presumably its initial accumulator - confirm against the
                // intrinsic's declaration).
                let zero = 0 as $elem_ty;
                // NOTE(review): soundness relies on `Self` being a SIMD
                // vector with `$elem_ty` lanes - confirm at the call sites.
                unsafe { simd_reduce_add_ordered(self, zero) }
            }

            /// Adds up all lanes of the vector and returns the scalar total.
            ///
            /// The reduction is specified as a tree-reduction over the
            /// lanes. For a vector with 8 elements that is:
            ///
            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
            ///
            /// If any of the vector elements is `NaN`, the result is `NaN`.
            /// That `NaN` is not required to be equal to any of the `NaN`s
            /// stored in the vector.
            #[cfg(target_arch = "aarch64")]
            #[inline]
            pub fn sum(self) -> $elem_ty {
                // FIXME: broken on AArch64
                // https://bugs.llvm.org/show_bug.cgi?id=36796
                use super::codegen::wrapping::Wrapping;
                // Scalar fallback: seed the accumulator with lane 0 and fold
                // the remaining lanes into it in increasing index order.
                let first = self.extract(0) as $elem_ty;
                (1..$id::lanes()).fold(first, |acc, lane| {
                    Wrapping::add(acc, self.extract(lane) as $elem_ty)
                })
            }

            /// Multiplies all lanes of the vector together and returns the
            /// scalar result.
            ///
            /// The reduction is specified as a tree-reduction over the
            /// lanes. For a vector with 8 elements that is:
            ///
            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
            ///
            /// If any of the vector elements is `NaN`, the result is `NaN`.
            /// That `NaN` is not required to be equal to any of the `NaN`s
            /// stored in the vector.
            #[cfg(not(target_arch = "aarch64"))]
            #[inline]
            pub fn product(self) -> $elem_ty {
                use coresimd::simd_llvm::simd_reduce_mul_ordered;
                // Multiplicative identity passed as the intrinsic's second
                // argument (presumably its initial accumulator - confirm
                // against the intrinsic's declaration).
                let one = 1 as $elem_ty;
                // NOTE(review): soundness relies on `Self` being a SIMD
                // vector with `$elem_ty` lanes - confirm at the call sites.
                unsafe { simd_reduce_mul_ordered(self, one) }
            }

            /// Multiplies all lanes of the vector together and returns the
            /// scalar result.
            ///
            /// The reduction is specified as a tree-reduction over the
            /// lanes. For a vector with 8 elements that is:
            ///
            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
            ///
            /// If any of the vector elements is `NaN`, the result is `NaN`.
            /// That `NaN` is not required to be equal to any of the `NaN`s
            /// stored in the vector.
            #[cfg(target_arch = "aarch64")]
            #[inline]
            pub fn product(self) -> $elem_ty {
                // FIXME: broken on AArch64
                // https://bugs.llvm.org/show_bug.cgi?id=36796
                use super::codegen::wrapping::Wrapping;
                // Scalar fallback: seed the accumulator with lane 0 and fold
                // the remaining lanes into it in increasing index order.
                let first = self.extract(0) as $elem_ty;
                (1..$id::lanes()).fold(first, |acc, lane| {
                    Wrapping::mul(acc, self.extract(lane) as $elem_ty)
                })
            }
        }
    };
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
macro_rules! test_arithmetic_reductions {
|
||||
macro_rules! test_int_arithmetic_reductions {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
fn alternating(x: usize) -> ::coresimd::simd::$id {
|
||||
use coresimd::simd::$id;
|
||||
|
|
@ -157,3 +214,52 @@ macro_rules! test_arithmetic_reductions {
|
|||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Generates the unit tests for the `sum` and `product` reductions of the
/// floating-point vector type `$id`, whose lanes have type `$elem_ty`.
#[cfg(test)]
macro_rules! test_float_arithmetic_reductions {
    ($id:ident, $elem_ty:ident) => {
        /// Builds a vector of ones in which every lane whose index is a
        /// multiple of `x` has been replaced by two.
        fn alternating(x: usize) -> ::coresimd::simd::$id {
            use coresimd::simd::$id;
            (0..$id::lanes())
                .filter(|lane| lane % x == 0)
                .fold($id::splat(1 as $elem_ty), |acc, lane| {
                    acc.replace(lane, 2 as $elem_ty)
                })
        }

        #[test]
        fn sum() {
            use coresimd::simd::$id;
            let lanes = $id::lanes();

            // A vector of zeros sums to zero.
            let zeros = $id::splat(0 as $elem_ty);
            assert_eq!(zeros.sum(), 0 as $elem_ty);

            // A vector of ones sums to the number of lanes.
            let ones = $id::splat(1 as $elem_ty);
            assert_eq!(ones.sum(), lanes as $elem_ty);

            // Every second lane holds 2, the others hold 1.
            let mixed = alternating(2);
            let expected = (lanes / 2 + lanes) as $elem_ty;
            assert_eq!(mixed.sum(), expected);
        }

        #[test]
        fn product() {
            use coresimd::simd::$id;

            // A vector of zeros multiplies to zero.
            let zeros = $id::splat(0 as $elem_ty);
            assert_eq!(zeros.product(), 0 as $elem_ty);

            // A vector of ones multiplies to one.
            let ones = $id::splat(1 as $elem_ty);
            assert_eq!(ones.product(), 1 as $elem_ty);

            // Spacing between the lanes that hold 2: vectors with 16, 32 or
            // 64 lanes end up with exactly four twos, narrower vectors with
            // `lanes / 2` of them.
            let lanes = $id::lanes();
            let stride = match lanes {
                64 => 16,
                32 => 8,
                16 => 4,
                _ => 2,
            };
            let twos = (lanes / stride) as u32;
            let expected = 2_usize.pow(twos) as $elem_ty;
            assert_eq!(alternating(stride).product(), expected);
        }
    };
}
|
||||
|
|
|
|||
|
|
@ -140,7 +140,7 @@ macro_rules! simd_f_ty {
|
|||
[impl_cmp, $id, $mask_ty],
|
||||
[impl_arithmetic_ops, $id],
|
||||
[impl_arithmetic_scalar_ops, $id, $elem_ty],
|
||||
[impl_arithmetic_reductions, $id, $elem_ty],
|
||||
[impl_float_arithmetic_reductions, $id, $elem_ty],
|
||||
[impl_minmax_reductions, $id, $elem_ty],
|
||||
[impl_neg_op, $id, $elem_ty],
|
||||
[impl_partial_eq, $id],
|
||||
|
|
@ -157,7 +157,7 @@ macro_rules! simd_f_ty {
|
|||
test_cmp!($id, $elem_ty, $mask_ty, 1. as $elem_ty, 0. as $elem_ty);
|
||||
test_arithmetic_ops!($id, $elem_ty);
|
||||
test_arithmetic_scalar_ops!($id, $elem_ty);
|
||||
test_arithmetic_reductions!($id, $elem_ty);
|
||||
test_float_arithmetic_reductions!($id, $elem_ty);
|
||||
test_minmax_reductions!($id, $elem_ty);
|
||||
test_neg_op!($id, $elem_ty);
|
||||
test_partial_eq!($id, 1. as $elem_ty, 0. as $elem_ty);
|
||||
|
|
@ -183,7 +183,7 @@ macro_rules! simd_i_ty {
|
|||
[impl_hash, $id, $elem_ty],
|
||||
[impl_arithmetic_ops, $id],
|
||||
[impl_arithmetic_scalar_ops, $id, $elem_ty],
|
||||
[impl_arithmetic_reductions, $id, $elem_ty],
|
||||
[impl_int_arithmetic_reductions, $id, $elem_ty],
|
||||
[impl_minmax_reductions, $id, $elem_ty],
|
||||
[impl_neg_op, $id, $elem_ty],
|
||||
[impl_bitwise_ops, $id, !(0 as $elem_ty)],
|
||||
|
|
@ -207,7 +207,7 @@ macro_rules! simd_i_ty {
|
|||
test_hash!($id, $elem_ty);
|
||||
test_arithmetic_ops!($id, $elem_ty);
|
||||
test_arithmetic_scalar_ops!($id, $elem_ty);
|
||||
test_arithmetic_reductions!($id, $elem_ty);
|
||||
test_int_arithmetic_reductions!($id, $elem_ty);
|
||||
test_minmax_reductions!($id, $elem_ty);
|
||||
test_neg_op!($id, $elem_ty);
|
||||
test_int_bitwise_ops!($id, $elem_ty);
|
||||
|
|
@ -238,7 +238,7 @@ macro_rules! simd_u_ty {
|
|||
[impl_hash, $id, $elem_ty],
|
||||
[impl_arithmetic_ops, $id],
|
||||
[impl_arithmetic_scalar_ops, $id, $elem_ty],
|
||||
[impl_arithmetic_reductions, $id, $elem_ty],
|
||||
[impl_int_arithmetic_reductions, $id, $elem_ty],
|
||||
[impl_minmax_reductions, $id, $elem_ty],
|
||||
[impl_bitwise_scalar_ops, $id, $elem_ty],
|
||||
[impl_bitwise_ops, $id, !(0 as $elem_ty)],
|
||||
|
|
@ -261,7 +261,7 @@ macro_rules! simd_u_ty {
|
|||
test_hash!($id, $elem_ty);
|
||||
test_arithmetic_ops!($id, $elem_ty);
|
||||
test_arithmetic_scalar_ops!($id, $elem_ty);
|
||||
test_arithmetic_reductions!($id, $elem_ty);
|
||||
test_int_arithmetic_reductions!($id, $elem_ty);
|
||||
test_minmax_reductions!($id, $elem_ty);
|
||||
test_int_bitwise_ops!($id, $elem_ty);
|
||||
test_int_bitwise_scalar_ops!($id, $elem_ty);
|
||||
|
|
|
|||
|
|
@ -186,7 +186,7 @@ fn max_nan() {
|
|||
finvoke!(max_nan_test);
|
||||
}
|
||||
|
||||
macro_rules! wrapping_sum_nan_test {
|
||||
macro_rules! sum_nan_test {
|
||||
($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
|
||||
if $feature_macro!($feature) {
|
||||
#[target_feature(enable = $feature)]
|
||||
|
|
@ -202,19 +202,19 @@ macro_rules! wrapping_sum_nan_test {
|
|||
let mut v = v0.replace(i, n0);
|
||||
// If the vector contains a NaN the result is NaN:
|
||||
assert!(
|
||||
v.wrapping_sum().is_nan(),
|
||||
v.sum().is_nan(),
|
||||
"nan at {} => {} | {:?}",
|
||||
i,
|
||||
v.wrapping_sum(),
|
||||
v.sum(),
|
||||
v
|
||||
);
|
||||
for j in 0..i {
|
||||
v = v.replace(j, n0);
|
||||
assert!(v.wrapping_sum().is_nan());
|
||||
assert!(v.sum().is_nan());
|
||||
}
|
||||
}
|
||||
let v = $id::splat(n0);
|
||||
assert!(v.wrapping_sum().is_nan(), "all nans | {:?}", v);
|
||||
assert!(v.sum().is_nan(), "all nans | {:?}", v);
|
||||
}
|
||||
unsafe { test_fn() };
|
||||
}
|
||||
|
|
@ -222,11 +222,11 @@ macro_rules! wrapping_sum_nan_test {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn wrapping_sum_nan() {
|
||||
finvoke!(wrapping_sum_nan_test);
|
||||
fn sum_nan() {
|
||||
finvoke!(sum_nan_test);
|
||||
}
|
||||
|
||||
macro_rules! wrapping_product_nan_test {
|
||||
macro_rules! product_nan_test {
|
||||
($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
|
||||
if $feature_macro!($feature) {
|
||||
#[target_feature(enable = $feature)]
|
||||
|
|
@ -242,19 +242,19 @@ macro_rules! wrapping_product_nan_test {
|
|||
let mut v = v0.replace(i, n0);
|
||||
// If the vector contains a NaN the result is NaN:
|
||||
assert!(
|
||||
v.wrapping_product().is_nan(),
|
||||
v.product().is_nan(),
|
||||
"nan at {} | {:?}",
|
||||
i,
|
||||
v
|
||||
);
|
||||
for j in 0..i {
|
||||
v = v.replace(j, n0);
|
||||
assert!(v.wrapping_sum().is_nan());
|
||||
assert!(v.product().is_nan());
|
||||
}
|
||||
}
|
||||
let v = $id::splat(n0);
|
||||
assert!(
|
||||
v.wrapping_product().is_nan(),
|
||||
v.product().is_nan(),
|
||||
"all nans | {:?}",
|
||||
v
|
||||
);
|
||||
|
|
@ -265,8 +265,8 @@ macro_rules! wrapping_product_nan_test {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn wrapping_product_nan() {
|
||||
finvoke!(wrapping_product_nan_test);
|
||||
fn product_nan() {
|
||||
finvoke!(product_nan_test);
|
||||
}
|
||||
|
||||
trait AsInt {
|
||||
|
|
@ -304,133 +304,111 @@ as_int!(f64x8, i64x8);
|
|||
mod offset {
|
||||
use super::*;
|
||||
|
||||
trait TreeReduceAdd {
|
||||
trait TreeSum {
|
||||
type R;
|
||||
fn tree_reduce_add(self) -> Self::R;
|
||||
fn tree_sum(self) -> Self::R;
|
||||
}
|
||||
|
||||
macro_rules! tree_reduce_add_f {
|
||||
($elem_ty:ident) => {
|
||||
impl<'a> TreeReduceAdd for &'a [$elem_ty] {
|
||||
type R = $elem_ty;
|
||||
fn tree_reduce_add(self) -> $elem_ty {
|
||||
if self.len() == 2 {
|
||||
println!(" lv: {}, rv: {} => {}", self[0], self[1], self[0] + self[1]);
|
||||
self[0] + self[1]
|
||||
} else {
|
||||
let mid = self.len() / 2;
|
||||
let (left, right) = self.split_at(mid);
|
||||
println!(" splitting self: {:?} at mid {} into left: {:?}, right: {:?}", self, mid, self[0], self[1]);
|
||||
Self::tree_reduce_add(left) + Self::tree_reduce_add(right)
|
||||
macro_rules! tree_sum_f {
|
||||
($elem_ty:ident) => {
|
||||
impl<'a> TreeSum for &'a [$elem_ty] {
|
||||
type R = $elem_ty;
|
||||
fn tree_sum(self) -> $elem_ty {
|
||||
if self.len() == 2 {
|
||||
self[0] + self[1]
|
||||
} else {
|
||||
let mid = self.len() / 2;
|
||||
let (left, right) = self.split_at(mid);
|
||||
Self::tree_sum(left) + Self::tree_sum(right)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
tree_reduce_add_f!(f32);
|
||||
tree_reduce_add_f!(f64);
|
||||
};
|
||||
}
|
||||
tree_sum_f!(f32);
|
||||
tree_sum_f!(f64);
|
||||
|
||||
macro_rules! wrapping_sum_roundoff_test {
|
||||
($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
|
||||
if $feature_macro!($feature) {
|
||||
#[target_feature(enable = $feature)]
|
||||
unsafe fn test_fn() {
|
||||
let mut start = std::$elem_ty::EPSILON;
|
||||
let mut wrapping_sum = 0. as $elem_ty;
|
||||
macro_rules! sum_roundoff_test {
|
||||
($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
|
||||
if $feature_macro!($feature) {
|
||||
#[target_feature(enable = $feature)]
|
||||
unsafe fn test_fn() {
|
||||
let mut start = std::$elem_ty::EPSILON;
|
||||
let mut sum = 0. as $elem_ty;
|
||||
|
||||
let mut v = $id::splat(0. as $elem_ty);
|
||||
for i in 0..$id::lanes() {
|
||||
let c = if i % 2 == 0 { 1e3 } else { -1. };
|
||||
start *= 3.14 * c;
|
||||
wrapping_sum += start;
|
||||
// println!("{} | start: {}", stringify!($id), start);
|
||||
v = v.replace(i, start);
|
||||
}
|
||||
let vwrapping_sum = v.wrapping_sum();
|
||||
println!(
|
||||
"{} | lwrapping_sum: {}",
|
||||
stringify!($id),
|
||||
wrapping_sum
|
||||
);
|
||||
println!(
|
||||
"{} | vwrapping_sum: {}",
|
||||
stringify!($id),
|
||||
vwrapping_sum
|
||||
);
|
||||
let r = vwrapping_sum.as_int() == wrapping_sum.as_int();
|
||||
// This is false in general; the intrinsic performs a
|
||||
// tree-reduce:
|
||||
println!("{} | equal: {}", stringify!($id), r);
|
||||
|
||||
let mut a = [0. as $elem_ty; $id::lanes()];
|
||||
v.store_unaligned(&mut a);
|
||||
|
||||
let twrapping_sum = a.tree_reduce_add();
|
||||
println!(
|
||||
"{} | twrapping_sum: {}",
|
||||
stringify!($id),
|
||||
twrapping_sum
|
||||
);
|
||||
|
||||
// tolerate 1 ULP difference:
|
||||
if vwrapping_sum.as_int() > twrapping_sum.as_int() {
|
||||
assert!(
|
||||
vwrapping_sum.as_int() - twrapping_sum.as_int()
|
||||
< 2,
|
||||
"v: {:?} | vwrapping_sum: {} | twrapping_sum: {}",
|
||||
v,
|
||||
vwrapping_sum,
|
||||
twrapping_sum
|
||||
);
|
||||
} else {
|
||||
assert!(
|
||||
twrapping_sum.as_int() - vwrapping_sum.as_int()
|
||||
< 2,
|
||||
"v: {:?} | vwrapping_sum: {} | twrapping_sum: {}",
|
||||
v,
|
||||
vwrapping_sum,
|
||||
twrapping_sum
|
||||
);
|
||||
let mut v = $id::splat(0. as $elem_ty);
|
||||
for i in 0..$id::lanes() {
|
||||
let c = if i % 2 == 0 { 1e3 } else { -1. };
|
||||
start *= 3.14 * c;
|
||||
sum += start;
|
||||
v = v.replace(i, start);
|
||||
}
|
||||
let vsum = v.sum();
|
||||
let r = vsum.as_int() == sum.as_int();
|
||||
// This is false in general; the intrinsic performs a
|
||||
// tree-reduce:
|
||||
let mut a = [0. as $elem_ty; $id::lanes()];
|
||||
v.store_unaligned(&mut a);
|
||||
|
||||
let tsum = a.tree_sum();
|
||||
|
||||
// tolerate 1 ULP difference:
|
||||
if vsum.as_int() > tsum.as_int() {
|
||||
assert!(
|
||||
vsum.as_int() - tsum.as_int()
|
||||
< 2,
|
||||
"v: {:?} | vsum: {} | tsum: {}",
|
||||
v,
|
||||
vsum,
|
||||
tsum
|
||||
);
|
||||
} else {
|
||||
assert!(
|
||||
tsum.as_int() - vsum.as_int()
|
||||
< 2,
|
||||
"v: {:?} | vsum: {} | tsum: {}",
|
||||
v,
|
||||
vsum,
|
||||
tsum
|
||||
);
|
||||
}
|
||||
}
|
||||
unsafe { test_fn() };
|
||||
}
|
||||
unsafe { test_fn() };
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn wrapping_sum_roundoff_test() {
|
||||
finvoke!(wrapping_sum_roundoff_test);
|
||||
fn sum_roundoff_test() {
|
||||
finvoke!(sum_roundoff_test);
|
||||
}
|
||||
|
||||
trait TreeReduceMul {
|
||||
trait TreeProduct {
|
||||
type R;
|
||||
fn tree_reduce_mul(self) -> Self::R;
|
||||
fn tree_product(self) -> Self::R;
|
||||
}
|
||||
|
||||
macro_rules! tree_reduce_mul_f {
|
||||
($elem_ty:ident) => {
|
||||
impl<'a> TreeReduceMul for &'a [$elem_ty] {
|
||||
type R = $elem_ty;
|
||||
fn tree_reduce_mul(self) -> $elem_ty {
|
||||
if self.len() == 2 {
|
||||
println!(" lv: {}, rv: {} => {}", self[0], self[1], self[0] * self[1]);
|
||||
self[0] * self[1]
|
||||
} else {
|
||||
let mid = self.len() / 2;
|
||||
let (left, right) = self.split_at(mid);
|
||||
println!(" splitting self: {:?} at mid {} into left: {:?}, right: {:?}", self, mid, self[0], self[1]);
|
||||
Self::tree_reduce_mul(left) * Self::tree_reduce_mul(right)
|
||||
macro_rules! tree_product_f {
|
||||
($elem_ty:ident) => {
|
||||
impl<'a> TreeProduct for &'a [$elem_ty] {
|
||||
type R = $elem_ty;
|
||||
fn tree_product(self) -> $elem_ty {
|
||||
if self.len() == 2 {
|
||||
self[0] * self[1]
|
||||
} else {
|
||||
let mid = self.len() / 2;
|
||||
let (left, right) = self.split_at(mid);
|
||||
Self::tree_product(left) * Self::tree_product(right)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
tree_reduce_mul_f!(f32);
|
||||
tree_reduce_mul_f!(f64);
|
||||
tree_product_f!(f32);
|
||||
tree_product_f!(f64);
|
||||
|
||||
macro_rules! wrapping_product_roundoff_test {
|
||||
macro_rules! product_roundoff_test {
|
||||
($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
|
||||
if $feature_macro!($feature) {
|
||||
#[target_feature(enable = $feature)]
|
||||
|
|
@ -443,23 +421,16 @@ mod offset {
|
|||
let c = if i % 2 == 0 { 1e3 } else { -1. };
|
||||
start *= 3.14 * c;
|
||||
mul *= start;
|
||||
println!("{} | start: {}", stringify!($id), start);
|
||||
v = v.replace(i, start);
|
||||
}
|
||||
let vmul = v.wrapping_product();
|
||||
println!("{} | lmul: {}", stringify!($id), mul);
|
||||
println!("{} | vmul: {}", stringify!($id), vmul);
|
||||
let vmul = v.product();
|
||||
let r = vmul.as_int() == mul.as_int();
|
||||
// This is false in general; the intrinsic performs a
|
||||
// tree-reduce:
|
||||
println!("{} | equal: {}", stringify!($id), r);
|
||||
|
||||
let mut a = [0. as $elem_ty; $id::lanes()];
|
||||
v.store_unaligned(&mut a);
|
||||
|
||||
let tmul = a.tree_reduce_mul();
|
||||
println!("{} | tmul: {}", stringify!($id), tmul);
|
||||
|
||||
let tmul = a.tree_product();
|
||||
// tolerate 1 ULP difference:
|
||||
if vmul.as_int() > tmul.as_int() {
|
||||
assert!(
|
||||
|
|
@ -485,8 +456,8 @@ mod offset {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn wrapping_product_roundoff_test() {
|
||||
finvoke!(wrapping_product_roundoff_test);
|
||||
fn product_roundoff_test() {
|
||||
finvoke!(product_roundoff_test);
|
||||
}
|
||||
|
||||
macro_rules! wrapping_sum_overflow_test {
|
||||
|
|
@ -516,7 +487,7 @@ mod offset {
|
|||
iinvoke!(wrapping_sum_overflow_test);
|
||||
}
|
||||
|
||||
macro_rules! mul_overflow_test {
|
||||
macro_rules! product_overflow_test {
|
||||
($feature:tt, $feature_macro:ident, $id:ident, $elem_ty:ident) => {
|
||||
if $feature_macro!($feature) {
|
||||
#[target_feature(enable = $feature)]
|
||||
|
|
@ -539,8 +510,7 @@ mod offset {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn mul_overflow_test() {
|
||||
iinvoke!(mul_overflow_test);
|
||||
fn product_overflow_test() {
|
||||
iinvoke!(product_overflow_test);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue