Split protable vector types tests into multiple crates (#379)
* split the portable vector tests into separate crates * use rustc reductions
This commit is contained in:
parent
44763c853d
commit
68c53c1e55
44 changed files with 1044 additions and 1761 deletions
|
|
@ -2,21 +2,14 @@
|
|||
|
||||
set -ex
|
||||
|
||||
: ${TARGET?"The TARGET environment variable must be set."}
|
||||
|
||||
# Tests are all super fast anyway, and they fault often enough on travis that
|
||||
# having only one thread increases debuggability to be worth it.
|
||||
export RUST_TEST_THREADS=1
|
||||
#export RUST_BACKTRACE=1
|
||||
#export RUST_TEST_NOCAPTURE=1
|
||||
|
||||
# FIXME(rust-lang-nursery/stdsimd#120) run-time feature detection for ARM Neon
|
||||
case ${TARGET} in
|
||||
aarch*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+neon"
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
|
||||
FEATURES="strict,$FEATURES"
|
||||
|
||||
echo "RUSTFLAGS=${RUSTFLAGS}"
|
||||
|
|
@ -25,7 +18,8 @@ echo "OBJDUMP=${OBJDUMP}"
|
|||
|
||||
cargo_test() {
|
||||
cmd="cargo test --target=$TARGET --features $FEATURES $1"
|
||||
cmd="$cmd -p coresimd -p stdsimd --manifest-path crates/stdsimd/Cargo.toml"
|
||||
cmd="$cmd -p coresimd -p stdsimd"
|
||||
cmd="$cmd --manifest-path crates/stdsimd/Cargo.toml"
|
||||
cmd="$cmd -- $2"
|
||||
$cmd
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,76 +1,82 @@
|
|||
//! Lane-wise arithmetic operations.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_arithmetic_ops {
|
||||
($id:ident) => {
|
||||
impl ops::Add for $id {
|
||||
impl ::ops::Add for $id {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn add(self, other: Self) -> Self {
|
||||
use coresimd::simd_llvm::simd_add;
|
||||
unsafe { simd_add(self, other) }
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Sub for $id {
|
||||
impl ::ops::Sub for $id {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn sub(self, other: Self) -> Self {
|
||||
use coresimd::simd_llvm::simd_sub;
|
||||
unsafe { simd_sub(self, other) }
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Mul for $id {
|
||||
impl ::ops::Mul for $id {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn mul(self, other: Self) -> Self {
|
||||
use coresimd::simd_llvm::simd_mul;
|
||||
unsafe { simd_mul(self, other) }
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Div for $id {
|
||||
impl ::ops::Div for $id {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn div(self, other: Self) -> Self {
|
||||
use coresimd::simd_llvm::simd_div;
|
||||
unsafe { simd_div(self, other) }
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Rem for $id {
|
||||
impl ::ops::Rem for $id {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn rem(self, other: Self) -> Self {
|
||||
use coresimd::simd_llvm::simd_rem;
|
||||
unsafe { simd_rem(self, other) }
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::AddAssign for $id {
|
||||
impl ::ops::AddAssign for $id {
|
||||
#[inline]
|
||||
fn add_assign(&mut self, other: Self) {
|
||||
*self = *self + other;
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::SubAssign for $id {
|
||||
impl ::ops::SubAssign for $id {
|
||||
#[inline]
|
||||
fn sub_assign(&mut self, other: Self) {
|
||||
*self = *self - other;
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::MulAssign for $id {
|
||||
impl ::ops::MulAssign for $id {
|
||||
#[inline]
|
||||
fn mul_assign(&mut self, other: Self) {
|
||||
*self = *self * other;
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::DivAssign for $id {
|
||||
impl ::ops::DivAssign for $id {
|
||||
#[inline]
|
||||
fn div_assign(&mut self, other: Self) {
|
||||
*self = *self / other;
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::RemAssign for $id {
|
||||
impl ::ops::RemAssign for $id {
|
||||
#[inline]
|
||||
fn rem_assign(&mut self, other: Self) {
|
||||
*self = *self % other;
|
||||
|
|
@ -80,7 +86,6 @@ macro_rules! impl_arithmetic_ops {
|
|||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! test_arithmetic_ops {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -1,17 +1,65 @@
|
|||
//! Implements portable arithmetic vector reductions.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_arithmetic_reductions {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
impl $id {
|
||||
/// Lane-wise addition of the vector elements.
|
||||
///
|
||||
/// FIXME: document guarantees with respect to:
|
||||
/// * integers: overflow behavior
|
||||
/// * floats: order and NaNs
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
pub fn sum(self) -> $elem_ty {
|
||||
ReduceAdd::reduce_add(self)
|
||||
use ::coresimd::simd_llvm::simd_reduce_add_ordered;
|
||||
unsafe {
|
||||
simd_reduce_add_ordered(self, 0 as $elem_ty)
|
||||
}
|
||||
}
|
||||
/// Lane-wise addition of the vector elements.
|
||||
///
|
||||
/// FIXME: document guarantees with respect to:
|
||||
/// * integers: overflow behavior
|
||||
/// * floats: order and NaNs
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
pub fn sum(self) -> $elem_ty {
|
||||
// FIXME: broken on AArch64
|
||||
let mut x = self.extract(0) as $elem_ty;
|
||||
for i in 1..$id::lanes() {
|
||||
x += self.extract(i) as $elem_ty;
|
||||
}
|
||||
x
|
||||
}
|
||||
|
||||
/// Lane-wise multiplication of the vector elements.
|
||||
///
|
||||
/// FIXME: document guarantees with respect to:
|
||||
/// * integers: overflow behavior
|
||||
/// * floats: order and NaNs
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
pub fn product(self) -> $elem_ty {
|
||||
ReduceMul::reduce_mul(self)
|
||||
use ::coresimd::simd_llvm::simd_reduce_mul_ordered;
|
||||
unsafe {
|
||||
simd_reduce_mul_ordered(self, 1 as $elem_ty)
|
||||
}
|
||||
}
|
||||
/// Lane-wise multiplication of the vector elements.
|
||||
///
|
||||
/// FIXME: document guarantees with respect to:
|
||||
/// * integers: overflow behavior
|
||||
/// * floats: order and NaNs
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
pub fn product(self) -> $elem_ty {
|
||||
// FIXME: broken on AArch64
|
||||
let mut x = self.extract(0) as $elem_ty;
|
||||
for i in 1..$id::lanes() {
|
||||
x *= self.extract(i) as $elem_ty;
|
||||
}
|
||||
x
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,48 +1,52 @@
|
|||
//! Lane-wise bitwise operations for integer and boolean vectors.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_bitwise_ops {
|
||||
($ty:ident, $true_val:expr) => {
|
||||
impl ops::Not for $ty {
|
||||
impl ::ops::Not for $ty {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn not(self) -> Self {
|
||||
Self::splat($true_val) ^ self
|
||||
}
|
||||
}
|
||||
impl ops::BitXor for $ty {
|
||||
impl ::ops::BitXor for $ty {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn bitxor(self, other: Self) -> Self {
|
||||
use coresimd::simd_llvm::simd_xor;
|
||||
unsafe { simd_xor(self, other) }
|
||||
}
|
||||
}
|
||||
impl ops::BitAnd for $ty {
|
||||
impl ::ops::BitAnd for $ty {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn bitand(self, other: Self) -> Self {
|
||||
use coresimd::simd_llvm::simd_and;
|
||||
unsafe { simd_and(self, other) }
|
||||
}
|
||||
}
|
||||
impl ops::BitOr for $ty {
|
||||
impl ::ops::BitOr for $ty {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn bitor(self, other: Self) -> Self {
|
||||
use coresimd::simd_llvm::simd_or;
|
||||
unsafe { simd_or(self, other) }
|
||||
}
|
||||
}
|
||||
impl ops::BitAndAssign for $ty {
|
||||
impl ::ops::BitAndAssign for $ty {
|
||||
#[inline]
|
||||
fn bitand_assign(&mut self, other: Self) {
|
||||
*self = *self & other;
|
||||
}
|
||||
}
|
||||
impl ops::BitOrAssign for $ty {
|
||||
impl ::ops::BitOrAssign for $ty {
|
||||
#[inline]
|
||||
fn bitor_assign(&mut self, other: Self) {
|
||||
*self = *self | other;
|
||||
}
|
||||
}
|
||||
impl ops::BitXorAssign for $ty {
|
||||
impl ::ops::BitXorAssign for $ty {
|
||||
#[inline]
|
||||
fn bitxor_assign(&mut self, other: Self) {
|
||||
*self = *self ^ other;
|
||||
|
|
@ -52,7 +56,6 @@ macro_rules! impl_bitwise_ops {
|
|||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! test_int_bitwise_ops {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
#[test]
|
||||
|
|
@ -117,7 +120,6 @@ macro_rules! test_int_bitwise_ops {
|
|||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! test_bool_bitwise_ops {
|
||||
($id:ident) => {
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -1,44 +1,142 @@
|
|||
//! Implements portable bitwise vector reductions.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_bitwise_reductions {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
impl $id {
|
||||
/// Lane-wise bitwise `and` of the vector elements.
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
pub fn and(self) -> $elem_ty {
|
||||
ReduceAnd::reduce_and(self)
|
||||
use ::coresimd::simd_llvm::simd_reduce_and;
|
||||
unsafe {
|
||||
simd_reduce_and(self)
|
||||
}
|
||||
}
|
||||
/// Lane-wise bitwise `and` of the vector elements.
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
pub fn and(self) -> $elem_ty {
|
||||
// FIXME: broken on aarch64
|
||||
let mut x = self.extract(0) as $elem_ty;
|
||||
for i in 1..$id::lanes() {
|
||||
x &= self.extract(i) as $elem_ty;
|
||||
}
|
||||
x
|
||||
}
|
||||
|
||||
/// Lane-wise bitwise `or` of the vector elements.
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
pub fn or(self) -> $elem_ty {
|
||||
ReduceOr::reduce_or(self)
|
||||
use ::coresimd::simd_llvm::simd_reduce_or;
|
||||
unsafe {
|
||||
simd_reduce_or(self)
|
||||
}
|
||||
}
|
||||
/// Lane-wise bitwise `or` of the vector elements.
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
pub fn or(self) -> $elem_ty {
|
||||
// FIXME: broken on aarch64
|
||||
let mut x = self.extract(0) as $elem_ty;
|
||||
for i in 1..$id::lanes() {
|
||||
x |= self.extract(i) as $elem_ty;
|
||||
}
|
||||
x
|
||||
}
|
||||
|
||||
/// Lane-wise bitwise `xor` of the vector elements.
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
pub fn xor(self) -> $elem_ty {
|
||||
ReduceXor::reduce_xor(self)
|
||||
use ::coresimd::simd_llvm::simd_reduce_xor;
|
||||
unsafe {
|
||||
simd_reduce_xor(self)
|
||||
}
|
||||
}
|
||||
/// Lane-wise bitwise `xor` of the vector elements.
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
pub fn xor(self) -> $elem_ty {
|
||||
// FIXME: broken on aarch64
|
||||
let mut x = self.extract(0) as $elem_ty;
|
||||
for i in 1..$id::lanes() {
|
||||
x ^= self.extract(i) as $elem_ty;
|
||||
}
|
||||
x
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_bool_bitwise_reductions {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
($id:ident, $elem_ty:ident, $internal_ty:ident) => {
|
||||
impl $id {
|
||||
/// Lane-wise bitwise `and` of the vector elements.
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
pub fn and(self) -> $elem_ty {
|
||||
ReduceAnd::reduce_and(self) !=0
|
||||
use ::coresimd::simd_llvm::simd_reduce_and;
|
||||
unsafe {
|
||||
let r: $internal_ty = simd_reduce_and(self);
|
||||
r != 0
|
||||
}
|
||||
}
|
||||
/// Lane-wise bitwise `and` of the vector elements.
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
pub fn and(self) -> $elem_ty {
|
||||
// FIXME: broken on aarch64
|
||||
let mut x = self.extract(0) as $elem_ty;
|
||||
for i in 1..$id::lanes() {
|
||||
x &= self.extract(i) as $elem_ty;
|
||||
}
|
||||
x
|
||||
}
|
||||
|
||||
/// Lane-wise bitwise `or` of the vector elements.
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
pub fn or(self) -> $elem_ty {
|
||||
ReduceOr::reduce_or(self) != 0
|
||||
use ::coresimd::simd_llvm::simd_reduce_or;
|
||||
unsafe {
|
||||
let r: $internal_ty = simd_reduce_or(self);
|
||||
r != 0
|
||||
}
|
||||
}
|
||||
/// Lane-wise bitwise `or` of the vector elements.
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
pub fn or(self) -> $elem_ty {
|
||||
// FIXME: broken on aarch64
|
||||
let mut x = self.extract(0) as $elem_ty;
|
||||
for i in 1..$id::lanes() {
|
||||
x |= self.extract(i) as $elem_ty;
|
||||
}
|
||||
x
|
||||
}
|
||||
|
||||
/// Lane-wise bitwise `xor` of the vector elements.
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
pub fn xor(self) -> $elem_ty {
|
||||
ReduceXor::reduce_xor(self) != 0
|
||||
use ::coresimd::simd_llvm::simd_reduce_xor;
|
||||
unsafe {
|
||||
let r: $internal_ty = simd_reduce_xor(self);
|
||||
r != 0
|
||||
}
|
||||
}
|
||||
/// Lane-wise bitwise `xor` of the vector elements.
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
pub fn xor(self) -> $elem_ty {
|
||||
// FIXME: broken on aarch64
|
||||
let mut x = self.extract(0) as $elem_ty;
|
||||
for i in 1..$id::lanes() {
|
||||
x ^= self.extract(i) as $elem_ty;
|
||||
}
|
||||
x
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,12 @@
|
|||
//! Minimal boolean vector implementation
|
||||
#![allow(unused)]
|
||||
|
||||
/// Minimal interface: all packed SIMD boolean vector types implement this.
|
||||
macro_rules! impl_bool_minimal {
|
||||
($id:ident, $elem_ty:ident, $elem_count:expr, $($elem_name:ident),+) => {
|
||||
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(expl_impl_clone_on_copy))]
|
||||
impl Clone for $id {
|
||||
impl ::clone::Clone for $id {
|
||||
#[inline] // currently needed for correctness
|
||||
fn clone(&self) -> Self {
|
||||
*self
|
||||
|
|
@ -59,6 +60,7 @@ macro_rules! impl_bool_minimal {
|
|||
/// If `index >= Self::lanes()` the behavior is undefined.
|
||||
#[inline]
|
||||
pub unsafe fn extract_unchecked(self, index: usize) -> bool {
|
||||
use coresimd::simd_llvm::simd_extract;
|
||||
let x: $elem_ty = simd_extract(self, index as u32);
|
||||
x != 0
|
||||
}
|
||||
|
|
@ -87,6 +89,7 @@ macro_rules! impl_bool_minimal {
|
|||
index: usize,
|
||||
new_value: bool,
|
||||
) -> Self {
|
||||
use coresimd::simd_llvm::simd_insert;
|
||||
simd_insert(self, index as u32, Self::bool_to_internal(new_value))
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,22 +1,47 @@
|
|||
//! Lane-wise boolean vector reductions.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_bool_reductions {
|
||||
($id:ident) => {
|
||||
impl $id {
|
||||
/// Are `all` vector lanes `true`?
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
pub fn all(self) -> bool {
|
||||
use ::coresimd::simd_llvm::simd_reduce_all;
|
||||
unsafe {
|
||||
simd_reduce_all(self)
|
||||
}
|
||||
}
|
||||
/// Are `all` vector lanes `true`?
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
pub fn all(self) -> bool {
|
||||
// FIXME: Broken on AArch64
|
||||
self.and()
|
||||
}
|
||||
|
||||
/// Is `any` vector lanes `true`?
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
pub fn any(self) -> bool {
|
||||
use ::coresimd::simd_llvm::simd_reduce_any;
|
||||
unsafe {
|
||||
simd_reduce_any(self)
|
||||
}
|
||||
}
|
||||
/// Is `any` vector lanes `true`?
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
pub fn any(self) -> bool {
|
||||
// FIXME: Broken on AArch64
|
||||
self.or()
|
||||
}
|
||||
|
||||
/// Are `all` vector lanes `false`?
|
||||
#[inline]
|
||||
pub fn none(self) -> bool {
|
||||
!self.or()
|
||||
!self.any()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
//! Lane-wise vector comparisons returning boolean vectors.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_cmp {
|
||||
($id:ident, $bool_ty:ident) => {
|
||||
|
|
@ -6,36 +7,42 @@ macro_rules! impl_cmp {
|
|||
/// Lane-wise equality comparison.
|
||||
#[inline]
|
||||
pub fn eq(self, other: $id) -> $bool_ty {
|
||||
use coresimd::simd_llvm::simd_eq;
|
||||
unsafe { simd_eq(self, other) }
|
||||
}
|
||||
|
||||
/// Lane-wise inequality comparison.
|
||||
#[inline]
|
||||
pub fn ne(self, other: $id) -> $bool_ty {
|
||||
use coresimd::simd_llvm::simd_ne;
|
||||
unsafe { simd_ne(self, other) }
|
||||
}
|
||||
|
||||
/// Lane-wise less-than comparison.
|
||||
#[inline]
|
||||
pub fn lt(self, other: $id) -> $bool_ty {
|
||||
use coresimd::simd_llvm::simd_lt;
|
||||
unsafe { simd_lt(self, other) }
|
||||
}
|
||||
|
||||
/// Lane-wise less-than-or-equals comparison.
|
||||
#[inline]
|
||||
pub fn le(self, other: $id) -> $bool_ty {
|
||||
use coresimd::simd_llvm::simd_le;
|
||||
unsafe { simd_le(self, other) }
|
||||
}
|
||||
|
||||
/// Lane-wise greater-than comparison.
|
||||
#[inline]
|
||||
pub fn gt(self, other: $id) -> $bool_ty {
|
||||
use coresimd::simd_llvm::simd_gt;
|
||||
unsafe { simd_gt(self, other) }
|
||||
}
|
||||
|
||||
/// Lane-wise greater-than-or-equals comparison.
|
||||
#[inline]
|
||||
pub fn ge(self, other: $id) -> $bool_ty {
|
||||
use coresimd::simd_llvm::simd_ge;
|
||||
unsafe { simd_ge(self, other) }
|
||||
}
|
||||
}
|
||||
|
|
@ -48,36 +55,42 @@ macro_rules! impl_bool_cmp {
|
|||
/// Lane-wise equality comparison.
|
||||
#[inline]
|
||||
pub fn eq(self, other: $id) -> $bool_ty {
|
||||
use coresimd::simd_llvm::simd_eq;
|
||||
unsafe { simd_eq(self, other) }
|
||||
}
|
||||
|
||||
/// Lane-wise inequality comparison.
|
||||
#[inline]
|
||||
pub fn ne(self, other: $id) -> $bool_ty {
|
||||
use coresimd::simd_llvm::simd_ne;
|
||||
unsafe { simd_ne(self, other) }
|
||||
}
|
||||
|
||||
/// Lane-wise less-than comparison.
|
||||
#[inline]
|
||||
pub fn lt(self, other: $id) -> $bool_ty {
|
||||
use coresimd::simd_llvm::simd_gt;
|
||||
unsafe { simd_gt(self, other) }
|
||||
}
|
||||
|
||||
/// Lane-wise less-than-or-equals comparison.
|
||||
#[inline]
|
||||
pub fn le(self, other: $id) -> $bool_ty {
|
||||
use coresimd::simd_llvm::simd_ge;
|
||||
unsafe { simd_ge(self, other) }
|
||||
}
|
||||
|
||||
/// Lane-wise greater-than comparison.
|
||||
#[inline]
|
||||
pub fn gt(self, other: $id) -> $bool_ty {
|
||||
use coresimd::simd_llvm::simd_lt;
|
||||
unsafe { simd_lt(self, other) }
|
||||
}
|
||||
|
||||
/// Lane-wise greater-than-or-equals comparison.
|
||||
#[inline]
|
||||
pub fn ge(self, other: $id) -> $bool_ty {
|
||||
use coresimd::simd_llvm::simd_le;
|
||||
unsafe { simd_le(self, other) }
|
||||
}
|
||||
}
|
||||
|
|
@ -85,7 +98,6 @@ macro_rules! impl_bool_cmp {
|
|||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! test_cmp {
|
||||
($id:ident, $elem_ty:ident, $bool_ty:ident,
|
||||
$true:expr, $false:expr) => {
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
//! Implements `Default` for vector types.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_default {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
impl Default for $id {
|
||||
impl ::default::Default for $id {
|
||||
#[inline]
|
||||
fn default() -> Self {
|
||||
Self::splat($elem_ty::default())
|
||||
|
|
@ -12,13 +13,11 @@ macro_rules! impl_default {
|
|||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! test_default {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
#[test]
|
||||
fn default() {
|
||||
use ::coresimd::simd::*;
|
||||
use std::default::Default;
|
||||
use ::coresimd::simd::{$id};
|
||||
let a = $id::default();
|
||||
for i in 0..$id::lanes() {
|
||||
assert_eq!(a.extract(i), $elem_ty::default());
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
//! Implements `Eq` for vector types.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_eq {
|
||||
($id:ident) => { impl Eq for $id {} }
|
||||
($id:ident) => { impl ::cmp::Eq for $id {} }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
//! Implements formating traits.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_hex_fmt {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
impl fmt::LowerHex for $id {
|
||||
fn fmt(&self, f: &mut fmt::Formatter)
|
||||
-> fmt::Result {
|
||||
impl ::fmt::LowerHex for $id {
|
||||
fn fmt(&self, f: &mut ::fmt::Formatter)
|
||||
-> ::fmt::Result {
|
||||
use ::mem;
|
||||
write!(f, "{}(", stringify!($id))?;
|
||||
let n = mem::size_of_val(self)
|
||||
/ mem::size_of::<$elem_ty>();
|
||||
|
|
|
|||
|
|
@ -1,32 +1,48 @@
|
|||
//! Implements the From trait for vector types, which performs a lane-wise
|
||||
//! cast vector types with the same number of lanes.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_from {
|
||||
($to:ident: $elem_ty:ident, $test_mod:ident | $($from:ident),+) => {
|
||||
$(
|
||||
impl From<::simd::$from> for $to {
|
||||
#[inline]
|
||||
fn from(f: ::simd::$from) -> $to {
|
||||
unsafe { simd_cast(f) }
|
||||
}
|
||||
macro_rules! impl_from_impl {
|
||||
($from:ident, $to:ident) => {
|
||||
impl ::convert::From<::simd::$from> for $to {
|
||||
#[inline]
|
||||
fn from(f: ::simd::$from) -> $to {
|
||||
use coresimd::simd_llvm::simd_cast;
|
||||
unsafe { simd_cast(f) }
|
||||
}
|
||||
)+
|
||||
|
||||
#[cfg(test)]
|
||||
mod $test_mod {
|
||||
$(
|
||||
#[test]
|
||||
fn $from() {
|
||||
use ::std::convert::{From, Into};
|
||||
use ::coresimd::simd::{$from, $to};
|
||||
use ::std::default::Default;
|
||||
assert_eq!($to::lanes(), $from::lanes());
|
||||
let a: $from = $from::default();
|
||||
let b_0: $to = From::from(a);
|
||||
let b_1: $to = a.into();
|
||||
assert_eq!(b_0, b_1);
|
||||
}
|
||||
)+
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_from_ {
|
||||
($to:ident, $from:ident) => {
|
||||
vector_impl!([impl_from_impl, $to, $from]);
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_from {
|
||||
($to:ident: $elem_ty:ident, $test_mod:ident, $test_macro:ident | $($from:ident),+) => {
|
||||
$(
|
||||
impl_from_!($from, $to);
|
||||
)+
|
||||
|
||||
$test_macro!(
|
||||
#[cfg(test)]
|
||||
mod $test_mod {
|
||||
$(
|
||||
#[test]
|
||||
fn $from() {
|
||||
use std::convert::{From, Into};
|
||||
use ::coresimd::simd::{$from, $to};
|
||||
use ::std::default::Default;
|
||||
assert_eq!($to::lanes(), $from::lanes());
|
||||
let a: $from = $from::default();
|
||||
let b_0: $to = From::from(a);
|
||||
let b_1: $to = a.into();
|
||||
assert_eq!(b_0, b_1);
|
||||
}
|
||||
)+
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,39 +1,47 @@
|
|||
//! Implements the `FromBits` trait for vector types, which performs bitwise
|
||||
//! lossless transmutes between equally-sized vector types.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_from_bits_ {
|
||||
macro_rules! impl_from_bits__ {
|
||||
($to:ident: $($from:ident),+) => {
|
||||
$(
|
||||
impl ::simd::FromBits<$from> for $to {
|
||||
#[inline]
|
||||
fn from_bits(f: $from) -> $to {
|
||||
unsafe { mem::transmute(f) }
|
||||
unsafe { ::mem::transmute(f) }
|
||||
}
|
||||
}
|
||||
)+
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_from_bits {
|
||||
($to:ident: $elem_ty:ident, $test_mod:ident | $($from:ident),+) => {
|
||||
impl_from_bits_!($to: $($from),+);
|
||||
|
||||
#[cfg(test)]
|
||||
mod $test_mod {
|
||||
$(
|
||||
#[test]
|
||||
fn $from() {
|
||||
use ::coresimd::simd::{$from, $to, FromBits, IntoBits};
|
||||
use ::std::{mem, default};
|
||||
use default::Default;
|
||||
assert_eq!(mem::size_of::<$from>(),
|
||||
mem::size_of::<$to>());
|
||||
let a: $from = $from::default();
|
||||
let b_0: $to = FromBits::from_bits(a);
|
||||
let b_1: $to = a.into_bits();
|
||||
assert_eq!(b_0, b_1);
|
||||
}
|
||||
)+
|
||||
}
|
||||
macro_rules! impl_from_bits_ {
|
||||
($to:ident: $($from:ident),+) => {
|
||||
vector_impl!([impl_from_bits__, $to: $($from),+]);
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_from_bits {
|
||||
($to:ident: $elem_ty:ident, $test_mod:ident, $test_macro:ident | $($from:ident),+) => {
|
||||
impl_from_bits_!($to: $($from),+);
|
||||
|
||||
$test_macro!(
|
||||
#[cfg(test)]
|
||||
mod $test_mod {
|
||||
$(
|
||||
#[test]
|
||||
fn $from() {
|
||||
use ::coresimd::simd::*;
|
||||
use ::std::mem;
|
||||
assert_eq!(mem::size_of::<$from>(),
|
||||
mem::size_of::<$to>());
|
||||
let a: $from = $from::default();
|
||||
let b_0: $to = FromBits::from_bits(a);
|
||||
let b_1: $to = a.into_bits();
|
||||
assert_eq!(b_0, b_1);
|
||||
}
|
||||
)+
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
//! Implements `Hash`.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_hash {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
impl hash::Hash for $id {
|
||||
impl ::hash::Hash for $id {
|
||||
#[inline]
|
||||
fn hash<H: hash::Hasher>(&self, state: &mut H) {
|
||||
fn hash<H: ::hash::Hasher>(&self, state: &mut H) {
|
||||
union A {
|
||||
data: [$elem_ty; $id::lanes()],
|
||||
vec: $id
|
||||
|
|
@ -18,7 +19,6 @@ macro_rules! impl_hash {
|
|||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! test_hash {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
#[test]
|
||||
|
|
@ -26,8 +26,7 @@ macro_rules! test_hash {
|
|||
use ::coresimd::simd::$id;
|
||||
use ::std::collections::hash_map::DefaultHasher;
|
||||
use ::std::hash::{Hash, Hasher};
|
||||
use ::std::{mem, clone};
|
||||
use clone::Clone;
|
||||
use ::std::mem;
|
||||
type A = [$elem_ty; $id::lanes()];
|
||||
let a: A = [42 as $elem_ty; $id::lanes()];
|
||||
assert!(mem::size_of::<A>() == mem::size_of::<$id>());
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
//! Implements the load/store API.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_load_store {
|
||||
($id:ident, $elem_ty:ident, $elem_count:expr) => {
|
||||
|
|
@ -11,10 +12,11 @@ macro_rules! impl_load_store {
|
|||
/// aligned to an `align_of::<Self>()` boundary.
|
||||
#[inline]
|
||||
pub fn store_aligned(self, slice: &mut [$elem_ty]) {
|
||||
use ::slice::SliceExt;
|
||||
unsafe {
|
||||
assert!(slice.len() >= $elem_count);
|
||||
let target_ptr = slice.get_unchecked_mut(0) as *mut $elem_ty;
|
||||
assert!(target_ptr.align_offset(mem::align_of::<Self>()) == 0);
|
||||
assert!(target_ptr.align_offset(::mem::align_of::<Self>()) == 0);
|
||||
self.store_aligned_unchecked(slice);
|
||||
}
|
||||
}
|
||||
|
|
@ -26,6 +28,7 @@ macro_rules! impl_load_store {
|
|||
/// If `slice.len() < Self::lanes()`.
|
||||
#[inline]
|
||||
pub fn store_unaligned(self, slice: &mut [$elem_ty]) {
|
||||
use ::slice::SliceExt;
|
||||
unsafe {
|
||||
assert!(slice.len() >= $elem_count);
|
||||
self.store_unaligned_unchecked(slice);
|
||||
|
|
@ -45,6 +48,7 @@ macro_rules! impl_load_store {
|
|||
slice: &mut [$elem_ty]
|
||||
|
||||
) {
|
||||
use ::slice::SliceExt;
|
||||
*(slice.get_unchecked_mut(0) as *mut $elem_ty as *mut Self) = self;
|
||||
}
|
||||
|
||||
|
|
@ -58,9 +62,10 @@ macro_rules! impl_load_store {
|
|||
self,
|
||||
slice: &mut [$elem_ty]
|
||||
) {
|
||||
use ::slice::SliceExt;
|
||||
let target_ptr = slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8;
|
||||
let self_ptr = &self as *const Self as *const u8;
|
||||
ptr::copy_nonoverlapping(self_ptr, target_ptr, mem::size_of::<Self>());
|
||||
::ptr::copy_nonoverlapping(self_ptr, target_ptr, ::mem::size_of::<Self>());
|
||||
}
|
||||
|
||||
/// Instantiates a new vector with the values of the `slice`.
|
||||
|
|
@ -72,9 +77,10 @@ macro_rules! impl_load_store {
|
|||
#[inline]
|
||||
pub fn load_aligned(slice: &[$elem_ty]) -> Self {
|
||||
unsafe {
|
||||
use ::slice::SliceExt;
|
||||
assert!(slice.len() >= $elem_count);
|
||||
let target_ptr = slice.get_unchecked(0) as *const $elem_ty;
|
||||
assert!(target_ptr.align_offset(mem::align_of::<Self>()) == 0);
|
||||
assert!(target_ptr.align_offset(::mem::align_of::<Self>()) == 0);
|
||||
Self::load_aligned_unchecked(slice)
|
||||
}
|
||||
}
|
||||
|
|
@ -86,6 +92,7 @@ macro_rules! impl_load_store {
|
|||
/// If `slice.len() < Self::lanes()`.
|
||||
#[inline]
|
||||
pub fn load_unaligned(slice: &[$elem_ty]) -> Self {
|
||||
use ::slice::SliceExt;
|
||||
unsafe {
|
||||
assert!(slice.len() >= $elem_count);
|
||||
Self::load_unaligned_unchecked(slice)
|
||||
|
|
@ -100,6 +107,7 @@ macro_rules! impl_load_store {
|
|||
/// to an `align_of::<Self>()` boundary, the behavior is undefined.
|
||||
#[inline]
|
||||
pub unsafe fn load_aligned_unchecked(slice: &[$elem_ty]) -> Self {
|
||||
use ::slice::SliceExt;
|
||||
*(slice.get_unchecked(0) as *const $elem_ty as *const Self)
|
||||
}
|
||||
|
||||
|
|
@ -110,11 +118,12 @@ macro_rules! impl_load_store {
|
|||
/// If `slice.len() < Self::lanes()` the behavior is undefined.
|
||||
#[inline]
|
||||
pub unsafe fn load_unaligned_unchecked(slice: &[$elem_ty]) -> Self {
|
||||
use mem::size_of;
|
||||
use ::slice::SliceExt;
|
||||
use ::mem::size_of;
|
||||
let target_ptr = slice.get_unchecked(0) as *const $elem_ty as *const u8;
|
||||
let mut x = Self::splat(0 as $elem_ty);
|
||||
let self_ptr = &mut x as *mut Self as *mut u8;
|
||||
ptr::copy_nonoverlapping(target_ptr,self_ptr,size_of::<Self>());
|
||||
::ptr::copy_nonoverlapping(target_ptr,self_ptr,size_of::<Self>());
|
||||
x
|
||||
}
|
||||
}
|
||||
|
|
@ -122,7 +131,6 @@ macro_rules! impl_load_store {
|
|||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! test_load_store {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
#[test]
|
||||
|
|
@ -177,7 +185,7 @@ macro_rules! test_load_store {
|
|||
|
||||
union A {
|
||||
data: [$elem_ty; 2 * ::coresimd::simd::$id::lanes()],
|
||||
vec: ::coresimd::simd::$id,
|
||||
_vec: ::coresimd::simd::$id,
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
//!
|
||||
//! Minimal portable vector types API.
|
||||
#![allow(unused)]
|
||||
|
||||
/// Minimal interface: all packed SIMD vector types implement this.
|
||||
macro_rules! impl_minimal {
|
||||
($id:ident, $elem_ty:ident, $elem_count:expr, $($elem_name:ident),+) => {
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(expl_impl_clone_on_copy))]
|
||||
impl Clone for $id {
|
||||
impl ::clone::Clone for $id {
|
||||
#[inline] // currently needed for correctness
|
||||
fn clone(&self) -> Self {
|
||||
*self
|
||||
|
|
@ -49,9 +50,12 @@ macro_rules! impl_minimal {
|
|||
|
||||
/// Extracts the value at `index`.
|
||||
///
|
||||
/// # Precondition
|
||||
///
|
||||
/// If `index >= Self::lanes()` the behavior is undefined.
|
||||
#[inline]
|
||||
pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty {
|
||||
use coresimd::simd_llvm::simd_extract;
|
||||
simd_extract(self, index as u32)
|
||||
}
|
||||
|
||||
|
|
@ -69,9 +73,9 @@ macro_rules! impl_minimal {
|
|||
|
||||
/// Returns a new vector where the value at `index` is replaced by `new_value`.
|
||||
///
|
||||
/// # Panics
|
||||
/// # Precondition
|
||||
///
|
||||
/// If `index >= Self::lanes()`.
|
||||
/// If `index >= Self::lanes()` the behavior is undefined.
|
||||
#[inline]
|
||||
#[must_use = "replace_unchecked does not modify the original value - it returns a new vector with the value at `index` replaced by `new_value`d"]
|
||||
pub unsafe fn replace_unchecked(
|
||||
|
|
@ -79,6 +83,7 @@ macro_rules! impl_minimal {
|
|||
index: usize,
|
||||
new_value: $elem_ty,
|
||||
) -> Self {
|
||||
use coresimd::simd_llvm::simd_insert;
|
||||
simd_insert(self, index as u32, new_value)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,17 +1,63 @@
|
|||
//! Implements portable arithmetic vector reductions.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_minmax_reductions {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
impl $id {
|
||||
/// Largest vector value.
|
||||
///
|
||||
/// FIXME: document behavior for float vectors with NaNs.
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
pub fn max(self) -> $elem_ty {
|
||||
ReduceMax::reduce_max(self)
|
||||
use ::coresimd::simd_llvm::simd_reduce_max;
|
||||
unsafe {
|
||||
simd_reduce_max(self)
|
||||
}
|
||||
}
|
||||
/// Largest vector value.
|
||||
///
|
||||
/// FIXME: document behavior for float vectors with NaNs.
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[allow(unused_imports)]
|
||||
#[inline]
|
||||
pub fn max(self) -> $elem_ty {
|
||||
// FIXME: broken on AArch64
|
||||
use ::num::Float;
|
||||
use ::cmp::Ord;
|
||||
let mut x = self.extract(0);
|
||||
for i in 1..$id::lanes() {
|
||||
x = x.max(self.extract(i));
|
||||
}
|
||||
x
|
||||
}
|
||||
|
||||
/// Smallest vector value.
|
||||
///
|
||||
/// FIXME: document behavior for float vectors with NaNs.
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
pub fn min(self) -> $elem_ty {
|
||||
ReduceMin::reduce_min(self)
|
||||
use ::coresimd::simd_llvm::simd_reduce_min;
|
||||
unsafe {
|
||||
simd_reduce_min(self)
|
||||
}
|
||||
}
|
||||
/// Smallest vector value.
|
||||
///
|
||||
/// FIXME: document behavior for float vectors with NaNs.
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[allow(unused_imports)]
|
||||
#[inline]
|
||||
pub fn min(self) -> $elem_ty {
|
||||
// FIXME: broken on AArch64
|
||||
use ::num::Float;
|
||||
use ::cmp::Ord;
|
||||
let mut x = self.extract(0);
|
||||
for i in 1..$id::lanes() {
|
||||
x = x.min(self.extract(i));
|
||||
}
|
||||
x
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@
|
|||
//! * [x] boolean reductions: implemented by boolean vectors.
|
||||
//! * [ ] portable shuffles: `shufflevector`.
|
||||
//! * [ ] portable `gather`/`scatter`:
|
||||
#![allow(unused)]
|
||||
|
||||
/// Adds the vector type `$id`, with elements of types `$elem_tys`.
|
||||
macro_rules! define_ty {
|
||||
|
|
@ -113,170 +114,155 @@ mod partial_eq;
|
|||
#[macro_use]
|
||||
mod shifts;
|
||||
|
||||
/// Imports required to implement vector types using the macros.
|
||||
|
||||
macro_rules! simd_api_imports {
|
||||
() => {
|
||||
use ::coresimd::simd_llvm::*;
|
||||
use fmt;
|
||||
use hash;
|
||||
use ops;
|
||||
#[allow(unused_imports)]
|
||||
use num;
|
||||
use cmp::{Eq, PartialEq};
|
||||
use ptr;
|
||||
use mem;
|
||||
#[allow(unused_imports)]
|
||||
use convert::{From, Into};
|
||||
use slice::SliceExt;
|
||||
#[allow(unused_imports)]
|
||||
use iter::Iterator;
|
||||
#[allow(unused_imports)]
|
||||
use default::Default;
|
||||
use clone::Clone;
|
||||
use super::codegen::sum::{ReduceAdd};
|
||||
use super::codegen::product::{ReduceMul};
|
||||
use super::codegen::and::{ReduceAnd};
|
||||
use super::codegen::or::{ReduceOr};
|
||||
use super::codegen::xor::{ReduceXor};
|
||||
use super::codegen::min::{ReduceMin};
|
||||
use super::codegen::max::{ReduceMax};
|
||||
}
|
||||
}
|
||||
|
||||
/// Defines a portable packed SIMD floating-point vector type.
|
||||
macro_rules! simd_f_ty {
|
||||
($id:ident : $elem_count:expr, $elem_ty:ident, $bool_ty:ident, $test_mod:ident |
|
||||
($id:ident : $elem_count:expr, $elem_ty:ident, $bool_ty:ident, $test_mod:ident, $test_macro:ident |
|
||||
$($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
|
||||
define_ty!($id, $($elem_tys),+ | $(#[$doc])*);
|
||||
impl_minimal!($id, $elem_ty, $elem_count, $($elem_name),*);
|
||||
impl_load_store!($id, $elem_ty, $elem_count);
|
||||
impl_cmp!($id, $bool_ty);
|
||||
impl_arithmetic_ops!($id);
|
||||
impl_arithmetic_reductions!($id, $elem_ty);
|
||||
impl_minmax_reductions!($id, $elem_ty);
|
||||
impl_neg_op!($id, $elem_ty);
|
||||
impl_partial_eq!($id);
|
||||
impl_default!($id, $elem_ty);
|
||||
vector_impl!(
|
||||
[define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
|
||||
[impl_minimal, $id, $elem_ty, $elem_count, $($elem_name),*],
|
||||
[impl_load_store, $id, $elem_ty, $elem_count],
|
||||
[impl_cmp, $id, $bool_ty],
|
||||
[impl_arithmetic_ops, $id],
|
||||
[impl_arithmetic_reductions, $id, $elem_ty],
|
||||
[impl_minmax_reductions, $id, $elem_ty],
|
||||
[impl_neg_op, $id, $elem_ty],
|
||||
[impl_partial_eq, $id],
|
||||
[impl_default, $id, $elem_ty]
|
||||
);
|
||||
|
||||
#[cfg(test)]
|
||||
mod $test_mod {
|
||||
test_minimal!($id, $elem_ty, $elem_count);
|
||||
test_load_store!($id, $elem_ty);
|
||||
test_cmp!($id, $elem_ty, $bool_ty, 1. as $elem_ty, 0. as $elem_ty);
|
||||
test_arithmetic_ops!($id, $elem_ty);
|
||||
test_arithmetic_reductions!($id, $elem_ty);
|
||||
test_minmax_reductions!($id, $elem_ty);
|
||||
test_neg_op!($id, $elem_ty);
|
||||
test_partial_eq!($id, 1. as $elem_ty, 0. as $elem_ty);
|
||||
test_default!($id, $elem_ty);
|
||||
}
|
||||
$test_macro!(
|
||||
#[cfg(test)]
|
||||
mod $test_mod {
|
||||
test_minimal!($id, $elem_ty, $elem_count);
|
||||
test_load_store!($id, $elem_ty);
|
||||
test_cmp!($id, $elem_ty, $bool_ty, 1. as $elem_ty, 0. as $elem_ty);
|
||||
test_arithmetic_ops!($id, $elem_ty);
|
||||
test_arithmetic_reductions!($id, $elem_ty);
|
||||
test_minmax_reductions!($id, $elem_ty);
|
||||
test_neg_op!($id, $elem_ty);
|
||||
test_partial_eq!($id, 1. as $elem_ty, 0. as $elem_ty);
|
||||
test_default!($id, $elem_ty);
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Defines a portable packed SIMD signed-integer vector type.
|
||||
macro_rules! simd_i_ty {
|
||||
($id:ident : $elem_count:expr, $elem_ty:ident, $bool_ty:ident, $test_mod:ident |
|
||||
($id:ident : $elem_count:expr, $elem_ty:ident, $bool_ty:ident, $test_mod:ident, $test_macro:ident |
|
||||
$($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
|
||||
define_ty!($id, $($elem_tys),+ | $(#[$doc])*);
|
||||
impl_minimal!($id, $elem_ty, $elem_count, $($elem_name),*);
|
||||
impl_load_store!($id, $elem_ty, $elem_count);
|
||||
impl_cmp!($id, $bool_ty);
|
||||
impl_hash!($id, $elem_ty);
|
||||
impl_arithmetic_ops!($id);
|
||||
impl_arithmetic_reductions!($id, $elem_ty);
|
||||
impl_minmax_reductions!($id, $elem_ty);
|
||||
impl_neg_op!($id, $elem_ty);
|
||||
impl_bitwise_ops!($id, !(0 as $elem_ty));
|
||||
impl_bitwise_reductions!($id, $elem_ty);
|
||||
impl_all_shifts!($id, $elem_ty);
|
||||
impl_hex_fmt!($id, $elem_ty);
|
||||
impl_eq!($id);
|
||||
impl_partial_eq!($id);
|
||||
impl_default!($id, $elem_ty);
|
||||
vector_impl!(
|
||||
[define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
|
||||
[impl_minimal, $id, $elem_ty, $elem_count, $($elem_name),*],
|
||||
[impl_load_store, $id, $elem_ty, $elem_count],
|
||||
[impl_cmp, $id, $bool_ty],
|
||||
[impl_hash, $id, $elem_ty],
|
||||
[impl_arithmetic_ops, $id],
|
||||
[impl_arithmetic_reductions, $id, $elem_ty],
|
||||
[impl_minmax_reductions, $id, $elem_ty],
|
||||
[impl_neg_op, $id, $elem_ty],
|
||||
[impl_bitwise_ops, $id, !(0 as $elem_ty)],
|
||||
[impl_bitwise_reductions, $id, $elem_ty],
|
||||
[impl_all_shifts, $id, $elem_ty],
|
||||
[impl_hex_fmt, $id, $elem_ty],
|
||||
[impl_eq, $id],
|
||||
[impl_partial_eq, $id],
|
||||
[impl_default, $id, $elem_ty]
|
||||
);
|
||||
|
||||
#[cfg(test)]
|
||||
mod $test_mod {
|
||||
test_minimal!($id, $elem_ty, $elem_count);
|
||||
test_load_store!($id, $elem_ty);
|
||||
test_cmp!($id, $elem_ty, $bool_ty, 1 as $elem_ty, 0 as $elem_ty);
|
||||
test_hash!($id, $elem_ty);
|
||||
test_arithmetic_ops!($id, $elem_ty);
|
||||
test_arithmetic_reductions!($id, $elem_ty);
|
||||
test_minmax_reductions!($id, $elem_ty);
|
||||
test_neg_op!($id, $elem_ty);
|
||||
test_int_bitwise_ops!($id, $elem_ty);
|
||||
test_bitwise_reductions!($id, !(0 as $elem_ty));
|
||||
test_all_shift_ops!($id, $elem_ty);
|
||||
test_hex_fmt!($id, $elem_ty);
|
||||
test_partial_eq!($id, 1 as $elem_ty, 0 as $elem_ty);
|
||||
test_default!($id, $elem_ty);
|
||||
}
|
||||
$test_macro!(
|
||||
#[cfg(test)]
|
||||
mod $test_mod {
|
||||
test_minimal!($id, $elem_ty, $elem_count);
|
||||
test_load_store!($id, $elem_ty);
|
||||
test_cmp!($id, $elem_ty, $bool_ty, 1 as $elem_ty, 0 as $elem_ty);
|
||||
test_hash!($id, $elem_ty);
|
||||
test_arithmetic_ops!($id, $elem_ty);
|
||||
test_arithmetic_reductions!($id, $elem_ty);
|
||||
test_minmax_reductions!($id, $elem_ty);
|
||||
test_neg_op!($id, $elem_ty);
|
||||
test_int_bitwise_ops!($id, $elem_ty);
|
||||
test_bitwise_reductions!($id, !(0 as $elem_ty));
|
||||
test_all_shift_ops!($id, $elem_ty);
|
||||
test_hex_fmt!($id, $elem_ty);
|
||||
test_partial_eq!($id, 1 as $elem_ty, 0 as $elem_ty);
|
||||
test_default!($id, $elem_ty);
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Defines a portable packed SIMD unsigned-integer vector type.
|
||||
macro_rules! simd_u_ty {
|
||||
($id:ident : $elem_count:expr, $elem_ty:ident, $bool_ty:ident, $test_mod:ident |
|
||||
($id:ident : $elem_count:expr, $elem_ty:ident, $bool_ty:ident, $test_mod:ident, $test_macro:ident |
|
||||
$($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
|
||||
define_ty!($id, $($elem_tys),+ | $(#[$doc])*);
|
||||
impl_minimal!($id, $elem_ty, $elem_count, $($elem_name),*);
|
||||
impl_load_store!($id, $elem_ty, $elem_count);
|
||||
impl_cmp!($id, $bool_ty);
|
||||
impl_hash!($id, $elem_ty);
|
||||
impl_arithmetic_ops!($id);
|
||||
impl_arithmetic_reductions!($id, $elem_ty);
|
||||
impl_minmax_reductions!($id, $elem_ty);
|
||||
impl_bitwise_ops!($id, !(0 as $elem_ty));
|
||||
impl_bitwise_reductions!($id, $elem_ty);
|
||||
impl_all_shifts!($id, $elem_ty);
|
||||
impl_hex_fmt!($id, $elem_ty);
|
||||
impl_eq!($id);
|
||||
impl_partial_eq!($id);
|
||||
impl_default!($id, $elem_ty);
|
||||
vector_impl!(
|
||||
[define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
|
||||
[impl_minimal, $id, $elem_ty, $elem_count, $($elem_name),*],
|
||||
[impl_load_store, $id, $elem_ty, $elem_count],
|
||||
[impl_cmp, $id, $bool_ty],
|
||||
[impl_hash, $id, $elem_ty],
|
||||
[impl_arithmetic_ops, $id],
|
||||
[impl_arithmetic_reductions, $id, $elem_ty],
|
||||
[impl_minmax_reductions, $id, $elem_ty],
|
||||
[impl_bitwise_ops, $id, !(0 as $elem_ty)],
|
||||
[impl_bitwise_reductions, $id, $elem_ty],
|
||||
[impl_all_shifts, $id, $elem_ty],
|
||||
[impl_hex_fmt, $id, $elem_ty],
|
||||
[impl_eq, $id],
|
||||
[impl_partial_eq, $id],
|
||||
[impl_default, $id, $elem_ty]
|
||||
);
|
||||
|
||||
#[cfg(test)]
|
||||
mod $test_mod {
|
||||
test_minimal!($id, $elem_ty, $elem_count);
|
||||
test_load_store!($id, $elem_ty);
|
||||
test_cmp!($id, $elem_ty, $bool_ty, 1 as $elem_ty, 0 as $elem_ty);
|
||||
test_hash!($id, $elem_ty);
|
||||
test_arithmetic_ops!($id, $elem_ty);
|
||||
test_arithmetic_reductions!($id, $elem_ty);
|
||||
test_minmax_reductions!($id, $elem_ty);
|
||||
test_int_bitwise_ops!($id, $elem_ty);
|
||||
test_bitwise_reductions!($id, !(0 as $elem_ty));
|
||||
test_all_shift_ops!($id, $elem_ty);
|
||||
test_hex_fmt!($id, $elem_ty);
|
||||
test_partial_eq!($id, 1 as $elem_ty, 0 as $elem_ty);
|
||||
test_default!($id, $elem_ty);
|
||||
}
|
||||
$test_macro!(
|
||||
#[cfg(test)]
|
||||
mod $test_mod {
|
||||
test_minimal!($id, $elem_ty, $elem_count);
|
||||
test_load_store!($id, $elem_ty);
|
||||
test_cmp!($id, $elem_ty, $bool_ty, 1 as $elem_ty, 0 as $elem_ty);
|
||||
test_hash!($id, $elem_ty);
|
||||
test_arithmetic_ops!($id, $elem_ty);
|
||||
test_arithmetic_reductions!($id, $elem_ty);
|
||||
test_minmax_reductions!($id, $elem_ty);
|
||||
test_int_bitwise_ops!($id, $elem_ty);
|
||||
test_bitwise_reductions!($id, !(0 as $elem_ty));
|
||||
test_all_shift_ops!($id, $elem_ty);
|
||||
test_hex_fmt!($id, $elem_ty);
|
||||
test_partial_eq!($id, 1 as $elem_ty, 0 as $elem_ty);
|
||||
test_default!($id, $elem_ty);
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Defines a portable packed SIMD boolean vector type.
|
||||
macro_rules! simd_b_ty {
|
||||
($id:ident : $elem_count:expr, $elem_ty:ident, $test_mod:ident |
|
||||
($id:ident : $elem_count:expr, $elem_ty:ident, $test_mod:ident, $test_macro:ident |
|
||||
$($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => {
|
||||
define_ty!($id, $($elem_tys),+ | $(#[$doc])*);
|
||||
impl_bool_minimal!($id, $elem_ty, $elem_count, $($elem_name),*);
|
||||
impl_bitwise_ops!($id, true);
|
||||
impl_bool_bitwise_reductions!($id, bool);
|
||||
impl_bool_reductions!($id);
|
||||
impl_bool_cmp!($id, $id);
|
||||
impl_eq!($id);
|
||||
impl_partial_eq!($id);
|
||||
impl_default!($id, bool);
|
||||
vector_impl!(
|
||||
[define_ty, $id, $($elem_tys),+ | $(#[$doc])*],
|
||||
[impl_bool_minimal, $id, $elem_ty, $elem_count, $($elem_name),*],
|
||||
[impl_bitwise_ops, $id, true],
|
||||
[impl_bool_bitwise_reductions, $id, bool, $elem_ty],
|
||||
[impl_bool_reductions, $id],
|
||||
[impl_bool_cmp, $id, $id],
|
||||
[impl_eq, $id],
|
||||
[impl_partial_eq, $id],
|
||||
[impl_default, $id, bool]
|
||||
);
|
||||
|
||||
#[cfg(test)]
|
||||
mod $test_mod {
|
||||
test_bool_minimal!($id, $elem_count);
|
||||
test_bool_bitwise_ops!($id);
|
||||
test_bool_reductions!($id);
|
||||
test_bitwise_reductions!($id, true);
|
||||
test_cmp!($id, $elem_ty, $id, true, false);
|
||||
test_partial_eq!($id, true, false);
|
||||
test_default!($id, bool);
|
||||
}
|
||||
$test_macro!(
|
||||
#[cfg(test)]
|
||||
mod $test_mod {
|
||||
test_bool_minimal!($id, $elem_count);
|
||||
test_bool_bitwise_ops!($id);
|
||||
test_bool_reductions!($id);
|
||||
test_bitwise_reductions!($id, true);
|
||||
test_cmp!($id, $elem_ty, $id, true, false);
|
||||
test_partial_eq!($id, true, false);
|
||||
test_default!($id, bool);
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
//! Implements `std::ops::Neg` for signed vector types.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_neg_op {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
impl ops::Neg for $id {
|
||||
impl ::ops::Neg for $id {
|
||||
type Output = Self;
|
||||
#[inline]
|
||||
fn neg(self) -> Self {
|
||||
|
|
@ -13,7 +14,6 @@ macro_rules! impl_neg_op {
|
|||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! test_neg_op {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
//! Implements `PartialEq` for vector types.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_partial_eq {
|
||||
($id:ident) => {
|
||||
impl PartialEq<$id> for $id {
|
||||
impl ::cmp::PartialEq<$id> for $id {
|
||||
#[inline]
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
$id::eq(*self, *other).all()
|
||||
|
|
@ -16,7 +17,6 @@ macro_rules! impl_partial_eq {
|
|||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! test_partial_eq {
|
||||
($id:ident, $true:expr, $false:expr) => {
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
//! Implements integer shifts.
|
||||
#![allow(unused)]
|
||||
|
||||
macro_rules! impl_shifts {
|
||||
($id:ident, $elem_ty:ident, $($by:ident),+) => {
|
||||
|
|
@ -7,6 +8,7 @@ macro_rules! impl_shifts {
|
|||
type Output = Self;
|
||||
#[inline]
|
||||
fn shl(self, other: $by) -> Self {
|
||||
use coresimd::simd_llvm::simd_shl;
|
||||
unsafe { simd_shl(self, $id::splat(other as $elem_ty)) }
|
||||
}
|
||||
}
|
||||
|
|
@ -14,6 +16,7 @@ macro_rules! impl_shifts {
|
|||
type Output = Self;
|
||||
#[inline]
|
||||
fn shr(self, other: $by) -> Self {
|
||||
use coresimd::simd_llvm::simd_shr;
|
||||
unsafe { simd_shr(self, $id::splat(other as $elem_ty)) }
|
||||
}
|
||||
}
|
||||
|
|
@ -46,7 +49,6 @@ macro_rules! impl_all_shifts {
|
|||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! test_shift_ops {
|
||||
($id:ident, $elem_ty:ident, $($index_ty:ident),+) => {
|
||||
#[test]
|
||||
|
|
@ -111,7 +113,6 @@ macro_rules! test_shift_ops {
|
|||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! test_all_shift_ops {
|
||||
($id:ident, $elem_ty:ident) => {
|
||||
test_shift_ops!(
|
||||
|
|
|
|||
|
|
@ -1,170 +0,0 @@
|
|||
//! Code generation for the and reduction.
|
||||
use coresimd::simd::*;
|
||||
|
||||
/// LLVM intrinsics used in the and reduction
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i8.v2i8"]
|
||||
fn reduce_and_i8x2(x: i8x2) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u8.v2u8"]
|
||||
fn reduce_and_u8x2(x: u8x2) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i16.v2i16"]
|
||||
fn reduce_and_i16x2(x: i16x2) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u16.v2u16"]
|
||||
fn reduce_and_u16x2(x: u16x2) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i32.v2i32"]
|
||||
fn reduce_and_i32x2(x: i32x2) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u32.v2u32"]
|
||||
fn reduce_and_u32x2(x: u32x2) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i64.v2i64"]
|
||||
fn reduce_and_i64x2(x: i64x2) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u64.v2u64"]
|
||||
fn reduce_and_u64x2(x: u64x2) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i8.v4i8"]
|
||||
fn reduce_and_i8x4(x: i8x4) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u8.v4u8"]
|
||||
fn reduce_and_u8x4(x: u8x4) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i16.v4i16"]
|
||||
fn reduce_and_i16x4(x: i16x4) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u16.v4u16"]
|
||||
fn reduce_and_u16x4(x: u16x4) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i32.v4i32"]
|
||||
fn reduce_and_i32x4(x: i32x4) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u32.v4u32"]
|
||||
fn reduce_and_u32x4(x: u32x4) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i64.v4i64"]
|
||||
fn reduce_and_i64x4(x: i64x4) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u64.v4u64"]
|
||||
fn reduce_and_u64x4(x: u64x4) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i8.v8i8"]
|
||||
fn reduce_and_i8x8(x: i8x8) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u8.v8u8"]
|
||||
fn reduce_and_u8x8(x: u8x8) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i16.v8i16"]
|
||||
fn reduce_and_i16x8(x: i16x8) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u16.v8u16"]
|
||||
fn reduce_and_u16x8(x: u16x8) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i32.v8i32"]
|
||||
fn reduce_and_i32x8(x: i32x8) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u32.v8u32"]
|
||||
fn reduce_and_u32x8(x: u32x8) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i64.v8i64"]
|
||||
fn reduce_and_i64x8(x: i64x8) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u64.v8u64"]
|
||||
fn reduce_and_u64x8(x: u64x8) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i8.v16i8"]
|
||||
fn reduce_and_i8x16(x: i8x16) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u8.v16u8"]
|
||||
fn reduce_and_u8x16(x: u8x16) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i16.v16i16"]
|
||||
fn reduce_and_i16x16(x: i16x16) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u16.v16u16"]
|
||||
fn reduce_and_u16x16(x: u16x16) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i32.v16i32"]
|
||||
fn reduce_and_i32x16(x: i32x16) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u32.v16u32"]
|
||||
fn reduce_and_u32x16(x: u32x16) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i8.v32i8"]
|
||||
fn reduce_and_i8x32(x: i8x32) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u8.v32u8"]
|
||||
fn reduce_and_u8x32(x: u8x32) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i16.v32i16"]
|
||||
fn reduce_and_i16x32(x: i16x32) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u16.v32u16"]
|
||||
fn reduce_and_u16x32(x: u16x32) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.i8.v64i8"]
|
||||
fn reduce_and_i8x64(x: i8x64) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.and.u8.v64u8"]
|
||||
fn reduce_and_u8x64(x: u8x64) -> u8;
|
||||
}
|
||||
|
||||
/// Reduction: horizontal bitwise and of the vector elements.
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
|
||||
pub trait ReduceAnd {
|
||||
/// Result type of the reduction.
|
||||
type Acc;
|
||||
/// Computes the horizontal bitwise and of the vector elements
|
||||
fn reduce_and(self) -> Self::Acc;
|
||||
}
|
||||
|
||||
macro_rules! red_and {
|
||||
($id:ident, $elem_ty:ident, $llvm_intr:ident) => {
|
||||
impl ReduceAnd for $id {
|
||||
type Acc = $elem_ty;
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
fn reduce_and(self) -> Self::Acc {
|
||||
unsafe { $llvm_intr(self.into_bits()) }
|
||||
}
|
||||
// FIXME: broken in AArch64
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
fn reduce_and(self) -> Self::Acc {
|
||||
let mut x = self.extract(0) as Self::Acc;
|
||||
for i in 1..$id::lanes() {
|
||||
x &= self.extract(i) as Self::Acc;
|
||||
}
|
||||
x
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
red_and!(i8x2, i8, reduce_and_i8x2);
|
||||
red_and!(u8x2, u8, reduce_and_u8x2);
|
||||
red_and!(i16x2, i16, reduce_and_i16x2);
|
||||
red_and!(u16x2, u16, reduce_and_u16x2);
|
||||
red_and!(i32x2, i32, reduce_and_i32x2);
|
||||
red_and!(u32x2, u32, reduce_and_u32x2);
|
||||
red_and!(i64x2, i64, reduce_and_i64x2);
|
||||
red_and!(u64x2, u64, reduce_and_u64x2);
|
||||
red_and!(i8x4, i8, reduce_and_i8x4);
|
||||
red_and!(u8x4, u8, reduce_and_u8x4);
|
||||
red_and!(i16x4, i16, reduce_and_i16x4);
|
||||
red_and!(u16x4, u16, reduce_and_u16x4);
|
||||
red_and!(i32x4, i32, reduce_and_i32x4);
|
||||
red_and!(u32x4, u32, reduce_and_u32x4);
|
||||
red_and!(i64x4, i64, reduce_and_i64x4);
|
||||
red_and!(u64x4, u64, reduce_and_u64x4);
|
||||
red_and!(i8x8, i8, reduce_and_i8x8);
|
||||
red_and!(u8x8, u8, reduce_and_u8x8);
|
||||
red_and!(i16x8, i16, reduce_and_i16x8);
|
||||
red_and!(u16x8, u16, reduce_and_u16x8);
|
||||
red_and!(i32x8, i32, reduce_and_i32x8);
|
||||
red_and!(u32x8, u32, reduce_and_u32x8);
|
||||
red_and!(i64x8, i64, reduce_and_i64x8);
|
||||
red_and!(u64x8, u64, reduce_and_u64x8);
|
||||
red_and!(i8x16, i8, reduce_and_i8x16);
|
||||
red_and!(u8x16, u8, reduce_and_u8x16);
|
||||
red_and!(i16x16, i16, reduce_and_i16x16);
|
||||
red_and!(u16x16, u16, reduce_and_u16x16);
|
||||
red_and!(i32x16, i32, reduce_and_i32x16);
|
||||
red_and!(u32x16, u32, reduce_and_u32x16);
|
||||
red_and!(i8x32, i8, reduce_and_i8x32);
|
||||
red_and!(u8x32, u8, reduce_and_u8x32);
|
||||
red_and!(i16x32, i16, reduce_and_i16x32);
|
||||
red_and!(u16x32, u16, reduce_and_u16x32);
|
||||
red_and!(i8x64, i8, reduce_and_i8x64);
|
||||
red_and!(u8x64, u8, reduce_and_u8x64);
|
||||
|
||||
red_and!(b8x2, i8, reduce_and_i8x2);
|
||||
red_and!(b8x4, i8, reduce_and_i8x4);
|
||||
red_and!(b8x8, i8, reduce_and_i8x8);
|
||||
red_and!(b8x16, i8, reduce_and_i8x16);
|
||||
red_and!(b8x32, i8, reduce_and_i8x32);
|
||||
red_and!(b8x64, i8, reduce_and_i8x64);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::ReduceAnd;
|
||||
use coresimd::simd::*;
|
||||
|
||||
// note: these are tested in the portable vector API tests
|
||||
|
||||
#[test]
|
||||
fn reduce_and_i32x4() {
|
||||
let v = i32x4::splat(1);
|
||||
assert_eq!(v.reduce_and(), 1_i32);
|
||||
let v = i32x4::new(1, 1, 0, 1);
|
||||
assert_eq!(v.reduce_and(), 0_i32);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,196 +0,0 @@
|
|||
//! Code generation for the max reduction.
|
||||
use coresimd::simd::*;
|
||||
|
||||
/// LLVM intrinsics used in the max reduction
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i8.v2i8"]
|
||||
fn reduce_max_i8x2(x: i8x2) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u8.v2u8"]
|
||||
fn reduce_max_u8x2(x: u8x2) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i16.v2i16"]
|
||||
fn reduce_max_i16x2(x: i16x2) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u16.v2u16"]
|
||||
fn reduce_max_u16x2(x: u16x2) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i32.v2i32"]
|
||||
fn reduce_max_i32x2(x: i32x2) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u32.v2u32"]
|
||||
fn reduce_max_u32x2(x: u32x2) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i64.v2i64"]
|
||||
fn reduce_max_i64x2(x: i64x2) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u64.v2u64"]
|
||||
fn reduce_max_u64x2(x: u64x2) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i8.v4i8"]
|
||||
fn reduce_max_i8x4(x: i8x4) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u8.v4u8"]
|
||||
fn reduce_max_u8x4(x: u8x4) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i16.v4i16"]
|
||||
fn reduce_max_i16x4(x: i16x4) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u16.v4u16"]
|
||||
fn reduce_max_u16x4(x: u16x4) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i32.v4i32"]
|
||||
fn reduce_max_i32x4(x: i32x4) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u32.v4u32"]
|
||||
fn reduce_max_u32x4(x: u32x4) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i64.v4i64"]
|
||||
fn reduce_max_i64x4(x: i64x4) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u64.v4u64"]
|
||||
fn reduce_max_u64x4(x: u64x4) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i8.v8i8"]
|
||||
fn reduce_max_i8x8(x: i8x8) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u8.v8u8"]
|
||||
fn reduce_max_u8x8(x: u8x8) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i16.v8i16"]
|
||||
fn reduce_max_i16x8(x: i16x8) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u16.v8u16"]
|
||||
fn reduce_max_u16x8(x: u16x8) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i32.v8i32"]
|
||||
fn reduce_max_i32x8(x: i32x8) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u32.v8u32"]
|
||||
fn reduce_max_u32x8(x: u32x8) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i64.v8i64"]
|
||||
fn reduce_max_i64x8(x: i64x8) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u64.v8u64"]
|
||||
fn reduce_max_u64x8(x: u64x8) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i8.v16i8"]
|
||||
fn reduce_max_i8x16(x: i8x16) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u8.v16u8"]
|
||||
fn reduce_max_u8x16(x: u8x16) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i16.v16i16"]
|
||||
fn reduce_max_i16x16(x: i16x16) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u16.v16u16"]
|
||||
fn reduce_max_u16x16(x: u16x16) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i32.v16i32"]
|
||||
fn reduce_max_i32x16(x: i32x16) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u32.v16u32"]
|
||||
fn reduce_max_u32x16(x: u32x16) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i8.v32i8"]
|
||||
fn reduce_max_i8x32(x: i8x32) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u8.v32u8"]
|
||||
fn reduce_max_u8x32(x: u8x32) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i16.v32i16"]
|
||||
fn reduce_max_i16x32(x: i16x32) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u16.v32u16"]
|
||||
fn reduce_max_u16x32(x: u16x32) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smax.i8.v64i8"]
|
||||
fn reduce_max_i8x64(x: i8x64) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umax.u8.v64u8"]
|
||||
fn reduce_max_u8x64(x: u8x64) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmax.f32.v2f32"]
|
||||
fn reduce_fmax_f32x2(x: f32x2) -> f32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmax.f64.v2f64"]
|
||||
fn reduce_fmax_f64x2(x: f64x2) -> f64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmax.f32.v4f32"]
|
||||
fn reduce_fmax_f32x4(x: f32x4) -> f32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmax.f64.v4f64"]
|
||||
fn reduce_fmax_f64x4(x: f64x4) -> f64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmax.f32.v8f32"]
|
||||
fn reduce_fmax_f32x8(x: f32x8) -> f32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmax.f64.v8f64"]
|
||||
fn reduce_fmax_f64x8(x: f64x8) -> f64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmax.f32.v16f32"]
|
||||
fn reduce_fmax_f32x16(x: f32x16) -> f32;
|
||||
}
|
||||
|
||||
/// Reduction: horizontal max of the vector elements.
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
|
||||
pub trait ReduceMax {
|
||||
/// Result type of the reduction.
|
||||
type Acc;
|
||||
/// Computes the horizontal max of the vector elements.
|
||||
fn reduce_max(self) -> Self::Acc;
|
||||
}
|
||||
|
||||
macro_rules! red_max {
|
||||
($id:ident, $elem_ty:ident, $llvm_intr:ident) => {
|
||||
impl ReduceMax for $id {
|
||||
type Acc = $elem_ty;
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
fn reduce_max(self) -> Self::Acc {
|
||||
unsafe { $llvm_intr(self) }
|
||||
}
|
||||
// FIXME: broken on AArch64
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[allow(unused_imports)]
|
||||
#[inline]
|
||||
fn reduce_max(self) -> Self::Acc {
|
||||
use num::Float;
|
||||
use cmp::Ord;
|
||||
let mut x = self.extract(0);
|
||||
for i in 1..$id::lanes() {
|
||||
x = x.max(self.extract(i));
|
||||
}
|
||||
x
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
red_max!(i8x2, i8, reduce_max_i8x2);
|
||||
red_max!(u8x2, u8, reduce_max_u8x2);
|
||||
red_max!(i16x2, i16, reduce_max_i16x2);
|
||||
red_max!(u16x2, u16, reduce_max_u16x2);
|
||||
red_max!(i32x2, i32, reduce_max_i32x2);
|
||||
red_max!(u32x2, u32, reduce_max_u32x2);
|
||||
red_max!(i64x2, i64, reduce_max_i64x2);
|
||||
red_max!(u64x2, u64, reduce_max_u64x2);
|
||||
red_max!(i8x4, i8, reduce_max_i8x4);
|
||||
red_max!(u8x4, u8, reduce_max_u8x4);
|
||||
red_max!(i16x4, i16, reduce_max_i16x4);
|
||||
red_max!(u16x4, u16, reduce_max_u16x4);
|
||||
red_max!(i32x4, i32, reduce_max_i32x4);
|
||||
red_max!(u32x4, u32, reduce_max_u32x4);
|
||||
red_max!(i64x4, i64, reduce_max_i64x4);
|
||||
red_max!(u64x4, u64, reduce_max_u64x4);
|
||||
red_max!(i8x8, i8, reduce_max_i8x8);
|
||||
red_max!(u8x8, u8, reduce_max_u8x8);
|
||||
red_max!(i16x8, i16, reduce_max_i16x8);
|
||||
red_max!(u16x8, u16, reduce_max_u16x8);
|
||||
red_max!(i32x8, i32, reduce_max_i32x8);
|
||||
red_max!(u32x8, u32, reduce_max_u32x8);
|
||||
red_max!(i64x8, i64, reduce_max_i64x8);
|
||||
red_max!(u64x8, u64, reduce_max_u64x8);
|
||||
red_max!(i8x16, i8, reduce_max_i8x16);
|
||||
red_max!(u8x16, u8, reduce_max_u8x16);
|
||||
red_max!(i16x16, i16, reduce_max_i16x16);
|
||||
red_max!(u16x16, u16, reduce_max_u16x16);
|
||||
red_max!(i32x16, i32, reduce_max_i32x16);
|
||||
red_max!(u32x16, u32, reduce_max_u32x16);
|
||||
red_max!(i8x32, i8, reduce_max_i8x32);
|
||||
red_max!(u8x32, u8, reduce_max_u8x32);
|
||||
red_max!(i16x32, i16, reduce_max_i16x32);
|
||||
red_max!(u16x32, u16, reduce_max_u16x32);
|
||||
red_max!(i8x64, i8, reduce_max_i8x64);
|
||||
red_max!(u8x64, u8, reduce_max_u8x64);
|
||||
|
||||
red_max!(f32x2, f32, reduce_fmax_f32x2);
|
||||
red_max!(f64x2, f64, reduce_fmax_f64x2);
|
||||
red_max!(f32x4, f32, reduce_fmax_f32x4);
|
||||
red_max!(f64x4, f64, reduce_fmax_f64x4);
|
||||
red_max!(f32x8, f32, reduce_fmax_f32x8);
|
||||
red_max!(f64x8, f64, reduce_fmax_f64x8);
|
||||
red_max!(f32x16, f32, reduce_fmax_f32x16);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::ReduceMax;
|
||||
use coresimd::simd::*;
|
||||
|
||||
// note: these are tested in the portable vector API tests
|
||||
|
||||
#[test]
|
||||
fn reduce_max_i32x4() {
|
||||
let v = i32x4::new(1, 2, -1, 3);
|
||||
assert_eq!(v.reduce_max(), 3_i32);
|
||||
}
|
||||
#[test]
|
||||
fn reduce_max_u32x4() {
|
||||
let v = u32x4::new(4, 2, 7, 3);
|
||||
assert_eq!(v.reduce_max(), 7_u32);
|
||||
}
|
||||
#[test]
|
||||
fn reduce_max_f32x4() {
|
||||
let v = f32x4::new(4., 2., -1., 3.);
|
||||
assert_eq!(v.reduce_max(), 4.);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,196 +0,0 @@
|
|||
//! Code generation for the min reduction.
|
||||
use coresimd::simd::*;
|
||||
|
||||
/// LLVM intrinsics used in the min reduction
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i8.v2i8"]
|
||||
fn reduce_min_i8x2(x: i8x2) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u8.v2u8"]
|
||||
fn reduce_min_u8x2(x: u8x2) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i16.v2i16"]
|
||||
fn reduce_min_i16x2(x: i16x2) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u16.v2u16"]
|
||||
fn reduce_min_u16x2(x: u16x2) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i32.v2i32"]
|
||||
fn reduce_min_i32x2(x: i32x2) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u32.v2u32"]
|
||||
fn reduce_min_u32x2(x: u32x2) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i64.v2i64"]
|
||||
fn reduce_min_i64x2(x: i64x2) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u64.v2u64"]
|
||||
fn reduce_min_u64x2(x: u64x2) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i8.v4i8"]
|
||||
fn reduce_min_i8x4(x: i8x4) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u8.v4u8"]
|
||||
fn reduce_min_u8x4(x: u8x4) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i16.v4i16"]
|
||||
fn reduce_min_i16x4(x: i16x4) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u16.v4u16"]
|
||||
fn reduce_min_u16x4(x: u16x4) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i32.v4i32"]
|
||||
fn reduce_min_i32x4(x: i32x4) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u32.v4u32"]
|
||||
fn reduce_min_u32x4(x: u32x4) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i64.v4i64"]
|
||||
fn reduce_min_i64x4(x: i64x4) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u64.v4u64"]
|
||||
fn reduce_min_u64x4(x: u64x4) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i8.v8i8"]
|
||||
fn reduce_min_i8x8(x: i8x8) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u8.v8u8"]
|
||||
fn reduce_min_u8x8(x: u8x8) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i16.v8i16"]
|
||||
fn reduce_min_i16x8(x: i16x8) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u16.v8u16"]
|
||||
fn reduce_min_u16x8(x: u16x8) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i32.v8i32"]
|
||||
fn reduce_min_i32x8(x: i32x8) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u32.v8u32"]
|
||||
fn reduce_min_u32x8(x: u32x8) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i64.v8i64"]
|
||||
fn reduce_min_i64x8(x: i64x8) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u64.v8u64"]
|
||||
fn reduce_min_u64x8(x: u64x8) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i8.v16i8"]
|
||||
fn reduce_min_i8x16(x: i8x16) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u8.v16u8"]
|
||||
fn reduce_min_u8x16(x: u8x16) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i16.v16i16"]
|
||||
fn reduce_min_i16x16(x: i16x16) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u16.v16u16"]
|
||||
fn reduce_min_u16x16(x: u16x16) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i32.v16i32"]
|
||||
fn reduce_min_i32x16(x: i32x16) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u32.v16u32"]
|
||||
fn reduce_min_u32x16(x: u32x16) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i8.v32i8"]
|
||||
fn reduce_min_i8x32(x: i8x32) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u8.v32u8"]
|
||||
fn reduce_min_u8x32(x: u8x32) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i16.v32i16"]
|
||||
fn reduce_min_i16x32(x: i16x32) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u16.v32u16"]
|
||||
fn reduce_min_u16x32(x: u16x32) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.smin.i8.v64i8"]
|
||||
fn reduce_min_i8x64(x: i8x64) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.umin.u8.v64u8"]
|
||||
fn reduce_min_u8x64(x: u8x64) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmin.f32.v2f32"]
|
||||
fn reduce_fmin_f32x2(x: f32x2) -> f32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmin.f64.v2f64"]
|
||||
fn reduce_fmin_f64x2(x: f64x2) -> f64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmin.f32.v4f32"]
|
||||
fn reduce_fmin_f32x4(x: f32x4) -> f32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmin.f64.v4f64"]
|
||||
fn reduce_fmin_f64x4(x: f64x4) -> f64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmin.f32.v8f32"]
|
||||
fn reduce_fmin_f32x8(x: f32x8) -> f32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmin.f64.v8f64"]
|
||||
fn reduce_fmin_f64x8(x: f64x8) -> f64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmin.f32.v16f32"]
|
||||
fn reduce_fmin_f32x16(x: f32x16) -> f32;
|
||||
}
|
||||
|
||||
/// Reduction: horizontal max of the vector elements.
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
|
||||
pub trait ReduceMin {
|
||||
/// Result type of the reduction.
|
||||
type Acc;
|
||||
/// Computes the horizontal max of the vector elements.
|
||||
fn reduce_min(self) -> Self::Acc;
|
||||
}
|
||||
|
||||
macro_rules! red_min {
|
||||
($id:ident, $elem_ty:ident, $llvm_intr:ident) => {
|
||||
impl ReduceMin for $id {
|
||||
type Acc = $elem_ty;
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
fn reduce_min(self) -> Self::Acc {
|
||||
unsafe { $llvm_intr(self) }
|
||||
}
|
||||
// FIXME: broken on AArch64
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[allow(unused_imports)]
|
||||
#[inline]
|
||||
fn reduce_min(self) -> Self::Acc {
|
||||
use num::Float;
|
||||
use cmp::Ord;
|
||||
let mut x = self.extract(0);
|
||||
for i in 1..$id::lanes() {
|
||||
x = x.min(self.extract(i));
|
||||
}
|
||||
x
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
red_min!(i8x2, i8, reduce_min_i8x2);
|
||||
red_min!(u8x2, u8, reduce_min_u8x2);
|
||||
red_min!(i16x2, i16, reduce_min_i16x2);
|
||||
red_min!(u16x2, u16, reduce_min_u16x2);
|
||||
red_min!(i32x2, i32, reduce_min_i32x2);
|
||||
red_min!(u32x2, u32, reduce_min_u32x2);
|
||||
red_min!(i64x2, i64, reduce_min_i64x2);
|
||||
red_min!(u64x2, u64, reduce_min_u64x2);
|
||||
red_min!(i8x4, i8, reduce_min_i8x4);
|
||||
red_min!(u8x4, u8, reduce_min_u8x4);
|
||||
red_min!(i16x4, i16, reduce_min_i16x4);
|
||||
red_min!(u16x4, u16, reduce_min_u16x4);
|
||||
red_min!(i32x4, i32, reduce_min_i32x4);
|
||||
red_min!(u32x4, u32, reduce_min_u32x4);
|
||||
red_min!(i64x4, i64, reduce_min_i64x4);
|
||||
red_min!(u64x4, u64, reduce_min_u64x4);
|
||||
red_min!(i8x8, i8, reduce_min_i8x8);
|
||||
red_min!(u8x8, u8, reduce_min_u8x8);
|
||||
red_min!(i16x8, i16, reduce_min_i16x8);
|
||||
red_min!(u16x8, u16, reduce_min_u16x8);
|
||||
red_min!(i32x8, i32, reduce_min_i32x8);
|
||||
red_min!(u32x8, u32, reduce_min_u32x8);
|
||||
red_min!(i64x8, i64, reduce_min_i64x8);
|
||||
red_min!(u64x8, u64, reduce_min_u64x8);
|
||||
red_min!(i8x16, i8, reduce_min_i8x16);
|
||||
red_min!(u8x16, u8, reduce_min_u8x16);
|
||||
red_min!(i16x16, i16, reduce_min_i16x16);
|
||||
red_min!(u16x16, u16, reduce_min_u16x16);
|
||||
red_min!(i32x16, i32, reduce_min_i32x16);
|
||||
red_min!(u32x16, u32, reduce_min_u32x16);
|
||||
red_min!(i8x32, i8, reduce_min_i8x32);
|
||||
red_min!(u8x32, u8, reduce_min_u8x32);
|
||||
red_min!(i16x32, i16, reduce_min_i16x32);
|
||||
red_min!(u16x32, u16, reduce_min_u16x32);
|
||||
red_min!(i8x64, i8, reduce_min_i8x64);
|
||||
red_min!(u8x64, u8, reduce_min_u8x64);
|
||||
|
||||
red_min!(f32x2, f32, reduce_fmin_f32x2);
|
||||
red_min!(f64x2, f64, reduce_fmin_f64x2);
|
||||
red_min!(f32x4, f32, reduce_fmin_f32x4);
|
||||
red_min!(f64x4, f64, reduce_fmin_f64x4);
|
||||
red_min!(f32x8, f32, reduce_fmin_f32x8);
|
||||
red_min!(f64x8, f64, reduce_fmin_f64x8);
|
||||
red_min!(f32x16, f32, reduce_fmin_f32x16);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::ReduceMin;
|
||||
use coresimd::simd::*;
|
||||
|
||||
// note: these are tested in the portable vector API tests
|
||||
|
||||
#[test]
|
||||
fn reduce_min_i32x4() {
|
||||
let v = i32x4::new(1, 2, -1, 3);
|
||||
assert_eq!(v.reduce_min(), -1_i32);
|
||||
}
|
||||
#[test]
|
||||
fn reduce_min_u32x4() {
|
||||
let v = u32x4::new(4, 2, 7, 3);
|
||||
assert_eq!(v.reduce_min(), 2_u32);
|
||||
}
|
||||
#[test]
|
||||
fn reduce_min_f32x4() {
|
||||
let v = f32x4::new(4., 2., -1., 3.);
|
||||
assert_eq!(v.reduce_min(), -1.);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
//! Code Generation
|
||||
|
||||
pub mod sum;
|
||||
pub mod product;
|
||||
pub mod and;
|
||||
pub mod or;
|
||||
pub mod xor;
|
||||
pub mod min;
|
||||
pub mod max;
|
||||
|
|
@ -1,170 +0,0 @@
|
|||
//! Code generation for the or reduction.
|
||||
use coresimd::simd::*;
|
||||
|
||||
/// LLVM intrinsics used in the or reduction
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i8.v2i8"]
|
||||
fn reduce_or_i8x2(x: i8x2) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u8.v2u8"]
|
||||
fn reduce_or_u8x2(x: u8x2) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i16.v2i16"]
|
||||
fn reduce_or_i16x2(x: i16x2) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u16.v2u16"]
|
||||
fn reduce_or_u16x2(x: u16x2) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i32.v2i32"]
|
||||
fn reduce_or_i32x2(x: i32x2) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u32.v2u32"]
|
||||
fn reduce_or_u32x2(x: u32x2) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i64.v2i64"]
|
||||
fn reduce_or_i64x2(x: i64x2) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u64.v2u64"]
|
||||
fn reduce_or_u64x2(x: u64x2) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i8.v4i8"]
|
||||
fn reduce_or_i8x4(x: i8x4) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u8.v4u8"]
|
||||
fn reduce_or_u8x4(x: u8x4) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i16.v4i16"]
|
||||
fn reduce_or_i16x4(x: i16x4) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u16.v4u16"]
|
||||
fn reduce_or_u16x4(x: u16x4) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i32.v4i32"]
|
||||
fn reduce_or_i32x4(x: i32x4) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u32.v4u32"]
|
||||
fn reduce_or_u32x4(x: u32x4) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i64.v4i64"]
|
||||
fn reduce_or_i64x4(x: i64x4) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u64.v4u64"]
|
||||
fn reduce_or_u64x4(x: u64x4) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i8.v8i8"]
|
||||
fn reduce_or_i8x8(x: i8x8) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u8.v8u8"]
|
||||
fn reduce_or_u8x8(x: u8x8) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i16.v8i16"]
|
||||
fn reduce_or_i16x8(x: i16x8) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u16.v8u16"]
|
||||
fn reduce_or_u16x8(x: u16x8) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i32.v8i32"]
|
||||
fn reduce_or_i32x8(x: i32x8) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u32.v8u32"]
|
||||
fn reduce_or_u32x8(x: u32x8) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i64.v8i64"]
|
||||
fn reduce_or_i64x8(x: i64x8) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u64.v8u64"]
|
||||
fn reduce_or_u64x8(x: u64x8) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i8.v16i8"]
|
||||
fn reduce_or_i8x16(x: i8x16) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u8.v16u8"]
|
||||
fn reduce_or_u8x16(x: u8x16) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i16.v16i16"]
|
||||
fn reduce_or_i16x16(x: i16x16) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u16.v16u16"]
|
||||
fn reduce_or_u16x16(x: u16x16) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i32.v16i32"]
|
||||
fn reduce_or_i32x16(x: i32x16) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u32.v16u32"]
|
||||
fn reduce_or_u32x16(x: u32x16) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i8.v32i8"]
|
||||
fn reduce_or_i8x32(x: i8x32) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u8.v32u8"]
|
||||
fn reduce_or_u8x32(x: u8x32) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i16.v32i16"]
|
||||
fn reduce_or_i16x32(x: i16x32) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u16.v32u16"]
|
||||
fn reduce_or_u16x32(x: u16x32) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.i8.v64i8"]
|
||||
fn reduce_or_i8x64(x: i8x64) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.or.u8.v64u8"]
|
||||
fn reduce_or_u8x64(x: u8x64) -> u8;
|
||||
}
|
||||
|
||||
/// Reduction: horizontal bitwise or of the vector elements.
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
|
||||
pub trait ReduceOr {
|
||||
/// Result of the reduction.
|
||||
type Acc;
|
||||
/// Computes the horizontal bitwise or of the vector elements.
|
||||
fn reduce_or(self) -> Self::Acc;
|
||||
}
|
||||
|
||||
macro_rules! red_or {
|
||||
($id:ident, $elem_ty:ident, $llvm_intr:ident) => {
|
||||
impl ReduceOr for $id {
|
||||
type Acc = $elem_ty;
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
fn reduce_or(self) -> Self::Acc {
|
||||
unsafe { $llvm_intr(self.into_bits()) }
|
||||
}
|
||||
// FIXME: broken in AArch64
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
fn reduce_or(self) -> Self::Acc {
|
||||
let mut x = self.extract(0) as Self::Acc;
|
||||
for i in 1..$id::lanes() {
|
||||
x |= self.extract(i) as Self::Acc;
|
||||
}
|
||||
x
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
red_or!(i8x2, i8, reduce_or_i8x2);
|
||||
red_or!(u8x2, u8, reduce_or_u8x2);
|
||||
red_or!(i16x2, i16, reduce_or_i16x2);
|
||||
red_or!(u16x2, u16, reduce_or_u16x2);
|
||||
red_or!(i32x2, i32, reduce_or_i32x2);
|
||||
red_or!(u32x2, u32, reduce_or_u32x2);
|
||||
red_or!(i64x2, i64, reduce_or_i64x2);
|
||||
red_or!(u64x2, u64, reduce_or_u64x2);
|
||||
red_or!(i8x4, i8, reduce_or_i8x4);
|
||||
red_or!(u8x4, u8, reduce_or_u8x4);
|
||||
red_or!(i16x4, i16, reduce_or_i16x4);
|
||||
red_or!(u16x4, u16, reduce_or_u16x4);
|
||||
red_or!(i32x4, i32, reduce_or_i32x4);
|
||||
red_or!(u32x4, u32, reduce_or_u32x4);
|
||||
red_or!(i64x4, i64, reduce_or_i64x4);
|
||||
red_or!(u64x4, u64, reduce_or_u64x4);
|
||||
red_or!(i8x8, i8, reduce_or_i8x8);
|
||||
red_or!(u8x8, u8, reduce_or_u8x8);
|
||||
red_or!(i16x8, i16, reduce_or_i16x8);
|
||||
red_or!(u16x8, u16, reduce_or_u16x8);
|
||||
red_or!(i32x8, i32, reduce_or_i32x8);
|
||||
red_or!(u32x8, u32, reduce_or_u32x8);
|
||||
red_or!(i64x8, i64, reduce_or_i64x8);
|
||||
red_or!(u64x8, u64, reduce_or_u64x8);
|
||||
red_or!(i8x16, i8, reduce_or_i8x16);
|
||||
red_or!(u8x16, u8, reduce_or_u8x16);
|
||||
red_or!(i16x16, i16, reduce_or_i16x16);
|
||||
red_or!(u16x16, u16, reduce_or_u16x16);
|
||||
red_or!(i32x16, i32, reduce_or_i32x16);
|
||||
red_or!(u32x16, u32, reduce_or_u32x16);
|
||||
red_or!(i8x32, i8, reduce_or_i8x32);
|
||||
red_or!(u8x32, u8, reduce_or_u8x32);
|
||||
red_or!(i16x32, i16, reduce_or_i16x32);
|
||||
red_or!(u16x32, u16, reduce_or_u16x32);
|
||||
red_or!(i8x64, i8, reduce_or_i8x64);
|
||||
red_or!(u8x64, u8, reduce_or_u8x64);
|
||||
|
||||
red_or!(b8x2, i8, reduce_or_i8x2);
|
||||
red_or!(b8x4, i8, reduce_or_i8x4);
|
||||
red_or!(b8x8, i8, reduce_or_i8x8);
|
||||
red_or!(b8x16, i8, reduce_or_i8x16);
|
||||
red_or!(b8x32, i8, reduce_or_i8x32);
|
||||
red_or!(b8x64, i8, reduce_or_i8x64);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::ReduceOr;
|
||||
use coresimd::simd::*;
|
||||
|
||||
// note: these are tested in the portable vector API tests
|
||||
|
||||
#[test]
|
||||
fn reduce_or_i32x4() {
|
||||
let v = i32x4::splat(1);
|
||||
assert_eq!(v.reduce_or(), 1_i32);
|
||||
let v = i32x4::new(1, 1, 0, 1);
|
||||
assert_eq!(v.reduce_or(), 1_i32);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,210 +0,0 @@
|
|||
//! Code generation for the product reduction.
|
||||
use coresimd::simd::*;
|
||||
|
||||
/// LLVM intrinsics used in the product reduction
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i8.v2i8"]
|
||||
fn reduce_mul_i8x2(x: i8x2) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u8.v2u8"]
|
||||
fn reduce_mul_u8x2(x: u8x2) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i16.v2i16"]
|
||||
fn reduce_mul_i16x2(x: i16x2) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u16.v2u16"]
|
||||
fn reduce_mul_u16x2(x: u16x2) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i32.v2i32"]
|
||||
fn reduce_mul_i32x2(x: i32x2) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u32.v2u32"]
|
||||
fn reduce_mul_u32x2(x: u32x2) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i64.v2i64"]
|
||||
fn reduce_mul_i64x2(x: i64x2) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u64.v2u64"]
|
||||
fn reduce_mul_u64x2(x: u64x2) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i8.v4i8"]
|
||||
fn reduce_mul_i8x4(x: i8x4) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u8.v4u8"]
|
||||
fn reduce_mul_u8x4(x: u8x4) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i16.v4i16"]
|
||||
fn reduce_mul_i16x4(x: i16x4) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u16.v4u16"]
|
||||
fn reduce_mul_u16x4(x: u16x4) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i32.v4i32"]
|
||||
fn reduce_mul_i32x4(x: i32x4) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u32.v4u32"]
|
||||
fn reduce_mul_u32x4(x: u32x4) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i64.v4i64"]
|
||||
fn reduce_mul_i64x4(x: i64x4) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u64.v4u64"]
|
||||
fn reduce_mul_u64x4(x: u64x4) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i8.v8i8"]
|
||||
fn reduce_mul_i8x8(x: i8x8) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u8.v8u8"]
|
||||
fn reduce_mul_u8x8(x: u8x8) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i16.v8i16"]
|
||||
fn reduce_mul_i16x8(x: i16x8) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u16.v8u16"]
|
||||
fn reduce_mul_u16x8(x: u16x8) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i32.v8i32"]
|
||||
fn reduce_mul_i32x8(x: i32x8) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u32.v8u32"]
|
||||
fn reduce_mul_u32x8(x: u32x8) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i64.v8i64"]
|
||||
fn reduce_mul_i64x8(x: i64x8) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u64.v8u64"]
|
||||
fn reduce_mul_u64x8(x: u64x8) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i8.v16i8"]
|
||||
fn reduce_mul_i8x16(x: i8x16) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u8.v16u8"]
|
||||
fn reduce_mul_u8x16(x: u8x16) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i16.v16i16"]
|
||||
fn reduce_mul_i16x16(x: i16x16) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u16.v16u16"]
|
||||
fn reduce_mul_u16x16(x: u16x16) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i32.v16i32"]
|
||||
fn reduce_mul_i32x16(x: i32x16) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u32.v16u32"]
|
||||
fn reduce_mul_u32x16(x: u32x16) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i8.v32i8"]
|
||||
fn reduce_mul_i8x32(x: i8x32) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u8.v32u8"]
|
||||
fn reduce_mul_u8x32(x: u8x32) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i16.v32i16"]
|
||||
fn reduce_mul_i16x32(x: i16x32) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u16.v32u16"]
|
||||
fn reduce_mul_u16x32(x: u16x32) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.i8.v64i8"]
|
||||
fn reduce_mul_i8x64(x: i8x64) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.mul.u8.v64u8"]
|
||||
fn reduce_mul_u8x64(x: u8x64) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmul.f32.v2f32"]
|
||||
fn reduce_fmul_f32x2(acc: f32, x: f32x2) -> f32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmul.f64.v2f64"]
|
||||
fn reduce_fmul_f64x2(acc: f64, x: f64x2) -> f64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmul.f32.v4f32"]
|
||||
fn reduce_fmul_f32x4(acc: f32, x: f32x4) -> f32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmul.f64.v4f64"]
|
||||
fn reduce_fmul_f64x4(acc: f64, x: f64x4) -> f64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmul.f32.v8f32"]
|
||||
fn reduce_fmul_f32x8(acc: f32, x: f32x8) -> f32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmul.f64.v8f64"]
|
||||
fn reduce_fmul_f64x8(acc: f64, x: f64x8) -> f64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fmul.f32.v16f32"]
|
||||
fn reduce_fmul_f32x16(acc: f32, x: f32x16) -> f32;
|
||||
}
|
||||
|
||||
/// Reduction: horizontal product of the vector elements.
|
||||
pub trait ReduceMul {
|
||||
/// Result type of the reduction.
|
||||
type Acc;
|
||||
/// Computes the horizontal product of the vector elements.
|
||||
fn reduce_mul(self) -> Self::Acc;
|
||||
}
|
||||
|
||||
macro_rules! red_mul {
|
||||
($id:ident, $elem_ty:ident, $llvm_intr:ident) => {
|
||||
impl ReduceMul for $id {
|
||||
type Acc = $elem_ty;
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
fn reduce_mul(self) -> Self::Acc {
|
||||
unsafe { $llvm_intr(self) }
|
||||
}
|
||||
// FIXME: broken in AArch64
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
fn reduce_mul(self) -> Self::Acc {
|
||||
let mut x = self.extract(0);
|
||||
for i in 1..$id::lanes() {
|
||||
x *= self.extract(i);
|
||||
}
|
||||
x
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
red_mul!(i8x2, i8, reduce_mul_i8x2);
|
||||
red_mul!(u8x2, u8, reduce_mul_u8x2);
|
||||
red_mul!(i16x2, i16, reduce_mul_i16x2);
|
||||
red_mul!(u16x2, u16, reduce_mul_u16x2);
|
||||
red_mul!(i32x2, i32, reduce_mul_i32x2);
|
||||
red_mul!(u32x2, u32, reduce_mul_u32x2);
|
||||
red_mul!(i64x2, i64, reduce_mul_i64x2);
|
||||
red_mul!(u64x2, u64, reduce_mul_u64x2);
|
||||
red_mul!(i8x4, i8, reduce_mul_i8x4);
|
||||
red_mul!(u8x4, u8, reduce_mul_u8x4);
|
||||
red_mul!(i16x4, i16, reduce_mul_i16x4);
|
||||
red_mul!(u16x4, u16, reduce_mul_u16x4);
|
||||
red_mul!(i32x4, i32, reduce_mul_i32x4);
|
||||
red_mul!(u32x4, u32, reduce_mul_u32x4);
|
||||
red_mul!(i64x4, i64, reduce_mul_i64x4);
|
||||
red_mul!(u64x4, u64, reduce_mul_u64x4);
|
||||
red_mul!(i8x8, i8, reduce_mul_i8x8);
|
||||
red_mul!(u8x8, u8, reduce_mul_u8x8);
|
||||
red_mul!(i16x8, i16, reduce_mul_i16x8);
|
||||
red_mul!(u16x8, u16, reduce_mul_u16x8);
|
||||
red_mul!(i32x8, i32, reduce_mul_i32x8);
|
||||
red_mul!(u32x8, u32, reduce_mul_u32x8);
|
||||
red_mul!(i64x8, i64, reduce_mul_i64x8);
|
||||
red_mul!(u64x8, u64, reduce_mul_u64x8);
|
||||
red_mul!(i8x16, i8, reduce_mul_i8x16);
|
||||
red_mul!(u8x16, u8, reduce_mul_u8x16);
|
||||
red_mul!(i16x16, i16, reduce_mul_i16x16);
|
||||
red_mul!(u16x16, u16, reduce_mul_u16x16);
|
||||
red_mul!(i32x16, i32, reduce_mul_i32x16);
|
||||
red_mul!(u32x16, u32, reduce_mul_u32x16);
|
||||
red_mul!(i8x32, i8, reduce_mul_i8x32);
|
||||
red_mul!(u8x32, u8, reduce_mul_u8x32);
|
||||
red_mul!(i16x32, i16, reduce_mul_i16x32);
|
||||
red_mul!(u16x32, u16, reduce_mul_u16x32);
|
||||
red_mul!(i8x64, i8, reduce_mul_i8x64);
|
||||
red_mul!(u8x64, u8, reduce_mul_u8x64);
|
||||
|
||||
macro_rules! red_fmul {
|
||||
($id:ident, $elem_ty:ident, $llvm_intr:ident) => {
|
||||
impl ReduceMul for $id {
|
||||
type Acc = $elem_ty;
|
||||
#[inline]
|
||||
fn reduce_mul(self) -> Self::Acc {
|
||||
// FIXME:
|
||||
// unsafe { $llvm_intr(1. as $elem_ty, self) }
|
||||
let mut x = self.extract(0);
|
||||
for i in 1..$id::lanes() {
|
||||
x *= self.extract(i);
|
||||
}
|
||||
x
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
red_fmul!(f32x2, f32, reduce_fmul_f32x2);
|
||||
red_fmul!(f64x2, f64, reduce_fmul_f64x2);
|
||||
red_fmul!(f32x4, f32, reduce_fmul_f32x4);
|
||||
red_fmul!(f64x4, f64, reduce_fmul_f64x4);
|
||||
red_fmul!(f32x8, f32, reduce_fmul_f32x8);
|
||||
red_fmul!(f64x8, f64, reduce_fmul_f64x8);
|
||||
red_fmul!(f32x16, f32, reduce_fmul_f32x16);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::ReduceMul;
|
||||
use coresimd::simd::*;
|
||||
|
||||
// note: these are tested in the portable vector API tests
|
||||
|
||||
#[test]
|
||||
fn reduce_mul_i32x4() {
|
||||
let v = i32x4::splat(2);
|
||||
assert_eq!(v.reduce_mul(), 16_i32);
|
||||
}
|
||||
#[test]
|
||||
fn reduce_mul_u32x4() {
|
||||
let v = u32x4::splat(2);
|
||||
assert_eq!(v.reduce_mul(), 16_u32);
|
||||
}
|
||||
#[test]
|
||||
fn reduce_mul_f32x4() {
|
||||
let v = f32x4::splat(2.);
|
||||
assert_eq!(v.reduce_mul(), 16.);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,210 +0,0 @@
|
|||
//! Code generation for the sum reduction.
|
||||
use coresimd::simd::*;
|
||||
|
||||
/// LLVM intrinsics used in the sum reduction
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i8.v2i8"]
|
||||
fn reduce_add_i8x2(x: i8x2) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u8.v2u8"]
|
||||
fn reduce_add_u8x2(x: u8x2) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i16.v2i16"]
|
||||
fn reduce_add_i16x2(x: i16x2) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u16.v2u16"]
|
||||
fn reduce_add_u16x2(x: u16x2) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i32.v2i32"]
|
||||
fn reduce_add_i32x2(x: i32x2) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u32.v2u32"]
|
||||
fn reduce_add_u32x2(x: u32x2) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i64.v2i64"]
|
||||
fn reduce_add_i64x2(x: i64x2) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u64.v2u64"]
|
||||
fn reduce_add_u64x2(x: u64x2) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i8.v4i8"]
|
||||
fn reduce_add_i8x4(x: i8x4) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u8.v4u8"]
|
||||
fn reduce_add_u8x4(x: u8x4) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i16.v4i16"]
|
||||
fn reduce_add_i16x4(x: i16x4) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u16.v4u16"]
|
||||
fn reduce_add_u16x4(x: u16x4) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i32.v4i32"]
|
||||
fn reduce_add_i32x4(x: i32x4) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u32.v4u32"]
|
||||
fn reduce_add_u32x4(x: u32x4) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i64.v4i64"]
|
||||
fn reduce_add_i64x4(x: i64x4) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u64.v4u64"]
|
||||
fn reduce_add_u64x4(x: u64x4) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i8.v8i8"]
|
||||
fn reduce_add_i8x8(x: i8x8) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u8.v8u8"]
|
||||
fn reduce_add_u8x8(x: u8x8) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i16.v8i16"]
|
||||
fn reduce_add_i16x8(x: i16x8) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u16.v8u16"]
|
||||
fn reduce_add_u16x8(x: u16x8) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i32.v8i32"]
|
||||
fn reduce_add_i32x8(x: i32x8) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u32.v8u32"]
|
||||
fn reduce_add_u32x8(x: u32x8) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i64.v8i64"]
|
||||
fn reduce_add_i64x8(x: i64x8) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u64.v8u64"]
|
||||
fn reduce_add_u64x8(x: u64x8) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i8.v16i8"]
|
||||
fn reduce_add_i8x16(x: i8x16) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u8.v16u8"]
|
||||
fn reduce_add_u8x16(x: u8x16) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i16.v16i16"]
|
||||
fn reduce_add_i16x16(x: i16x16) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u16.v16u16"]
|
||||
fn reduce_add_u16x16(x: u16x16) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i32.v16i32"]
|
||||
fn reduce_add_i32x16(x: i32x16) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u32.v16u32"]
|
||||
fn reduce_add_u32x16(x: u32x16) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i8.v32i8"]
|
||||
fn reduce_add_i8x32(x: i8x32) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u8.v32u8"]
|
||||
fn reduce_add_u8x32(x: u8x32) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i16.v32i16"]
|
||||
fn reduce_add_i16x32(x: i16x32) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u16.v32u16"]
|
||||
fn reduce_add_u16x32(x: u16x32) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.i8.v64i8"]
|
||||
fn reduce_add_i8x64(x: i8x64) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.add.u8.v64u8"]
|
||||
fn reduce_add_u8x64(x: u8x64) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fadd.f32.v2f32"]
|
||||
fn reduce_fadd_f32x2(acc: f32, x: f32x2) -> f32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fadd.f64.v2f64"]
|
||||
fn reduce_fadd_f64x2(acc: f64, x: f64x2) -> f64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fadd.f32.v4f32"]
|
||||
fn reduce_fadd_f32x4(acc: f32, x: f32x4) -> f32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fadd.f64.v4f64"]
|
||||
fn reduce_fadd_f64x4(acc: f64, x: f64x4) -> f64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fadd.f32.v8f32"]
|
||||
fn reduce_fadd_f32x8(acc: f32, x: f32x8) -> f32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fadd.f64.v8f64"]
|
||||
fn reduce_fadd_f64x8(acc: f64, x: f64x8) -> f64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.fadd.f32.v16f32"]
|
||||
fn reduce_fadd_f32x16(acc: f32, x: f32x16) -> f32;
|
||||
}
|
||||
|
||||
/// Reduction: horizontal sum of the vector elements.
|
||||
pub trait ReduceAdd {
|
||||
/// Result type of the reduction.
|
||||
type Acc;
|
||||
/// Computes the horizontal sum of the vector elements.
|
||||
fn reduce_add(self) -> Self::Acc;
|
||||
}
|
||||
|
||||
macro_rules! red_add {
|
||||
($id:ident, $elem_ty:ident, $llvm_intr:ident) => {
|
||||
impl ReduceAdd for $id {
|
||||
type Acc = $elem_ty;
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
fn reduce_add(self) -> Self::Acc {
|
||||
unsafe { $llvm_intr(self) }
|
||||
}
|
||||
// FIXME: broken in AArch64
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
fn reduce_add(self) -> Self::Acc {
|
||||
let mut x = self.extract(0) as Self::Acc;
|
||||
for i in 1..$id::lanes() {
|
||||
x += self.extract(i) as Self::Acc;
|
||||
}
|
||||
x
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
red_add!(i8x2, i8, reduce_add_i8x2);
|
||||
red_add!(u8x2, u8, reduce_add_u8x2);
|
||||
red_add!(i16x2, i16, reduce_add_i16x2);
|
||||
red_add!(u16x2, u16, reduce_add_u16x2);
|
||||
red_add!(i32x2, i32, reduce_add_i32x2);
|
||||
red_add!(u32x2, u32, reduce_add_u32x2);
|
||||
red_add!(i64x2, i64, reduce_add_i64x2);
|
||||
red_add!(u64x2, u64, reduce_add_u64x2);
|
||||
red_add!(i8x4, i8, reduce_add_i8x4);
|
||||
red_add!(u8x4, u8, reduce_add_u8x4);
|
||||
red_add!(i16x4, i16, reduce_add_i16x4);
|
||||
red_add!(u16x4, u16, reduce_add_u16x4);
|
||||
red_add!(i32x4, i32, reduce_add_i32x4);
|
||||
red_add!(u32x4, u32, reduce_add_u32x4);
|
||||
red_add!(i64x4, i64, reduce_add_i64x4);
|
||||
red_add!(u64x4, u64, reduce_add_u64x4);
|
||||
red_add!(i8x8, i8, reduce_add_i8x8);
|
||||
red_add!(u8x8, u8, reduce_add_u8x8);
|
||||
red_add!(i16x8, i16, reduce_add_i16x8);
|
||||
red_add!(u16x8, u16, reduce_add_u16x8);
|
||||
red_add!(i32x8, i32, reduce_add_i32x8);
|
||||
red_add!(u32x8, u32, reduce_add_u32x8);
|
||||
red_add!(i64x8, i64, reduce_add_i64x8);
|
||||
red_add!(u64x8, u64, reduce_add_u64x8);
|
||||
red_add!(i8x16, i8, reduce_add_i8x16);
|
||||
red_add!(u8x16, u8, reduce_add_u8x16);
|
||||
red_add!(i16x16, i16, reduce_add_i16x16);
|
||||
red_add!(u16x16, u16, reduce_add_u16x16);
|
||||
red_add!(i32x16, i32, reduce_add_i32x16);
|
||||
red_add!(u32x16, u32, reduce_add_u32x16);
|
||||
red_add!(i8x32, i8, reduce_add_i8x32);
|
||||
red_add!(u8x32, u8, reduce_add_u8x32);
|
||||
red_add!(i16x32, i16, reduce_add_i16x32);
|
||||
red_add!(u16x32, u16, reduce_add_u16x32);
|
||||
red_add!(i8x64, i8, reduce_add_i8x64);
|
||||
red_add!(u8x64, u8, reduce_add_u8x64);
|
||||
|
||||
macro_rules! red_fadd {
|
||||
($id:ident, $elem_ty:ident, $llvm_intr:ident) => {
|
||||
impl ReduceAdd for $id {
|
||||
type Acc = $elem_ty;
|
||||
#[inline]
|
||||
fn reduce_add(self) -> Self::Acc {
|
||||
// FIXME:
|
||||
//unsafe { $llvm_intr(0. as $elem_ty, self) }
|
||||
let mut x = self.extract(0);
|
||||
for i in 1..$id::lanes() {
|
||||
x += self.extract(i);
|
||||
}
|
||||
x
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
red_fadd!(f32x2, f32, reduce_fadd_f32x2);
|
||||
red_fadd!(f64x2, f64, reduce_fadd_f64x2);
|
||||
red_fadd!(f32x4, f32, reduce_fadd_f32x4);
|
||||
red_fadd!(f64x4, f64, reduce_fadd_f64x4);
|
||||
red_fadd!(f32x8, f32, reduce_fadd_f32x8);
|
||||
red_fadd!(f64x8, f64, reduce_fadd_f64x8);
|
||||
red_fadd!(f32x16, f32, reduce_fadd_f32x16);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::ReduceAdd;
|
||||
use coresimd::simd::*;
|
||||
|
||||
// note: these are tested in the portable vector API tests
|
||||
|
||||
#[test]
|
||||
fn reduce_add_i32x4() {
|
||||
let v = i32x4::splat(1);
|
||||
assert_eq!(v.reduce_add(), 4_i32);
|
||||
}
|
||||
#[test]
|
||||
fn reduce_add_u32x4() {
|
||||
let v = u32x4::splat(1);
|
||||
assert_eq!(v.reduce_add(), 4_u32);
|
||||
}
|
||||
#[test]
|
||||
fn reduce_add_f32x4() {
|
||||
let v = f32x4::splat(1.);
|
||||
assert_eq!(v.reduce_add(), 4.);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,170 +0,0 @@
|
|||
//! Code generation for the xor reduction.
|
||||
use coresimd::simd::*;
|
||||
|
||||
/// LLVM intrinsics used in the xor reduction
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i8.v2i8"]
|
||||
fn reduce_xor_i8x2(x: i8x2) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u8.v2u8"]
|
||||
fn reduce_xor_u8x2(x: u8x2) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i16.v2i16"]
|
||||
fn reduce_xor_i16x2(x: i16x2) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u16.v2u16"]
|
||||
fn reduce_xor_u16x2(x: u16x2) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i32.v2i32"]
|
||||
fn reduce_xor_i32x2(x: i32x2) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u32.v2u32"]
|
||||
fn reduce_xor_u32x2(x: u32x2) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i64.v2i64"]
|
||||
fn reduce_xor_i64x2(x: i64x2) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u64.v2u64"]
|
||||
fn reduce_xor_u64x2(x: u64x2) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i8.v4i8"]
|
||||
fn reduce_xor_i8x4(x: i8x4) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u8.v4u8"]
|
||||
fn reduce_xor_u8x4(x: u8x4) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i16.v4i16"]
|
||||
fn reduce_xor_i16x4(x: i16x4) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u16.v4u16"]
|
||||
fn reduce_xor_u16x4(x: u16x4) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i32.v4i32"]
|
||||
fn reduce_xor_i32x4(x: i32x4) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u32.v4u32"]
|
||||
fn reduce_xor_u32x4(x: u32x4) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i64.v4i64"]
|
||||
fn reduce_xor_i64x4(x: i64x4) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u64.v4u64"]
|
||||
fn reduce_xor_u64x4(x: u64x4) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i8.v8i8"]
|
||||
fn reduce_xor_i8x8(x: i8x8) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u8.v8u8"]
|
||||
fn reduce_xor_u8x8(x: u8x8) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i16.v8i16"]
|
||||
fn reduce_xor_i16x8(x: i16x8) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u16.v8u16"]
|
||||
fn reduce_xor_u16x8(x: u16x8) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i32.v8i32"]
|
||||
fn reduce_xor_i32x8(x: i32x8) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u32.v8u32"]
|
||||
fn reduce_xor_u32x8(x: u32x8) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i64.v8i64"]
|
||||
fn reduce_xor_i64x8(x: i64x8) -> i64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u64.v8u64"]
|
||||
fn reduce_xor_u64x8(x: u64x8) -> u64;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i8.v16i8"]
|
||||
fn reduce_xor_i8x16(x: i8x16) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u8.v16u8"]
|
||||
fn reduce_xor_u8x16(x: u8x16) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i16.v16i16"]
|
||||
fn reduce_xor_i16x16(x: i16x16) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u16.v16u16"]
|
||||
fn reduce_xor_u16x16(x: u16x16) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i32.v16i32"]
|
||||
fn reduce_xor_i32x16(x: i32x16) -> i32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u32.v16u32"]
|
||||
fn reduce_xor_u32x16(x: u32x16) -> u32;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i8.v32i8"]
|
||||
fn reduce_xor_i8x32(x: i8x32) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u8.v32u8"]
|
||||
fn reduce_xor_u8x32(x: u8x32) -> u8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i16.v32i16"]
|
||||
fn reduce_xor_i16x32(x: i16x32) -> i16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u16.v32u16"]
|
||||
fn reduce_xor_u16x32(x: u16x32) -> u16;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.i8.v64i8"]
|
||||
fn reduce_xor_i8x64(x: i8x64) -> i8;
|
||||
#[link_name = "llvm.experimental.vector.reduce.xor.u8.v64u8"]
|
||||
fn reduce_xor_u8x64(x: u8x64) -> u8;
|
||||
}
|
||||
|
||||
/// Reduction: horizontal bitwise xor of the vector elements.
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(stutter))]
|
||||
pub trait ReduceXor {
|
||||
/// Result type of the reduction.
|
||||
type Acc;
|
||||
/// Computes the horizontal bitwise xor of the vector elements.
|
||||
fn reduce_xor(self) -> Self::Acc;
|
||||
}
|
||||
|
||||
macro_rules! red_xor {
|
||||
($id:ident, $elem_ty:ident, $llvm_intr:ident) => {
|
||||
impl ReduceXor for $id {
|
||||
type Acc = $elem_ty;
|
||||
#[cfg(not(target_arch = "aarch64"))]
|
||||
#[inline]
|
||||
fn reduce_xor(self) -> Self::Acc {
|
||||
unsafe { $llvm_intr(self.into_bits()) }
|
||||
}
|
||||
// FIXME: broken in AArch64
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[inline]
|
||||
fn reduce_xor(self) -> Self::Acc {
|
||||
let mut x = self.extract(0) as Self::Acc;
|
||||
for i in 1..$id::lanes() {
|
||||
x ^= self.extract(i) as Self::Acc;
|
||||
}
|
||||
x
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
red_xor!(i8x2, i8, reduce_xor_i8x2);
|
||||
red_xor!(u8x2, u8, reduce_xor_u8x2);
|
||||
red_xor!(i16x2, i16, reduce_xor_i16x2);
|
||||
red_xor!(u16x2, u16, reduce_xor_u16x2);
|
||||
red_xor!(i32x2, i32, reduce_xor_i32x2);
|
||||
red_xor!(u32x2, u32, reduce_xor_u32x2);
|
||||
red_xor!(i64x2, i64, reduce_xor_i64x2);
|
||||
red_xor!(u64x2, u64, reduce_xor_u64x2);
|
||||
red_xor!(i8x4, i8, reduce_xor_i8x4);
|
||||
red_xor!(u8x4, u8, reduce_xor_u8x4);
|
||||
red_xor!(i16x4, i16, reduce_xor_i16x4);
|
||||
red_xor!(u16x4, u16, reduce_xor_u16x4);
|
||||
red_xor!(i32x4, i32, reduce_xor_i32x4);
|
||||
red_xor!(u32x4, u32, reduce_xor_u32x4);
|
||||
red_xor!(i64x4, i64, reduce_xor_i64x4);
|
||||
red_xor!(u64x4, u64, reduce_xor_u64x4);
|
||||
red_xor!(i8x8, i8, reduce_xor_i8x8);
|
||||
red_xor!(u8x8, u8, reduce_xor_u8x8);
|
||||
red_xor!(i16x8, i16, reduce_xor_i16x8);
|
||||
red_xor!(u16x8, u16, reduce_xor_u16x8);
|
||||
red_xor!(i32x8, i32, reduce_xor_i32x8);
|
||||
red_xor!(u32x8, u32, reduce_xor_u32x8);
|
||||
red_xor!(i64x8, i64, reduce_xor_i64x8);
|
||||
red_xor!(u64x8, u64, reduce_xor_u64x8);
|
||||
red_xor!(i8x16, i8, reduce_xor_i8x16);
|
||||
red_xor!(u8x16, u8, reduce_xor_u8x16);
|
||||
red_xor!(i16x16, i16, reduce_xor_i16x16);
|
||||
red_xor!(u16x16, u16, reduce_xor_u16x16);
|
||||
red_xor!(i32x16, i32, reduce_xor_i32x16);
|
||||
red_xor!(u32x16, u32, reduce_xor_u32x16);
|
||||
red_xor!(i8x32, i8, reduce_xor_i8x32);
|
||||
red_xor!(u8x32, u8, reduce_xor_u8x32);
|
||||
red_xor!(i16x32, i16, reduce_xor_i16x32);
|
||||
red_xor!(u16x32, u16, reduce_xor_u16x32);
|
||||
red_xor!(i8x64, i8, reduce_xor_i8x64);
|
||||
red_xor!(u8x64, u8, reduce_xor_u8x64);
|
||||
|
||||
red_xor!(b8x2, i8, reduce_xor_i8x2);
|
||||
red_xor!(b8x4, i8, reduce_xor_i8x4);
|
||||
red_xor!(b8x8, i8, reduce_xor_i8x8);
|
||||
red_xor!(b8x16, i8, reduce_xor_i8x16);
|
||||
red_xor!(b8x32, i8, reduce_xor_i8x32);
|
||||
red_xor!(b8x64, i8, reduce_xor_i8x64);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::ReduceXor;
|
||||
use coresimd::simd::*;
|
||||
|
||||
// note: these are tested in the portable vector API tests
|
||||
|
||||
#[test]
|
||||
fn reduce_xor_i32x4() {
|
||||
let v = i32x4::splat(1);
|
||||
assert_eq!(v.reduce_xor(), 0_i32);
|
||||
let v = i32x4::new(1, 0, 0, 0);
|
||||
assert_eq!(v.reduce_xor(), 1_i32);
|
||||
}
|
||||
}
|
||||
|
|
@ -32,7 +32,6 @@
|
|||
|
||||
#[macro_use]
|
||||
mod api;
|
||||
mod codegen;
|
||||
|
||||
mod v16;
|
||||
mod v32;
|
||||
|
|
@ -48,16 +47,14 @@ pub use self::v128::*;
|
|||
pub use self::v256::*;
|
||||
pub use self::v512::*;
|
||||
|
||||
use marker;
|
||||
|
||||
/// Safe lossless bitwise conversion from `T` to `Self`.
|
||||
pub trait FromBits<T>: marker::Sized {
|
||||
pub trait FromBits<T>: ::marker::Sized {
|
||||
/// Safe lossless bitwise from `T` to `Self`.
|
||||
fn from_bits(T) -> Self;
|
||||
}
|
||||
|
||||
/// Safe lossless bitwise conversion from `Self` to `T`.
|
||||
pub trait IntoBits<T>: marker::Sized {
|
||||
pub trait IntoBits<T>: ::marker::Sized {
|
||||
/// Safe lossless bitwise transmute from `self` to `T`.
|
||||
fn into_bits(self) -> T;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,81 +1,78 @@
|
|||
//! 128-bit wide portable packed vector types.
|
||||
|
||||
simd_api_imports!();
|
||||
|
||||
use coresimd::simd::{b8x2, b8x4, b8x8};
|
||||
|
||||
simd_i_ty! {
|
||||
i8x16: 16, i8, b8x16, i8x16_tests |
|
||||
i8x16: 16, i8, b8x16, i8x16_tests, test_v128 |
|
||||
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
|
||||
/// A 128-bit vector with 16 `i8` lanes.
|
||||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u8x16: 16, u8, b8x16, u8x16_tests |
|
||||
u8x16: 16, u8, b8x16, u8x16_tests, test_v128 |
|
||||
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
|
||||
/// A 128-bit vector with 16 `u8` lanes.
|
||||
}
|
||||
|
||||
simd_b_ty! {
|
||||
b8x16: 16, i8, b8x16_tests |
|
||||
b8x16: 16, i8, b8x16_tests, test_v128 |
|
||||
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 |
|
||||
/// A 128-bit vector with 16 `bool` lanes.
|
||||
}
|
||||
|
||||
simd_i_ty! {
|
||||
i16x8: 8, i16, b8x8, i16x8_tests |
|
||||
i16x8: 8, i16, b8x8, i16x8_tests, test_v128 |
|
||||
i16, i16, i16, i16, i16, i16, i16, i16 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7 |
|
||||
/// A 128-bit vector with 8 `i16` lanes.
|
||||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u16x8: 8, u16, b8x8, u16x8_tests |
|
||||
u16x8: 8, u16, b8x8, u16x8_tests, test_v128 |
|
||||
u16, u16, u16, u16, u16, u16, u16, u16 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7 |
|
||||
/// A 128-bit vector with 8 `u16` lanes.
|
||||
}
|
||||
|
||||
simd_i_ty! {
|
||||
i32x4: 4, i32, b8x4, i32x4_tests |
|
||||
i32x4: 4, i32, b8x4, i32x4_tests, test_v128 |
|
||||
i32, i32, i32, i32 |
|
||||
x0, x1, x2, x3 |
|
||||
/// A 128-bit vector with 4 `i32` lanes.
|
||||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u32x4: 4, u32, b8x4, u32x4_tests |
|
||||
u32x4: 4, u32, b8x4, u32x4_tests, test_v128 |
|
||||
u32, u32, u32, u32 |
|
||||
x0, x1, x2, x3 |
|
||||
/// A 128-bit vector with 4 `u32` lanes.
|
||||
}
|
||||
|
||||
simd_f_ty! {
|
||||
f32x4: 4, f32, b8x4, f32x4_tests |
|
||||
f32x4: 4, f32, b8x4, f32x4_tests, test_v128 |
|
||||
f32, f32, f32, f32 |
|
||||
x0, x1, x2, x3 |
|
||||
/// A 128-bit vector with 4 `f32` lanes.
|
||||
}
|
||||
|
||||
simd_i_ty! {
|
||||
i64x2: 2, i64, b8x2, i64x2_tests |
|
||||
i64x2: 2, i64, b8x2, i64x2_tests, test_v128 |
|
||||
i64, i64 |
|
||||
x0, x1 |
|
||||
/// A 128-bit vector with 2 `u64` lanes.
|
||||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u64x2: 2, u64, b8x2, u64x2_tests |
|
||||
u64x2: 2, u64, b8x2, u64x2_tests, test_v128 |
|
||||
u64, u64 |
|
||||
x0, x1 |
|
||||
/// A 128-bit vector with 2 `u64` lanes.
|
||||
}
|
||||
|
||||
simd_f_ty! {
|
||||
f64x2: 2, f64, b8x2, f64x2_tests |
|
||||
f64x2: 2, f64, b8x2, f64x2_tests, test_v128 |
|
||||
f64, f64 |
|
||||
x0, x1 |
|
||||
/// A 128-bit vector with 2 `f64` lanes.
|
||||
|
|
@ -83,7 +80,8 @@ simd_f_ty! {
|
|||
|
||||
impl_from_bits!(
|
||||
u64x2: u64,
|
||||
u64x2_from_bits | i64x2,
|
||||
u64x2_from_bits,
|
||||
test_v128 | i64x2,
|
||||
f64x2,
|
||||
u32x4,
|
||||
i32x4,
|
||||
|
|
@ -96,7 +94,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
i64x2: i64,
|
||||
i64x2_from_bits | u64x2,
|
||||
i64x2_from_bits,
|
||||
test_v128 | u64x2,
|
||||
f64x2,
|
||||
u32x4,
|
||||
i32x4,
|
||||
|
|
@ -109,7 +108,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
f64x2: f64,
|
||||
f64x2_from_bits | i64x2,
|
||||
f64x2_from_bits,
|
||||
test_v128 | i64x2,
|
||||
u64x2,
|
||||
u32x4,
|
||||
i32x4,
|
||||
|
|
@ -122,7 +122,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
u32x4: u32,
|
||||
u32x4_from_bits | u64x2,
|
||||
u32x4_from_bits,
|
||||
test_v128 | u64x2,
|
||||
i64x2,
|
||||
f64x2,
|
||||
i32x4,
|
||||
|
|
@ -135,7 +136,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
i32x4: i32,
|
||||
i32x4_from_bits | u64x2,
|
||||
i32x4_from_bits,
|
||||
test_v128 | u64x2,
|
||||
i64x2,
|
||||
f64x2,
|
||||
u32x4,
|
||||
|
|
@ -148,7 +150,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
f32x4: f32,
|
||||
f32x4_from_bits | u64x2,
|
||||
f32x4_from_bits,
|
||||
test_v128 | u64x2,
|
||||
i64x2,
|
||||
f64x2,
|
||||
i32x4,
|
||||
|
|
@ -161,7 +164,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
u16x8: u16,
|
||||
u16x8_from_bits | u64x2,
|
||||
u16x8_from_bits,
|
||||
test_v128 | u64x2,
|
||||
i64x2,
|
||||
f64x2,
|
||||
u32x4,
|
||||
|
|
@ -174,7 +178,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
i16x8: i16,
|
||||
i16x8_from_bits | u64x2,
|
||||
i16x8_from_bits,
|
||||
test_v128 | u64x2,
|
||||
i64x2,
|
||||
f64x2,
|
||||
u32x4,
|
||||
|
|
@ -187,7 +192,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
u8x16: u8,
|
||||
u8x16_from_bits | u64x2,
|
||||
u8x16_from_bits,
|
||||
test_v128 | u64x2,
|
||||
i64x2,
|
||||
f64x2,
|
||||
u32x4,
|
||||
|
|
@ -200,7 +206,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
i8x16: i8,
|
||||
i8x16_from_bits | u64x2,
|
||||
i8x16_from_bits,
|
||||
test_v128 | u64x2,
|
||||
i64x2,
|
||||
f64x2,
|
||||
u32x4,
|
||||
|
|
@ -212,12 +219,10 @@ impl_from_bits!(
|
|||
b8x16
|
||||
);
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
use coresimd::x86::__m128;
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
use coresimd::x86::__m128i;
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
use coresimd::x86::__m128d;
|
||||
#[cfg(target_arch = "x86")]
|
||||
use coresimd::arch::x86::{__m128, __m128d, __m128i};
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use coresimd::arch::x86_64::{__m128, __m128d, __m128i};
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(f64x2: __m128, __m128i, __m128d);
|
||||
|
|
@ -242,7 +247,8 @@ impl_from_bits_!(i8x16: __m128, __m128i, __m128d);
|
|||
|
||||
impl_from!(
|
||||
f64x2: f64,
|
||||
f64x2_from | f32x2,
|
||||
f64x2_from,
|
||||
test_v128 | f32x2,
|
||||
u64x2,
|
||||
i64x2,
|
||||
u32x2,
|
||||
|
|
@ -254,7 +260,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
f32x4: f32,
|
||||
f32x4_from | f64x4,
|
||||
f32x4_from,
|
||||
test_v128 | f64x4,
|
||||
u64x4,
|
||||
i64x4,
|
||||
u32x4,
|
||||
|
|
@ -266,7 +273,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
u64x2: u64,
|
||||
u64x2_from | f32x2,
|
||||
u64x2_from,
|
||||
test_v128 | f32x2,
|
||||
f64x2,
|
||||
i64x2,
|
||||
i32x2,
|
||||
|
|
@ -278,7 +286,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
i64x2: i64,
|
||||
i64x2_from | f32x2,
|
||||
i64x2_from,
|
||||
test_v128 | f32x2,
|
||||
f64x2,
|
||||
u64x2,
|
||||
i32x2,
|
||||
|
|
@ -290,7 +299,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
u32x4: u32,
|
||||
u32x4_from | f64x4,
|
||||
u32x4_from,
|
||||
test_v128 | f64x4,
|
||||
u64x4,
|
||||
i64x4,
|
||||
f32x4,
|
||||
|
|
@ -302,7 +312,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
i32x4: i32,
|
||||
i32x4_from | f64x4,
|
||||
i32x4_from,
|
||||
test_v128 | f64x4,
|
||||
u64x4,
|
||||
i64x4,
|
||||
f32x4,
|
||||
|
|
@ -314,7 +325,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
i16x8: i16,
|
||||
i16x8_from | f64x8,
|
||||
i16x8_from,
|
||||
test_v128 | f64x8,
|
||||
u64x8,
|
||||
i64x8,
|
||||
f32x8,
|
||||
|
|
@ -326,7 +338,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
u16x8: u16,
|
||||
u16x8_from | f64x8,
|
||||
u16x8_from,
|
||||
test_v128 | f64x8,
|
||||
u64x8,
|
||||
i64x8,
|
||||
f32x8,
|
||||
|
|
|
|||
|
|
@ -1,34 +1,33 @@
|
|||
//! 16-bit wide portable packed vector types.
|
||||
|
||||
simd_api_imports!();
|
||||
|
||||
simd_i_ty! {
|
||||
i8x2: 2, i8, b8x2, i8x2_tests |
|
||||
i8x2: 2, i8, b8x2, i8x2_tests, test_v16 |
|
||||
i8, i8 |
|
||||
x0, x1 |
|
||||
/// A 16-bit wide vector with 2 `i8` lanes.
|
||||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u8x2: 2, u8, b8x2, u8x2_tests |
|
||||
u8x2: 2, u8, b8x2, u8x2_tests, test_v16 |
|
||||
u8, u8 |
|
||||
x0, x1 |
|
||||
/// A 16-bit wide vector with 2 `u8` lanes.
|
||||
}
|
||||
|
||||
simd_b_ty! {
|
||||
b8x2: 2, i8, b8x2_tests |
|
||||
b8x2: 2, i8, b8x2_tests, test_v16 |
|
||||
i8, i8 |
|
||||
x0, x1 |
|
||||
/// A 16-bit wide vector with 2 `bool` lanes.
|
||||
}
|
||||
|
||||
impl_from_bits!(i8x2: i8, i8x2_from_bits | u8x2, b8x2);
|
||||
impl_from_bits!(u8x2: u8, u8x2_from_bits | i8x2, b8x2);
|
||||
impl_from_bits!(i8x2: i8, i8x2_from_bits, test_v16 | u8x2, b8x2);
|
||||
impl_from_bits!(u8x2: u8, u8x2_from_bits, test_v16 | i8x2, b8x2);
|
||||
|
||||
impl_from!(
|
||||
i8x2: i8,
|
||||
i8x2_from | f64x2,
|
||||
i8x2_from,
|
||||
test_v16 | f64x2,
|
||||
u64x2,
|
||||
i64x2,
|
||||
f32x2,
|
||||
|
|
@ -39,7 +38,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
u8x2: u8,
|
||||
u8x2_from | f64x2,
|
||||
u8x2_from,
|
||||
test_v16 | f64x2,
|
||||
u64x2,
|
||||
i64x2,
|
||||
f32x2,
|
||||
|
|
|
|||
|
|
@ -1,11 +1,8 @@
|
|||
//! 256-bit wide portable packed vector types.
|
||||
|
||||
simd_api_imports!();
|
||||
|
||||
use coresimd::simd::{b8x16, b8x4, b8x8};
|
||||
|
||||
simd_i_ty! {
|
||||
i8x32: 32, i8, b8x32, i8x32_tests |
|
||||
i8x32: 32, i8, b8x32, i8x32_tests, test_v256 |
|
||||
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
|
||||
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
|
|
@ -16,7 +13,7 @@ simd_i_ty! {
|
|||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u8x32: 32, u8, b8x32, u8x32_tests |
|
||||
u8x32: 32, u8, b8x32, u8x32_tests, test_v256 |
|
||||
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
|
||||
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
|
|
@ -27,7 +24,7 @@ simd_u_ty! {
|
|||
}
|
||||
|
||||
simd_b_ty! {
|
||||
b8x32: 32, i8, b8x32_tests |
|
||||
b8x32: 32, i8, b8x32_tests, test_v256 |
|
||||
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
|
||||
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
|
|
@ -38,7 +35,7 @@ simd_b_ty! {
|
|||
}
|
||||
|
||||
simd_i_ty! {
|
||||
i16x16: 16, i16, b8x16, i16x16_tests |
|
||||
i16x16: 16, i16, b8x16, i16x16_tests, test_v256 |
|
||||
i16, i16, i16, i16, i16, i16, i16, i16,
|
||||
i16, i16, i16, i16, i16, i16, i16, i16 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
|
|
@ -47,7 +44,7 @@ simd_i_ty! {
|
|||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u16x16: 16, u16, b8x16, u16x16_tests |
|
||||
u16x16: 16, u16, b8x16, u16x16_tests, test_v256 |
|
||||
u16, u16, u16, u16, u16, u16, u16, u16,
|
||||
u16, u16, u16, u16, u16, u16, u16, u16 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
|
|
@ -56,42 +53,42 @@ simd_u_ty! {
|
|||
}
|
||||
|
||||
simd_i_ty! {
|
||||
i32x8: 8, i32, b8x8, i32x8_tests |
|
||||
i32x8: 8, i32, b8x8, i32x8_tests, test_v256 |
|
||||
i32, i32, i32, i32, i32, i32, i32, i32 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7 |
|
||||
/// A 256-bit vector with 8 `i32` lanes.
|
||||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u32x8: 8, u32, b8x8, u32x8_tests |
|
||||
u32x8: 8, u32, b8x8, u32x8_tests, test_v256 |
|
||||
u32, u32, u32, u32, u32, u32, u32, u32 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7 |
|
||||
/// A 256-bit vector with 8 `u32` lanes.
|
||||
}
|
||||
|
||||
simd_f_ty! {
|
||||
f32x8: 8, f32, b8x8, f32x8_tests |
|
||||
f32x8: 8, f32, b8x8, f32x8_tests, test_v256 |
|
||||
f32, f32, f32, f32, f32, f32, f32, f32 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7 |
|
||||
/// A 256-bit vector with 8 `f32` lanes.
|
||||
}
|
||||
|
||||
simd_i_ty! {
|
||||
i64x4: 4, i64, b8x4, i64x4_tests |
|
||||
i64x4: 4, i64, b8x4, i64x4_tests, test_v256 |
|
||||
i64, i64, i64, i64 |
|
||||
x0, x1, x2, x3 |
|
||||
/// A 256-bit vector with 4 `i64` lanes.
|
||||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u64x4: 4, u64, b8x4, u64x4_tests |
|
||||
u64x4: 4, u64, b8x4, u64x4_tests, test_v256 |
|
||||
u64, u64, u64, u64 |
|
||||
x0, x1, x2, x3 |
|
||||
/// A 256-bit vector with 4 `u64` lanes.
|
||||
}
|
||||
|
||||
simd_f_ty! {
|
||||
f64x4: 4, f64, b8x4, f64x4_tests |
|
||||
f64x4: 4, f64, b8x4, f64x4_tests, test_v256 |
|
||||
f64, f64, f64, f64 |
|
||||
x0, x1, x2, x3 |
|
||||
/// A 256-bit vector with 4 `f64` lanes.
|
||||
|
|
@ -99,7 +96,8 @@ simd_f_ty! {
|
|||
|
||||
impl_from_bits!(
|
||||
i8x32: i8,
|
||||
i8x32_from_bits | u64x4,
|
||||
i8x32_from_bits,
|
||||
test_v256 | u64x4,
|
||||
i64x4,
|
||||
f64x4,
|
||||
u32x8,
|
||||
|
|
@ -112,7 +110,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
u8x32: u8,
|
||||
u8x32_from_bits | u64x4,
|
||||
u8x32_from_bits,
|
||||
test_v256 | u64x4,
|
||||
i64x4,
|
||||
f64x4,
|
||||
u32x8,
|
||||
|
|
@ -125,7 +124,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
i16x16: i16,
|
||||
i16x16_from_bits | u64x4,
|
||||
i16x16_from_bits,
|
||||
test_v256 | u64x4,
|
||||
i64x4,
|
||||
f64x4,
|
||||
u32x8,
|
||||
|
|
@ -138,7 +138,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
u16x16: u16,
|
||||
u16x16_from_bits | u64x4,
|
||||
u16x16_from_bits,
|
||||
test_v256 | u64x4,
|
||||
i64x4,
|
||||
f64x4,
|
||||
u32x8,
|
||||
|
|
@ -151,7 +152,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
i32x8: i32,
|
||||
i32x8_from_bits | u64x4,
|
||||
i32x8_from_bits,
|
||||
test_v256 | u64x4,
|
||||
i64x4,
|
||||
f64x4,
|
||||
u32x8,
|
||||
|
|
@ -164,7 +166,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
u32x8: u32,
|
||||
u32x8_from_bits | u64x4,
|
||||
u32x8_from_bits,
|
||||
test_v256 | u64x4,
|
||||
i64x4,
|
||||
f64x4,
|
||||
i32x8,
|
||||
|
|
@ -177,7 +180,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
f32x8: f32,
|
||||
f32x8_from_bits | u64x4,
|
||||
f32x8_from_bits,
|
||||
test_v256 | u64x4,
|
||||
i64x4,
|
||||
f64x4,
|
||||
i32x8,
|
||||
|
|
@ -190,7 +194,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
i64x4: i64,
|
||||
i64x4_from_bits | u64x4,
|
||||
i64x4_from_bits,
|
||||
test_v256 | u64x4,
|
||||
f64x4,
|
||||
i32x8,
|
||||
u32x8,
|
||||
|
|
@ -203,7 +208,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
u64x4: u64,
|
||||
u64x4_from_bits | i64x4,
|
||||
u64x4_from_bits,
|
||||
test_v256 | i64x4,
|
||||
f64x4,
|
||||
i32x8,
|
||||
u32x8,
|
||||
|
|
@ -216,7 +222,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
f64x4: f64,
|
||||
f64x4_from_bits | i64x4,
|
||||
f64x4_from_bits,
|
||||
test_v256 | i64x4,
|
||||
u64x4,
|
||||
i32x8,
|
||||
u32x8,
|
||||
|
|
@ -228,12 +235,10 @@ impl_from_bits!(
|
|||
b8x32
|
||||
);
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
use coresimd::x86::__m256;
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
use coresimd::x86::__m256i;
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
use coresimd::x86::__m256d;
|
||||
#[cfg(target_arch = "x86")]
|
||||
use coresimd::arch::x86::{__m256, __m256d, __m256i};
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use coresimd::arch::x86_64::{__m256, __m256d, __m256i};
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(f64x4: __m256, __m256i, __m256d);
|
||||
|
|
@ -258,7 +263,8 @@ impl_from_bits_!(i8x32: __m256, __m256i, __m256d);
|
|||
|
||||
impl_from!(
|
||||
f64x4: f64,
|
||||
f64x4_from | u64x4,
|
||||
f64x4_from,
|
||||
test_v256 | u64x4,
|
||||
i64x4,
|
||||
u32x4,
|
||||
i32x4,
|
||||
|
|
@ -270,7 +276,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
i64x4: i64,
|
||||
i64x4_from | u64x4,
|
||||
i64x4_from,
|
||||
test_v256 | u64x4,
|
||||
f64x4,
|
||||
u32x4,
|
||||
i32x4,
|
||||
|
|
@ -282,7 +289,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
u64x4: u64,
|
||||
u64x4_from | i64x4,
|
||||
u64x4_from,
|
||||
test_v256 | i64x4,
|
||||
f64x4,
|
||||
u32x4,
|
||||
i32x4,
|
||||
|
|
@ -294,7 +302,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
f32x8: f32,
|
||||
f32x8_from | u64x8,
|
||||
f32x8_from,
|
||||
test_v256 | u64x8,
|
||||
i64x8,
|
||||
f64x8,
|
||||
u32x8,
|
||||
|
|
@ -306,7 +315,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
i32x8: i32,
|
||||
i32x8_from | u64x8,
|
||||
i32x8_from,
|
||||
test_v256 | u64x8,
|
||||
i64x8,
|
||||
f64x8,
|
||||
u32x8,
|
||||
|
|
@ -318,7 +328,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
u32x8: u32,
|
||||
u32x8_from | u64x8,
|
||||
u32x8_from,
|
||||
test_v256 | u64x8,
|
||||
i64x8,
|
||||
f64x8,
|
||||
i32x8,
|
||||
|
|
@ -330,7 +341,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
i16x16: i16,
|
||||
i16x16_from | u32x16,
|
||||
i16x16_from,
|
||||
test_v256 | u32x16,
|
||||
i32x16,
|
||||
f32x16,
|
||||
u16x16,
|
||||
|
|
@ -339,12 +351,13 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
u16x16: u16,
|
||||
u16x16_from | u32x16,
|
||||
u16x16_from,
|
||||
test_v256 | u32x16,
|
||||
i32x16,
|
||||
f32x16,
|
||||
i16x16,
|
||||
u8x16,
|
||||
i8x16
|
||||
);
|
||||
impl_from!(i8x32: i8, i8x32_from | u16x32, i16x32, u8x32);
|
||||
impl_from!(u8x32: u8, u8x32_from | u16x32, i16x32, i8x32);
|
||||
impl_from!(i8x32: i8, i8x32_from, test_v256 | u16x32, i16x32, u8x32);
|
||||
impl_from!(u8x32: u8, u8x32_from, test_v256 | u16x32, i16x32, i8x32);
|
||||
|
|
|
|||
|
|
@ -1,51 +1,78 @@
|
|||
//! 32-bit wide portable packed vector types.
|
||||
|
||||
simd_api_imports!();
|
||||
use coresimd::simd::b8x2;
|
||||
|
||||
simd_i_ty! {
|
||||
i16x2: 2, i16, b8x2, i16x2_tests |
|
||||
i16x2: 2, i16, b8x2, i16x2_tests, test_v32 |
|
||||
i16, i16 |
|
||||
x0, x1 |
|
||||
/// A 32-bit wide vector with 2 `i16` lanes.
|
||||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u16x2: 2, u16, b8x2, u16x2_tests |
|
||||
u16x2: 2, u16, b8x2, u16x2_tests, test_v32 |
|
||||
u16, u16 |
|
||||
x0, x1 |
|
||||
/// A 32-bit wide vector with 2 `u16` lanes.
|
||||
}
|
||||
|
||||
simd_i_ty! {
|
||||
i8x4: 4, i8, b8x4, i8x4_tests |
|
||||
i8x4: 4, i8, b8x4, i8x4_tests, test_v32 |
|
||||
i8, i8, i8, i8 |
|
||||
x0, x1, x2, x3 |
|
||||
/// A 32-bit wide vector with 4 `i8` lanes.
|
||||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u8x4: 4, u8, b8x4, u8x4_tests |
|
||||
u8x4: 4, u8, b8x4, u8x4_tests, test_v32 |
|
||||
u8, u8, u8, u8 |
|
||||
x0, x1, x2, x3 |
|
||||
/// A 32-bit wide vector with 4 `u8` lanes.
|
||||
}
|
||||
|
||||
simd_b_ty! {
|
||||
b8x4: 4, i8, b8x4_tests |
|
||||
b8x4: 4, i8, b8x4_tests, test_v32 |
|
||||
i8, i8, i8, i8 |
|
||||
x0, x1, x2, x3 |
|
||||
/// A 32-bit wide vector with 4 `bool` lanes.
|
||||
}
|
||||
|
||||
impl_from_bits!(i16x2: i16, i16x2_from_bits | u16x2, i8x4, u8x4, b8x4);
|
||||
impl_from_bits!(u16x2: u16, u16x2_from_bits | i16x2, i8x4, u8x4, b8x4);
|
||||
impl_from_bits!(i8x4: i8, i8x2_from_bits | i16x2, u16x2, u8x4, b8x4);
|
||||
impl_from_bits!(u8x4: u8, u8x2_from_bits | i16x2, u16x2, i8x4, b8x4);
|
||||
impl_from_bits!(
|
||||
i16x2: i16,
|
||||
i16x2_from_bits,
|
||||
test_v32 | u16x2,
|
||||
i8x4,
|
||||
u8x4,
|
||||
b8x4
|
||||
);
|
||||
impl_from_bits!(
|
||||
u16x2: u16,
|
||||
u16x2_from_bits,
|
||||
test_v32 | i16x2,
|
||||
i8x4,
|
||||
u8x4,
|
||||
b8x4
|
||||
);
|
||||
impl_from_bits!(
|
||||
i8x4: i8,
|
||||
i8x2_from_bits,
|
||||
test_v32 | i16x2,
|
||||
u16x2,
|
||||
u8x4,
|
||||
b8x4
|
||||
);
|
||||
impl_from_bits!(
|
||||
u8x4: u8,
|
||||
u8x2_from_bits,
|
||||
test_v32 | i16x2,
|
||||
u16x2,
|
||||
i8x4,
|
||||
b8x4
|
||||
);
|
||||
|
||||
impl_from!(
|
||||
i16x2: i16,
|
||||
i16x2_from | f64x2,
|
||||
i16x2_from,
|
||||
test_v32 | f64x2,
|
||||
u64x2,
|
||||
i64x2,
|
||||
f32x2,
|
||||
|
|
@ -58,7 +85,8 @@ impl_from!(
|
|||
|
||||
impl_from!(
|
||||
u16x2: u16,
|
||||
u16x2_from | f64x2,
|
||||
u16x2_from,
|
||||
test_v32 | f64x2,
|
||||
u64x2,
|
||||
i64x2,
|
||||
f32x2,
|
||||
|
|
@ -71,7 +99,8 @@ impl_from!(
|
|||
|
||||
impl_from!(
|
||||
i8x4: i8,
|
||||
i8x4_from | f64x4,
|
||||
i8x4_from,
|
||||
test_v32 | f64x4,
|
||||
u64x4,
|
||||
i64x4,
|
||||
u32x4,
|
||||
|
|
@ -84,7 +113,8 @@ impl_from!(
|
|||
|
||||
impl_from!(
|
||||
u8x4: u8,
|
||||
u8x4_from | f64x4,
|
||||
u8x4_from,
|
||||
test_v32 | f64x4,
|
||||
u64x4,
|
||||
i64x4,
|
||||
u32x4,
|
||||
|
|
|
|||
|
|
@ -1,11 +1,8 @@
|
|||
//! 512-bit wide portable packed vector types.
|
||||
|
||||
simd_api_imports!();
|
||||
|
||||
use coresimd::simd::{b8x16, b8x32, b8x8};
|
||||
|
||||
simd_i_ty! {
|
||||
i8x64: 64, i8, b8x64, i8x64_tests |
|
||||
i8x64: 64, i8, b8x64, i8x64_tests, test_v512 |
|
||||
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
|
||||
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
|
||||
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
|
||||
|
|
@ -22,7 +19,7 @@ simd_i_ty! {
|
|||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u8x64: 64, u8, b8x64, u8x64_tests |
|
||||
u8x64: 64, u8, b8x64, u8x64_tests, test_v512 |
|
||||
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
|
||||
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
|
||||
u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8,
|
||||
|
|
@ -39,7 +36,7 @@ simd_u_ty! {
|
|||
}
|
||||
|
||||
simd_b_ty! {
|
||||
b8x64: 64, i8, b8x64_tests |
|
||||
b8x64: 64, i8, b8x64_tests, test_v512 |
|
||||
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
|
||||
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
|
||||
i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8,
|
||||
|
|
@ -56,7 +53,7 @@ simd_b_ty! {
|
|||
}
|
||||
|
||||
simd_i_ty! {
|
||||
i16x32: 32, i16, b8x32, i16x32_tests |
|
||||
i16x32: 32, i16, b8x32, i16x32_tests, test_v512 |
|
||||
i16, i16, i16, i16, i16, i16, i16, i16,
|
||||
i16, i16, i16, i16, i16, i16, i16, i16,
|
||||
i16, i16, i16, i16, i16, i16, i16, i16,
|
||||
|
|
@ -69,7 +66,7 @@ simd_i_ty! {
|
|||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u16x32: 32, u16, b8x32, u16x32_tests |
|
||||
u16x32: 32, u16, b8x32, u16x32_tests, test_v512 |
|
||||
u16, u16, u16, u16, u16, u16, u16, u16,
|
||||
u16, u16, u16, u16, u16, u16, u16, u16,
|
||||
u16, u16, u16, u16, u16, u16, u16, u16,
|
||||
|
|
@ -81,7 +78,7 @@ simd_u_ty! {
|
|||
/// A 512-bit vector with 32 `u16` lanes.
|
||||
}
|
||||
simd_i_ty! {
|
||||
i32x16: 16, i32, b8x16, i32x16_tests |
|
||||
i32x16: 16, i32, b8x16, i32x16_tests, test_v512 |
|
||||
i32, i32, i32, i32, i32, i32, i32, i32,
|
||||
i32, i32, i32, i32, i32, i32, i32, i32 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
|
|
@ -90,7 +87,7 @@ simd_i_ty! {
|
|||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u32x16: 16, u32, b8x16, u32x16_tests |
|
||||
u32x16: 16, u32, b8x16, u32x16_tests, test_v512 |
|
||||
u32, u32, u32, u32, u32, u32, u32, u32,
|
||||
u32, u32, u32, u32, u32, u32, u32, u32 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
|
|
@ -99,7 +96,7 @@ simd_u_ty! {
|
|||
}
|
||||
|
||||
simd_f_ty! {
|
||||
f32x16: 16, f32, b8x16, f32x16_tests |
|
||||
f32x16: 16, f32, b8x16, f32x16_tests, test_v512 |
|
||||
f32, f32, f32, f32, f32, f32, f32, f32,
|
||||
f32, f32, f32, f32, f32, f32, f32, f32 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
|
|
@ -108,21 +105,21 @@ simd_f_ty! {
|
|||
}
|
||||
|
||||
simd_i_ty! {
|
||||
i64x8: 8, i64, b8x8, i64x8_tests |
|
||||
i64x8: 8, i64, b8x8, i64x8_tests, test_v512 |
|
||||
i64, i64, i64, i64, i64, i64, i64, i64 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7 |
|
||||
/// A 512-bit vector with 8 `i64` lanes.
|
||||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u64x8: 8, u64, b8x8, u64x8_tests |
|
||||
u64x8: 8, u64, b8x8, u64x8_tests, test_v512 |
|
||||
u64, u64, u64, u64, u64, u64, u64, u64 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7 |
|
||||
/// A 512-bit vector with 8 `u64` lanes.
|
||||
}
|
||||
|
||||
simd_f_ty! {
|
||||
f64x8: 8, f64, b8x8, f64x8_tests |
|
||||
f64x8: 8, f64, b8x8, f64x8_tests, test_v512 |
|
||||
f64, f64, f64, f64, f64, f64, f64, f64 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7 |
|
||||
/// A 512-bit vector with 8 `f64` lanes.
|
||||
|
|
@ -130,7 +127,8 @@ simd_f_ty! {
|
|||
|
||||
impl_from_bits!(
|
||||
i8x64: i8,
|
||||
i8x64_from_bits | u64x8,
|
||||
i8x64_from_bits,
|
||||
test_v512 | u64x8,
|
||||
i64x8,
|
||||
f64x8,
|
||||
u32x16,
|
||||
|
|
@ -143,7 +141,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
u8x64: u8,
|
||||
u8x64_from_bits | u64x8,
|
||||
u8x64_from_bits,
|
||||
test_v512 | u64x8,
|
||||
i64x8,
|
||||
f64x8,
|
||||
u32x16,
|
||||
|
|
@ -156,7 +155,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
i16x32: i16,
|
||||
i16x32_from_bits | u64x8,
|
||||
i16x32_from_bits,
|
||||
test_v512 | u64x8,
|
||||
i64x8,
|
||||
f64x8,
|
||||
u32x16,
|
||||
|
|
@ -169,7 +169,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
u16x32: u16,
|
||||
u16x32_from_bits | u64x8,
|
||||
u16x32_from_bits,
|
||||
test_v512 | u64x8,
|
||||
i64x8,
|
||||
f64x8,
|
||||
u32x16,
|
||||
|
|
@ -182,7 +183,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
i32x16: i32,
|
||||
i32x16_from_bits | u64x8,
|
||||
i32x16_from_bits,
|
||||
test_v512 | u64x8,
|
||||
i64x8,
|
||||
f64x8,
|
||||
u32x16,
|
||||
|
|
@ -195,7 +197,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
u32x16: u32,
|
||||
u32x16_from_bits | u64x8,
|
||||
u32x16_from_bits,
|
||||
test_v512 | u64x8,
|
||||
i64x8,
|
||||
f64x8,
|
||||
i32x16,
|
||||
|
|
@ -208,7 +211,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
f32x16: f32,
|
||||
f32x16_from_bits | u64x8,
|
||||
f32x16_from_bits,
|
||||
test_v512 | u64x8,
|
||||
i64x8,
|
||||
f64x8,
|
||||
u32x16,
|
||||
|
|
@ -221,7 +225,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
i64x8: i64,
|
||||
i64x8_from_bits | u64x8,
|
||||
i64x8_from_bits,
|
||||
test_v512 | u64x8,
|
||||
f64x8,
|
||||
u32x16,
|
||||
i32x16,
|
||||
|
|
@ -234,7 +239,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
u64x8: u64,
|
||||
u64x8_from_bits | i64x8,
|
||||
u64x8_from_bits,
|
||||
test_v512 | i64x8,
|
||||
f64x8,
|
||||
u32x16,
|
||||
i32x16,
|
||||
|
|
@ -247,7 +253,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
f64x8: f64,
|
||||
f64x8_from_bits | u64x8,
|
||||
f64x8_from_bits,
|
||||
test_v512 | u64x8,
|
||||
i64x8,
|
||||
u32x16,
|
||||
i32x16,
|
||||
|
|
@ -261,7 +268,8 @@ impl_from_bits!(
|
|||
|
||||
impl_from!(
|
||||
f64x8: f64,
|
||||
f64x8_from | u64x8,
|
||||
f64x8_from,
|
||||
test_v512 | u64x8,
|
||||
i64x8,
|
||||
u32x8,
|
||||
i32x8,
|
||||
|
|
@ -273,7 +281,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
i64x8: i64,
|
||||
i64x8_from | u64x8,
|
||||
i64x8_from,
|
||||
test_v512 | u64x8,
|
||||
f64x8,
|
||||
u32x8,
|
||||
i32x8,
|
||||
|
|
@ -285,7 +294,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
u64x8: u64,
|
||||
u64x8_from | i64x8,
|
||||
u64x8_from,
|
||||
test_v512 | i64x8,
|
||||
f64x8,
|
||||
u32x8,
|
||||
i32x8,
|
||||
|
|
@ -298,7 +308,8 @@ impl_from!(
|
|||
|
||||
impl_from!(
|
||||
f32x16: f32,
|
||||
f32x16_from | u32x16,
|
||||
f32x16_from,
|
||||
test_v512 | u32x16,
|
||||
i32x16,
|
||||
u16x16,
|
||||
i16x16,
|
||||
|
|
@ -307,7 +318,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
i32x16: i32,
|
||||
i32x16_from | u32x16,
|
||||
i32x16_from,
|
||||
test_v512 | u32x16,
|
||||
f32x16,
|
||||
u16x16,
|
||||
i16x16,
|
||||
|
|
@ -316,7 +328,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
u32x16: u32,
|
||||
u32x16_from | i32x16,
|
||||
u32x16_from,
|
||||
test_v512 | i32x16,
|
||||
f32x16,
|
||||
u16x16,
|
||||
i16x16,
|
||||
|
|
@ -324,8 +337,8 @@ impl_from!(
|
|||
i8x16
|
||||
);
|
||||
|
||||
impl_from!(i16x32: i16, i16x32_from | u16x32, u8x32, i8x32);
|
||||
impl_from!(u16x32: u16, u16x32_from | i16x32, u8x32, i8x32);
|
||||
impl_from!(i16x32: i16, i16x32_from, test_v512 | u16x32, u8x32, i8x32);
|
||||
impl_from!(u16x32: u16, u16x32_from, test_v512 | i16x32, u8x32, i8x32);
|
||||
|
||||
impl_from!(i8x64: i8, i8x64_from | u8x64);
|
||||
impl_from!(u8x64: u8, u8x64_from | i8x64);
|
||||
impl_from!(i8x64: i8, i8x64_from, test_v512 | u8x64);
|
||||
impl_from!(u8x64: u8, u8x64_from, test_v512 | i8x64);
|
||||
|
|
|
|||
|
|
@ -1,60 +1,57 @@
|
|||
//! 64-bit wide portable packed vector types.
|
||||
|
||||
simd_api_imports!();
|
||||
|
||||
use coresimd::simd::{b8x2, b8x4};
|
||||
|
||||
simd_i_ty! {
|
||||
i8x8: 8, i8, b8x8, i8x8_tests |
|
||||
i8x8: 8, i8, b8x8, i8x8_tests, test_v64 |
|
||||
i8, i8, i8, i8, i8, i8, i8, i8 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7 |
|
||||
/// A 64-bit vector with 8 `i8` lanes.
|
||||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u8x8: 8, u8, b8x8, u8x8_tests |
|
||||
u8x8: 8, u8, b8x8, u8x8_tests, test_v64 |
|
||||
u8, u8, u8, u8, u8, u8, u8, u8 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7 |
|
||||
/// A 64-bit vector with 8 `u8` lanes.
|
||||
}
|
||||
|
||||
simd_b_ty! {
|
||||
b8x8: 8, i8, b8x8_tests |
|
||||
b8x8: 8, i8, b8x8_tests, test_v64 |
|
||||
i8, i8, i8, i8, i8, i8, i8, i8 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7 |
|
||||
/// A 64-bit vector with 8 `bool` lanes.
|
||||
}
|
||||
|
||||
simd_i_ty! {
|
||||
i16x4: 4, i16, b8x4, i16x4_tests |
|
||||
i16x4: 4, i16, b8x4, i16x4_tests, test_v64 |
|
||||
i16, i16, i16, i16 |
|
||||
x0, x1, x2, x3 |
|
||||
/// A 64-bit vector with 4 `i16` lanes.
|
||||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u16x4: 4, u16, b8x4, u16x4_tests |
|
||||
u16x4: 4, u16, b8x4, u16x4_tests, test_v64 |
|
||||
u16, u16, u16, u16 |
|
||||
x0, x1, x2, x3 |
|
||||
/// A 64-bit vector with 4 `u16` lanes.
|
||||
}
|
||||
|
||||
simd_i_ty! {
|
||||
i32x2: 2, i32, b8x2, i32x2_tests |
|
||||
i32x2: 2, i32, b8x2, i32x2_tests, test_v64 |
|
||||
i32, i32 |
|
||||
x0, x1 |
|
||||
/// A 64-bit vector with 2 `i32` lanes.
|
||||
}
|
||||
|
||||
simd_u_ty! {
|
||||
u32x2: 2, u32, b8x2, u32x2_tests |
|
||||
u32x2: 2, u32, b8x2, u32x2_tests, test_v64 |
|
||||
u32, u32 |
|
||||
x0, x1 |
|
||||
/// A 64-bit vector with 2 `u32` lanes.
|
||||
}
|
||||
|
||||
simd_f_ty! {
|
||||
f32x2: 2, f32, b8x2, f32x2_tests |
|
||||
f32x2: 2, f32, b8x2, f32x2_tests, test_v64 |
|
||||
f32, f32 |
|
||||
x0, x1 |
|
||||
/// A 64-bit vector with 2 `f32` lanes.
|
||||
|
|
@ -62,7 +59,8 @@ simd_f_ty! {
|
|||
|
||||
impl_from_bits!(
|
||||
u32x2: u32,
|
||||
u32x2_from_bits | i32x2,
|
||||
u32x2_from_bits,
|
||||
test_v64 | i32x2,
|
||||
f32x2,
|
||||
u16x4,
|
||||
i16x4,
|
||||
|
|
@ -72,7 +70,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
i32x2: i32,
|
||||
i32x2_from_bits | u32x2,
|
||||
i32x2_from_bits,
|
||||
test_v64 | u32x2,
|
||||
f32x2,
|
||||
u16x4,
|
||||
i16x4,
|
||||
|
|
@ -82,7 +81,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
f32x2: f32,
|
||||
f32x2_from_bits | i32x2,
|
||||
f32x2_from_bits,
|
||||
test_v64 | i32x2,
|
||||
u32x2,
|
||||
u16x4,
|
||||
i16x4,
|
||||
|
|
@ -92,7 +92,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
u16x4: u16,
|
||||
u16x4_from_bits | u32x2,
|
||||
u16x4_from_bits,
|
||||
test_v64 | u32x2,
|
||||
i32x2,
|
||||
i16x4,
|
||||
u8x8,
|
||||
|
|
@ -101,7 +102,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
i16x4: i16,
|
||||
i16x4_from_bits | u32x2,
|
||||
i16x4_from_bits,
|
||||
test_v64 | u32x2,
|
||||
i32x2,
|
||||
u16x4,
|
||||
u8x8,
|
||||
|
|
@ -110,7 +112,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
u8x8: u8,
|
||||
u8x8_from_bits | u32x2,
|
||||
u8x8_from_bits,
|
||||
test_v64 | u32x2,
|
||||
i32x2,
|
||||
u16x4,
|
||||
i16x4,
|
||||
|
|
@ -119,7 +122,8 @@ impl_from_bits!(
|
|||
);
|
||||
impl_from_bits!(
|
||||
i8x8: i8,
|
||||
i8x8_from_bits | u32x2,
|
||||
i8x8_from_bits,
|
||||
test_v64 | u32x2,
|
||||
i32x2,
|
||||
u16x4,
|
||||
i16x4,
|
||||
|
|
@ -127,8 +131,11 @@ impl_from_bits!(
|
|||
b8x8
|
||||
);
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
use coresimd::x86::__m64;
|
||||
#[cfg(target_arch = "x86")]
|
||||
use coresimd::arch::x86::__m64;
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use coresimd::arch::x86_64::__m64;
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(f32x2: __m64);
|
||||
|
|
@ -147,7 +154,8 @@ impl_from_bits_!(i8x8: __m64);
|
|||
|
||||
impl_from!(
|
||||
f32x2: f32,
|
||||
f32x2_from | f64x2,
|
||||
f32x2_from,
|
||||
test_v64 | f64x2,
|
||||
u64x2,
|
||||
i64x2,
|
||||
u32x2,
|
||||
|
|
@ -160,7 +168,8 @@ impl_from!(
|
|||
|
||||
impl_from!(
|
||||
u32x2: u32,
|
||||
u32x2_from | f64x2,
|
||||
u32x2_from,
|
||||
test_v64 | f64x2,
|
||||
u64x2,
|
||||
i64x2,
|
||||
f32x2,
|
||||
|
|
@ -173,7 +182,8 @@ impl_from!(
|
|||
|
||||
impl_from!(
|
||||
i32x2: i32,
|
||||
i32x2_from | f64x2,
|
||||
i32x2_from,
|
||||
test_v64 | f64x2,
|
||||
u64x2,
|
||||
i64x2,
|
||||
f32x2,
|
||||
|
|
@ -186,7 +196,8 @@ impl_from!(
|
|||
|
||||
impl_from!(
|
||||
u16x4: u16,
|
||||
u16x4_from | f64x4,
|
||||
u16x4_from,
|
||||
test_v64 | f64x4,
|
||||
u64x4,
|
||||
i64x4,
|
||||
f32x4,
|
||||
|
|
@ -199,7 +210,8 @@ impl_from!(
|
|||
|
||||
impl_from!(
|
||||
i16x4: i16,
|
||||
i16x4_from | f64x4,
|
||||
i16x4_from,
|
||||
test_v64 | f64x4,
|
||||
u64x4,
|
||||
i64x4,
|
||||
f32x4,
|
||||
|
|
@ -211,7 +223,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
i8x8: i8,
|
||||
i8x8_from | f64x8,
|
||||
i8x8_from,
|
||||
test_v64 | f64x8,
|
||||
u64x8,
|
||||
i64x8,
|
||||
f32x8,
|
||||
|
|
@ -223,7 +236,8 @@ impl_from!(
|
|||
);
|
||||
impl_from!(
|
||||
u8x8: u8,
|
||||
u8x8_from | f64x8,
|
||||
u8x8_from,
|
||||
test_v64 | f64x8,
|
||||
u64x8,
|
||||
i64x8,
|
||||
f32x8,
|
||||
|
|
|
|||
|
|
@ -31,4 +31,18 @@ extern "platform-intrinsic" {
|
|||
pub fn simd_and<T>(x: T, y: T) -> T;
|
||||
pub fn simd_or<T>(x: T, y: T) -> T;
|
||||
pub fn simd_xor<T>(x: T, y: T) -> T;
|
||||
|
||||
pub fn simd_reduce_add_unordered<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_mul_unordered<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_add_ordered<T, U>(x: T, acc: U) -> U;
|
||||
pub fn simd_reduce_mul_ordered<T, U>(x: T, acc: U) -> U;
|
||||
pub fn simd_reduce_min<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_max<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_min_nanless<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_max_nanless<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_and<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_or<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_xor<T, U>(x: T) -> U;
|
||||
pub fn simd_reduce_all<T>(x: T) -> bool;
|
||||
pub fn simd_reduce_any<T>(x: T) -> bool;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -44,6 +44,16 @@ extern crate stdsimd_test;
|
|||
#[cfg(test)]
|
||||
extern crate test;
|
||||
|
||||
macro_rules! test_v16 { ($item:item) => {} }
|
||||
macro_rules! test_v32 { ($item:item) => {} }
|
||||
macro_rules! test_v64 { ($item:item) => {} }
|
||||
macro_rules! test_v128 { ($item:item) => {} }
|
||||
macro_rules! test_v256 { ($item:item) => {} }
|
||||
macro_rules! test_v512 { ($item:item) => {} }
|
||||
macro_rules! vector_impl {
|
||||
($([$f:ident, $($args:tt)*]),*) => { $($f!($($args)*);)* }
|
||||
}
|
||||
|
||||
#[path = "../../../coresimd/mod.rs"]
|
||||
mod coresimd;
|
||||
|
||||
|
|
|
|||
42
library/stdarch/crates/coresimd/tests/v128.rs
Normal file
42
library/stdarch/crates/coresimd/tests/v128.rs
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
//! coresimd 128-bit wide vector tests
|
||||
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
|
||||
#![allow(unused_imports, dead_code)]
|
||||
|
||||
#[cfg(test)]
|
||||
extern crate coresimd;
|
||||
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v16 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v32 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v64 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v128 { ($item:item) => { $item } }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v256 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v512 { ($item:item) => {} }
|
||||
|
||||
#[cfg(test)]
|
||||
macro_rules! vector_impl {
|
||||
($([$f:ident, $($args:tt)*]),*) => { }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "../../../coresimd/ppsv/mod.rs"]
|
||||
mod ppsv;
|
||||
|
||||
#[cfg(test)]
|
||||
use std::marker;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use std::cmp;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
extern crate core as _core;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use _core::num;
|
||||
42
library/stdarch/crates/coresimd/tests/v16.rs
Normal file
42
library/stdarch/crates/coresimd/tests/v16.rs
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
//! coresimd 16-bit wide vector tests
|
||||
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
|
||||
#![allow(unused_imports, dead_code)]
|
||||
|
||||
#[cfg(test)]
|
||||
extern crate coresimd;
|
||||
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v16 { ($item:item) => { $item } }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v32 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v64 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v128 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v256 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v512 { ($item:item) => {} }
|
||||
|
||||
#[cfg(test)]
|
||||
macro_rules! vector_impl {
|
||||
($([$f:ident, $($args:tt)*]),*) => { }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "../../../coresimd/ppsv/mod.rs"]
|
||||
mod ppsv;
|
||||
|
||||
#[cfg(test)]
|
||||
use std::marker;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use std::cmp;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
extern crate core as _core;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use _core::num;
|
||||
42
library/stdarch/crates/coresimd/tests/v256.rs
Normal file
42
library/stdarch/crates/coresimd/tests/v256.rs
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
//! coresimd 256-bit wide vector tests
|
||||
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
|
||||
#![allow(unused_imports)]
|
||||
|
||||
#[cfg(test)]
|
||||
extern crate coresimd;
|
||||
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v16 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v32 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v64 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v128 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v256 { ($item:item) => { $item } }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v512 { ($item:item) => {} }
|
||||
|
||||
#[cfg(test)]
|
||||
macro_rules! vector_impl {
|
||||
($([$f:ident, $($args:tt)*]),*) => { }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "../../../coresimd/ppsv/mod.rs"]
|
||||
mod ppsv;
|
||||
|
||||
#[cfg(test)]
|
||||
use std::marker;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use std::cmp;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
extern crate core as _core;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use _core::num;
|
||||
42
library/stdarch/crates/coresimd/tests/v32.rs
Normal file
42
library/stdarch/crates/coresimd/tests/v32.rs
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
//! coresimd 32-bit wide vector tests
|
||||
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
|
||||
#![allow(unused_imports, dead_code)]
|
||||
|
||||
#[cfg(test)]
|
||||
extern crate coresimd;
|
||||
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v16 { ($item:item) => { } }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v32 { ($item:item) => { $item } }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v64 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v128 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v256 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v512 { ($item:item) => {} }
|
||||
|
||||
#[cfg(test)]
|
||||
macro_rules! vector_impl {
|
||||
($([$f:ident, $($args:tt)*]),*) => { }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "../../../coresimd/ppsv/mod.rs"]
|
||||
mod ppsv;
|
||||
|
||||
#[cfg(test)]
|
||||
use std::marker;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use std::cmp;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
extern crate core as _core;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use _core::num;
|
||||
42
library/stdarch/crates/coresimd/tests/v512.rs
Normal file
42
library/stdarch/crates/coresimd/tests/v512.rs
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
//! coresimd 512-bit wide vector tests
|
||||
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
|
||||
#![allow(unused_imports)]
|
||||
|
||||
#[cfg(test)]
|
||||
extern crate coresimd;
|
||||
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v16 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v32 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v64 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v128 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v256 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v512 { ($item:item) => { $item } }
|
||||
|
||||
#[cfg(test)]
|
||||
macro_rules! vector_impl {
|
||||
($([$f:ident, $($args:tt)*]),*) => { }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "../../../coresimd/ppsv/mod.rs"]
|
||||
mod ppsv;
|
||||
|
||||
#[cfg(test)]
|
||||
use std::marker;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use std::cmp;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
extern crate core as _core;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use _core::num;
|
||||
42
library/stdarch/crates/coresimd/tests/v64.rs
Normal file
42
library/stdarch/crates/coresimd/tests/v64.rs
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
//! coresimd 64-bit wide vector tests
|
||||
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
|
||||
#![allow(unused_imports, dead_code)]
|
||||
|
||||
#[cfg(test)]
|
||||
extern crate coresimd;
|
||||
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v16 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v32 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v64 { ($item:item) => { $item } }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v128 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v256 { ($item:item) => {} }
|
||||
#[cfg(test)]
|
||||
macro_rules! test_v512 { ($item:item) => {} }
|
||||
|
||||
#[cfg(test)]
|
||||
macro_rules! vector_impl {
|
||||
($([$f:ident, $($args:tt)*]),*) => { }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "../../../coresimd/ppsv/mod.rs"]
|
||||
mod ppsv;
|
||||
|
||||
#[cfg(test)]
|
||||
use std::marker;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use std::cmp;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
extern crate core as _core;
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use _core::num;
|
||||
Loading…
Add table
Add a link
Reference in a new issue