From bf11a67f0ff7f0e965ea647c24858410074ae19c Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Fri, 6 Apr 2018 16:29:45 +0200 Subject: [PATCH] remaining masks and select (#417) --- library/stdarch/coresimd/aarch64/neon.rs | 9 +- library/stdarch/coresimd/arm/neon.rs | 95 ++++++-- .../stdarch/coresimd/ppsv/api/bitwise_ops.rs | 4 +- .../coresimd/ppsv/api/bitwise_reductions.rs | 4 +- .../coresimd/ppsv/api/bitwise_scalar_ops.rs | 4 +- library/stdarch/coresimd/ppsv/api/cmp.rs | 4 +- .../ppsv/api/{bool_vectors.rs => masks.rs} | 9 +- ...lean_reductions.rs => masks_reductions.rs} | 6 +- .../stdarch/coresimd/ppsv/api/masks_select.rs | 59 +++++ library/stdarch/coresimd/ppsv/api/minimal.rs | 2 + .../coresimd/ppsv/api/minmax_reductions.rs | 2 +- library/stdarch/coresimd/ppsv/api/mod.rs | 58 ++--- library/stdarch/coresimd/ppsv/mod.rs | 5 +- library/stdarch/coresimd/ppsv/v128.rs | 205 ++++++++++++++---- library/stdarch/coresimd/ppsv/v16.rs | 34 ++- library/stdarch/coresimd/ppsv/v256.rs | 177 ++++++++++++--- library/stdarch/coresimd/ppsv/v32.rs | 74 +++++-- library/stdarch/coresimd/ppsv/v512.rs | 174 ++++++++++++--- library/stdarch/coresimd/ppsv/v64.rs | 129 ++++++++--- library/stdarch/coresimd/simd_llvm.rs | 2 + library/stdarch/coresimd/x86/mod.rs | 42 +++- 21 files changed, 857 insertions(+), 241 deletions(-) rename library/stdarch/coresimd/ppsv/api/{bool_vectors.rs => masks.rs} (96%) rename library/stdarch/coresimd/ppsv/api/{boolean_reductions.rs => masks_reductions.rs} (96%) create mode 100644 library/stdarch/coresimd/ppsv/api/masks_select.rs diff --git a/library/stdarch/coresimd/aarch64/neon.rs b/library/stdarch/coresimd/aarch64/neon.rs index 25f03a63d39b..6776098791e9 100644 --- a/library/stdarch/coresimd/aarch64/neon.rs +++ b/library/stdarch/coresimd/aarch64/neon.rs @@ -18,24 +18,29 @@ impl_from_bits_!( float64x1_t: u32x2, i32x2, f32x2, + m32x2, u16x4, i16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); impl_from_bits_!( float64x2_t: u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + 
m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); /// Vector add. diff --git a/library/stdarch/coresimd/arm/neon.rs b/library/stdarch/coresimd/arm/neon.rs index 98dac8db6890..f352b40405a8 100644 --- a/library/stdarch/coresimd/arm/neon.rs +++ b/library/stdarch/coresimd/arm/neon.rs @@ -70,245 +70,298 @@ impl_from_bits_!( int8x8_t: u32x2, i32x2, f32x2, + m32x2, u16x4, i16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); impl_from_bits_!( uint8x8_t: u32x2, i32x2, f32x2, + m32x2, u16x4, i16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); impl_from_bits_!( int16x4_t: u32x2, i32x2, f32x2, + m32x2, u16x4, i16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); impl_from_bits_!( uint16x4_t: u32x2, i32x2, f32x2, + m32x2, u16x4, i16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); impl_from_bits_!( int32x2_t: u32x2, i32x2, f32x2, + m32x2, u16x4, i16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); impl_from_bits_!( uint32x2_t: u32x2, i32x2, f32x2, + m32x2, u16x4, i16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); impl_from_bits_!( int64x1_t: u32x2, i32x2, f32x2, + m32x2, u16x4, i16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); impl_from_bits_!( float32x2_t: u32x2, i32x2, f32x2, + m32x2, u16x4, i16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); impl_from_bits_!( poly8x8_t: u32x2, i32x2, f32x2, + m32x2, u16x4, i16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); impl_from_bits_!( poly16x4_t: u32x2, i32x2, f32x2, + m32x2, u16x4, i16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); impl_from_bits_!( int8x16_t: u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); impl_from_bits_!( uint8x16_t: u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); impl_from_bits_!( poly8x16_t: u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); impl_from_bits_!( int16x8_t: u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - 
b8x16 + m8x16 ); impl_from_bits_!( uint16x8_t: u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); impl_from_bits_!( poly16x8_t: u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); impl_from_bits_!( int32x4_t: u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); impl_from_bits_!( uint32x4_t: u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); impl_from_bits_!( float32x4_t: u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); impl_from_bits_!( int64x2_t: u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); impl_from_bits_!( uint64x2_t: u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); #[allow(improper_ctypes)] diff --git a/library/stdarch/coresimd/ppsv/api/bitwise_ops.rs b/library/stdarch/coresimd/ppsv/api/bitwise_ops.rs index 46d7e1bedcca..67b4a1909fca 100644 --- a/library/stdarch/coresimd/ppsv/api/bitwise_ops.rs +++ b/library/stdarch/coresimd/ppsv/api/bitwise_ops.rs @@ -123,10 +123,10 @@ macro_rules! test_int_bitwise_ops { } #[cfg(test)] -macro_rules! test_bool_bitwise_ops { +macro_rules! test_mask_bitwise_ops { ($id:ident) => { #[test] - fn bool_arithmetic() { + fn mask_bitwise_ops() { use coresimd::simd::*; let t = $id::splat(true); diff --git a/library/stdarch/coresimd/ppsv/api/bitwise_reductions.rs b/library/stdarch/coresimd/ppsv/api/bitwise_reductions.rs index a3015597e911..840746ab7a75 100644 --- a/library/stdarch/coresimd/ppsv/api/bitwise_reductions.rs +++ b/library/stdarch/coresimd/ppsv/api/bitwise_reductions.rs @@ -1,4 +1,4 @@ -//! Implements portable bitwise vector reductions. +//! 
Implements portable horizontal bitwise vector reductions. #![allow(unused)] macro_rules! impl_bitwise_reductions { @@ -67,7 +67,7 @@ macro_rules! impl_bitwise_reductions { }; } -macro_rules! impl_bool_bitwise_reductions { +macro_rules! impl_mask_bitwise_reductions { ($id:ident, $elem_ty:ident, $internal_ty:ident) => { impl $id { /// Lane-wise bitwise `and` of the vector elements. diff --git a/library/stdarch/coresimd/ppsv/api/bitwise_scalar_ops.rs b/library/stdarch/coresimd/ppsv/api/bitwise_scalar_ops.rs index fa0d95266256..55efa752da97 100644 --- a/library/stdarch/coresimd/ppsv/api/bitwise_scalar_ops.rs +++ b/library/stdarch/coresimd/ppsv/api/bitwise_scalar_ops.rs @@ -1,4 +1,4 @@ -//! Lane-wise bitwise operations for integer and boolean vectors. +//! Lane-wise bitwise operations for integer vectors and vector masks. #![allow(unused)] macro_rules! impl_bitwise_scalar_ops { @@ -156,7 +156,7 @@ macro_rules! test_int_bitwise_scalar_ops { } #[cfg(test)] -macro_rules! test_bool_bitwise_scalar_ops { +macro_rules! test_mask_bitwise_scalar_ops { ($id:ident) => { #[test] fn bool_scalar_arithmetic() { diff --git a/library/stdarch/coresimd/ppsv/api/cmp.rs b/library/stdarch/coresimd/ppsv/api/cmp.rs index 0c0d7a4e2095..f6b42d5fa032 100644 --- a/library/stdarch/coresimd/ppsv/api/cmp.rs +++ b/library/stdarch/coresimd/ppsv/api/cmp.rs @@ -1,4 +1,4 @@ -//! Lane-wise vector comparisons returning boolean vectors. +//! Lane-wise vector comparisons returning vector masks. #![allow(unused)] macro_rules! impl_cmp { @@ -49,7 +49,7 @@ macro_rules! impl_cmp { }; } -macro_rules! impl_bool_cmp { +macro_rules! impl_mask_cmp { ($id:ident, $bool_ty:ident) => { impl $id { /// Lane-wise equality comparison. 
diff --git a/library/stdarch/coresimd/ppsv/api/bool_vectors.rs b/library/stdarch/coresimd/ppsv/api/masks.rs similarity index 96% rename from library/stdarch/coresimd/ppsv/api/bool_vectors.rs rename to library/stdarch/coresimd/ppsv/api/masks.rs index 0d5e49cc3ee0..a287e0feebe2 100644 --- a/library/stdarch/coresimd/ppsv/api/bool_vectors.rs +++ b/library/stdarch/coresimd/ppsv/api/masks.rs @@ -1,9 +1,12 @@ //! Minimal boolean vector implementation #![allow(unused)] -/// Minimal interface: all packed SIMD boolean vector types implement this. -macro_rules! impl_bool_minimal { +/// Minimal interface: all packed SIMD mask types implement this. +macro_rules! impl_mask_minimal { ($id:ident, $elem_ty:ident, $elem_count:expr, $($elem_name:ident),+) => { + + impl super::api::Lanes<[u32; $elem_count]> for $id {} + impl $id { /// Creates a new instance with each vector elements initialized /// with the provided values. @@ -88,7 +91,7 @@ macro_rules! impl_bool_minimal { } #[cfg(test)] -macro_rules! test_bool_minimal { +macro_rules! test_mask_minimal { ($id:ident, $elem_count:expr) => { #[test] fn minimal() { diff --git a/library/stdarch/coresimd/ppsv/api/boolean_reductions.rs b/library/stdarch/coresimd/ppsv/api/masks_reductions.rs similarity index 96% rename from library/stdarch/coresimd/ppsv/api/boolean_reductions.rs rename to library/stdarch/coresimd/ppsv/api/masks_reductions.rs index f3a33be950f9..e348a42d7b7d 100644 --- a/library/stdarch/coresimd/ppsv/api/boolean_reductions.rs +++ b/library/stdarch/coresimd/ppsv/api/masks_reductions.rs @@ -1,7 +1,7 @@ -//! Lane-wise boolean vector reductions. +//! Horizontal mask reductions. #![allow(unused)] -macro_rules! impl_bool_reductions { +macro_rules! impl_mask_reductions { ($id:ident) => { impl $id { /// Are `all` vector lanes `true`? @@ -46,7 +46,7 @@ macro_rules! impl_bool_reductions { } #[cfg(test)] -macro_rules! test_bool_reductions { +macro_rules! 
test_mask_reductions { ($id:ident) => { #[test] fn all() { diff --git a/library/stdarch/coresimd/ppsv/api/masks_select.rs b/library/stdarch/coresimd/ppsv/api/masks_select.rs new file mode 100644 index 000000000000..517fd997c53e --- /dev/null +++ b/library/stdarch/coresimd/ppsv/api/masks_select.rs @@ -0,0 +1,59 @@ +//! Mask select method +#![allow(unused)] + +/// Implements mask select method +macro_rules! impl_mask_select { + ($id:ident, $elem_ty:ident, $elem_count:expr) => { + impl $id { + /// Selects elements of `a` and `b` using mask. + /// + /// For each lane, the result contains the element of `a` if the + /// mask is true, and the element of `b` otherwise. + #[inline] + pub fn select(self, a: T, b: T) -> T + where + T: super::api::Lanes<[u32; $elem_count]>, + { + use coresimd::simd_llvm::simd_select; + unsafe { simd_select(self, a, b) } + } + } + }; +} + +#[cfg(test)] +macro_rules! test_mask_select { + ($mask_id:ident, $vec_id:ident, $elem_ty:ident) => { + #[test] + fn select() { + use coresimd::simd::{$mask_id, $vec_id}; + let o = 1 as $elem_ty; + let t = 2 as $elem_ty; + + let a = $vec_id::splat(o); + let b = $vec_id::splat(t); + let m = a.lt(b); + assert_eq!(m.select(a, b), a); + + let m = b.lt(a); + assert_eq!(m.select(b, a), a); + + let mut c = a; + let mut d = b; + let mut m_e = $mask_id::splat(false); + for i in 0..$vec_id::lanes() { + if i % 2 == 0 { + let c_tmp = c.extract(i); + c = c.replace(i, d.extract(i)); + d = d.replace(i, c_tmp); + } else { + m_e = m_e.replace(i, true); + } + } + + let m = c.lt(d); + assert_eq!(m_e, m); + assert_eq!(m.select(c, d), a); + } + }; +} diff --git a/library/stdarch/coresimd/ppsv/api/minimal.rs b/library/stdarch/coresimd/ppsv/api/minimal.rs index 41a138ee29be..4470bd6c31a0 100644 --- a/library/stdarch/coresimd/ppsv/api/minimal.rs +++ b/library/stdarch/coresimd/ppsv/api/minimal.rs @@ -4,6 +4,8 @@ /// Minimal interface: all packed SIMD vector types implement this. macro_rules! 
impl_minimal { ($id:ident, $elem_ty:ident, $elem_count:expr, $($elem_name:ident),+) => { + impl super::api::Lanes<[u32; $elem_count]> for $id {} + impl $id { /// Creates a new instance with each vector elements initialized /// with the provided values. diff --git a/library/stdarch/coresimd/ppsv/api/minmax_reductions.rs b/library/stdarch/coresimd/ppsv/api/minmax_reductions.rs index 6a791d2df2a7..359a72ae249b 100644 --- a/library/stdarch/coresimd/ppsv/api/minmax_reductions.rs +++ b/library/stdarch/coresimd/ppsv/api/minmax_reductions.rs @@ -1,4 +1,4 @@ -//! Implements portable arithmetic vector reductions. +//! Implements portable horizontal arithmetic reductions. #![allow(unused)] macro_rules! impl_minmax_reductions { diff --git a/library/stdarch/coresimd/ppsv/api/mod.rs b/library/stdarch/coresimd/ppsv/api/mod.rs index 00fd73d9abd9..29e291caab0f 100644 --- a/library/stdarch/coresimd/ppsv/api/mod.rs +++ b/library/stdarch/coresimd/ppsv/api/mod.rs @@ -11,8 +11,7 @@ //! * [x] `Debug`, //! * [x] `Default` //! * [x] `PartialEq` -//! * [x] `PartialOrd` (TODO: re-write in term of -//! comparison operations and boolean reductions), +//! * [x] `PartialOrd` (TODO: tests) //! //! Non-floating-point vector types also implement: //! @@ -79,10 +78,6 @@ mod bitwise_scalar_ops; #[macro_use] mod bitwise_reductions; #[macro_use] -mod boolean_reductions; -#[macro_use] -mod bool_vectors; -#[macro_use] mod cmp; #[macro_use] mod default; @@ -99,6 +94,10 @@ mod hash; #[macro_use] mod load_store; #[macro_use] +mod masks; +#[macro_use] +mod masks_reductions; +#[macro_use] mod minimal; #[macro_use] mod minmax_reductions; @@ -116,19 +115,24 @@ mod partial_eq; //#[macro_use] //mod gather_scatter; #[macro_use] +mod masks_select; +#[macro_use] mod scalar_shifts; #[macro_use] mod shifts; +/// Sealed trait used for constraining select implementations. +pub trait Lanes {} + /// Defines a portable packed SIMD floating-point vector type. macro_rules! 
simd_f_ty { - ($id:ident : $elem_count:expr, $elem_ty:ident, $bool_ty:ident, $test_mod:ident, $test_macro:ident | + ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident | $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => { vector_impl!( [define_ty, $id, $($elem_tys),+ | $(#[$doc])*], [impl_minimal, $id, $elem_ty, $elem_count, $($elem_name),*], [impl_load_store, $id, $elem_ty, $elem_count], - [impl_cmp, $id, $bool_ty], + [impl_cmp, $id, $mask_ty], [impl_arithmetic_ops, $id], [impl_arithmetic_scalar_ops, $id, $elem_ty], [impl_arithmetic_reductions, $id, $elem_ty], @@ -143,7 +147,7 @@ macro_rules! simd_f_ty { mod $test_mod { test_minimal!($id, $elem_ty, $elem_count); test_load_store!($id, $elem_ty); - test_cmp!($id, $elem_ty, $bool_ty, 1. as $elem_ty, 0. as $elem_ty); + test_cmp!($id, $elem_ty, $mask_ty, 1. as $elem_ty, 0. as $elem_ty); test_arithmetic_ops!($id, $elem_ty); test_arithmetic_scalar_ops!($id, $elem_ty); test_arithmetic_reductions!($id, $elem_ty); @@ -151,6 +155,7 @@ macro_rules! simd_f_ty { test_neg_op!($id, $elem_ty); test_partial_eq!($id, 1. as $elem_ty, 0. as $elem_ty); test_default!($id, $elem_ty); + test_mask_select!($mask_ty, $id, $elem_ty); } ); } @@ -158,13 +163,13 @@ macro_rules! simd_f_ty { /// Defines a portable packed SIMD signed-integer vector type. macro_rules! 
simd_i_ty { - ($id:ident : $elem_count:expr, $elem_ty:ident, $bool_ty:ident, $test_mod:ident, $test_macro:ident | + ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident | $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => { vector_impl!( [define_ty, $id, $($elem_tys),+ | $(#[$doc])*], [impl_minimal, $id, $elem_ty, $elem_count, $($elem_name),*], [impl_load_store, $id, $elem_ty, $elem_count], - [impl_cmp, $id, $bool_ty], + [impl_cmp, $id, $mask_ty], [impl_hash, $id, $elem_ty], [impl_arithmetic_ops, $id], [impl_arithmetic_scalar_ops, $id, $elem_ty], @@ -187,7 +192,7 @@ macro_rules! simd_i_ty { mod $test_mod { test_minimal!($id, $elem_ty, $elem_count); test_load_store!($id, $elem_ty); - test_cmp!($id, $elem_ty, $bool_ty, 1 as $elem_ty, 0 as $elem_ty); + test_cmp!($id, $elem_ty, $mask_ty, 1 as $elem_ty, 0 as $elem_ty); test_hash!($id, $elem_ty); test_arithmetic_ops!($id, $elem_ty); test_arithmetic_scalar_ops!($id, $elem_ty); @@ -202,6 +207,7 @@ macro_rules! simd_i_ty { test_hex_fmt!($id, $elem_ty); test_partial_eq!($id, 1 as $elem_ty, 0 as $elem_ty); test_default!($id, $elem_ty); + test_mask_select!($mask_ty, $id, $elem_ty); } ); } @@ -209,13 +215,13 @@ macro_rules! simd_i_ty { /// Defines a portable packed SIMD unsigned-integer vector type. macro_rules! simd_u_ty { - ($id:ident : $elem_count:expr, $elem_ty:ident, $bool_ty:ident, $test_mod:ident, $test_macro:ident | + ($id:ident : $elem_count:expr, $elem_ty:ident, $mask_ty:ident, $test_mod:ident, $test_macro:ident | $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => { vector_impl!( [define_ty, $id, $($elem_tys),+ | $(#[$doc])*], [impl_minimal, $id, $elem_ty, $elem_count, $($elem_name),*], [impl_load_store, $id, $elem_ty, $elem_count], - [impl_cmp, $id, $bool_ty], + [impl_cmp, $id, $mask_ty], [impl_hash, $id, $elem_ty], [impl_arithmetic_ops, $id], [impl_arithmetic_scalar_ops, $id, $elem_ty], @@ -237,7 +243,7 @@ macro_rules! 
simd_u_ty { mod $test_mod { test_minimal!($id, $elem_ty, $elem_count); test_load_store!($id, $elem_ty); - test_cmp!($id, $elem_ty, $bool_ty, 1 as $elem_ty, 0 as $elem_ty); + test_cmp!($id, $elem_ty, $mask_ty, 1 as $elem_ty, 0 as $elem_ty); test_hash!($id, $elem_ty); test_arithmetic_ops!($id, $elem_ty); test_arithmetic_scalar_ops!($id, $elem_ty); @@ -251,23 +257,25 @@ macro_rules! simd_u_ty { test_hex_fmt!($id, $elem_ty); test_partial_eq!($id, 1 as $elem_ty, 0 as $elem_ty); test_default!($id, $elem_ty); + test_mask_select!($mask_ty, $id, $elem_ty); } ); } } -/// Defines a portable packed SIMD boolean vector type. -macro_rules! simd_b_ty { +/// Defines a portable packed SIMD mask type. +macro_rules! simd_m_ty { ($id:ident : $elem_count:expr, $elem_ty:ident, $test_mod:ident, $test_macro:ident | $($elem_tys:ident),+ | $($elem_name:ident),+ | $(#[$doc:meta])*) => { vector_impl!( [define_ty, $id, $($elem_tys),+ | $(#[$doc])*], - [impl_bool_minimal, $id, $elem_ty, $elem_count, $($elem_name),*], + [impl_mask_minimal, $id, $elem_ty, $elem_count, $($elem_name),*], [impl_bitwise_ops, $id, true], [impl_bitwise_scalar_ops, $id, bool], - [impl_bool_bitwise_reductions, $id, bool, $elem_ty], - [impl_bool_reductions, $id], - [impl_bool_cmp, $id, $id], + [impl_mask_bitwise_reductions, $id, bool, $elem_ty], + [impl_mask_reductions, $id], + [impl_mask_select, $id, $elem_ty, $elem_count], + [impl_mask_cmp, $id, $id], [impl_eq, $id], [impl_partial_eq, $id], [impl_default, $id, bool] @@ -276,10 +284,10 @@ macro_rules! 
simd_b_ty { $test_macro!( #[cfg(test)] mod $test_mod { - test_bool_minimal!($id, $elem_count); - test_bool_bitwise_ops!($id); - test_bool_bitwise_scalar_ops!($id); - test_bool_reductions!($id); + test_mask_minimal!($id, $elem_count); + test_mask_bitwise_ops!($id); + test_mask_bitwise_scalar_ops!($id); + test_mask_reductions!($id); test_bitwise_reductions!($id, true); test_cmp!($id, $elem_ty, $id, true, false); test_partial_eq!($id, true, false); diff --git a/library/stdarch/coresimd/ppsv/mod.rs b/library/stdarch/coresimd/ppsv/mod.rs index 3067905a640a..08b7ce80d6ca 100644 --- a/library/stdarch/coresimd/ppsv/mod.rs +++ b/library/stdarch/coresimd/ppsv/mod.rs @@ -19,10 +19,11 @@ //! `{t}{l_w}x{l_n}`: //! //! * `t`: type - single letter corresponding to the following Rust literal -//! types: * `i`: signed integer +//! types: +//! * `i`: signed integer //! * `u`: unsigned integer //! * `f`: floating point -//! * `b`: boolean +//! * `m`: vector mask //! * `l_w`: lane width in bits //! * `l_n`: number of lanes //! diff --git a/library/stdarch/coresimd/ppsv/v128.rs b/library/stdarch/coresimd/ppsv/v128.rs index bff8cd758594..d2bdae49b778 100644 --- a/library/stdarch/coresimd/ppsv/v128.rs +++ b/library/stdarch/coresimd/ppsv/v128.rs @@ -1,83 +1,103 @@ //! 128-bit wide portable packed vector types. -use coresimd::simd::{b8x2, b8x4, b8x8}; simd_i_ty! { - i8x16: 16, i8, b8x16, i8x16_tests, test_v128 | + i8x16: 16, i8, m8x16, i8x16_tests, test_v128 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | /// A 128-bit vector with 16 `i8` lanes. } simd_u_ty! { - u8x16: 16, u8, b8x16, u8x16_tests, test_v128 | + u8x16: 16, u8, m8x16, u8x16_tests, test_v128 | u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | /// A 128-bit vector with 16 `u8` lanes. } -simd_b_ty! { - b8x16: 16, i8, b8x16_tests, test_v128 | +simd_m_ty! 
{ + m8x16: 16, i8, m8x16_tests, test_v128 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 | - /// A 128-bit vector with 16 `bool` lanes. + /// A 128-bit vector mask with 16 lanes. } simd_i_ty! { - i16x8: 8, i16, b8x8, i16x8_tests, test_v128 | + i16x8: 8, i16, m16x8, i16x8_tests, test_v128 | i16, i16, i16, i16, i16, i16, i16, i16 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 128-bit vector with 8 `i16` lanes. } simd_u_ty! { - u16x8: 8, u16, b8x8, u16x8_tests, test_v128 | + u16x8: 8, u16, m16x8, u16x8_tests, test_v128 | u16, u16, u16, u16, u16, u16, u16, u16 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 128-bit vector with 8 `u16` lanes. } +simd_m_ty! { + m16x8: 8, i16, m16x8_tests, test_v128 | + i16, i16, i16, i16, i16, i16, i16, i16 | + x0, x1, x2, x3, x4, x5, x6, x7 | + /// A 128-bit vector mask with 8 lanes. +} + simd_i_ty! { - i32x4: 4, i32, b8x4, i32x4_tests, test_v128 | + i32x4: 4, i32, m32x4, i32x4_tests, test_v128 | i32, i32, i32, i32 | x0, x1, x2, x3 | /// A 128-bit vector with 4 `i32` lanes. } simd_u_ty! { - u32x4: 4, u32, b8x4, u32x4_tests, test_v128 | + u32x4: 4, u32, m32x4, u32x4_tests, test_v128 | u32, u32, u32, u32 | x0, x1, x2, x3 | /// A 128-bit vector with 4 `u32` lanes. } simd_f_ty! { - f32x4: 4, f32, b8x4, f32x4_tests, test_v128 | + f32x4: 4, f32, m32x4, f32x4_tests, test_v128 | f32, f32, f32, f32 | x0, x1, x2, x3 | /// A 128-bit vector with 4 `f32` lanes. } +simd_m_ty! { + m32x4: 4, i32, m32x4_tests, test_v128 | + i32, i32, i32, i32 | + x0, x1, x2, x3 | + /// A 128-bit vector mask with 4 lanes. +} + simd_i_ty! { - i64x2: 2, i64, b8x2, i64x2_tests, test_v128 | + i64x2: 2, i64, m64x2, i64x2_tests, test_v128 | i64, i64 | x0, x1 | /// A 128-bit vector with 2 `u64` lanes. } simd_u_ty! { - u64x2: 2, u64, b8x2, u64x2_tests, test_v128 | + u64x2: 2, u64, m64x2, u64x2_tests, test_v128 | u64, u64 | x0, x1 | /// A 128-bit vector with 2 `u64` lanes. } simd_f_ty! 
{ - f64x2: 2, f64, b8x2, f64x2_tests, test_v128 | + f64x2: 2, f64, m64x2, f64x2_tests, test_v128 | f64, f64 | x0, x1 | /// A 128-bit vector with 2 `f64` lanes. } +simd_m_ty! { + m64x2: 2, i64, m64x4_tests, test_v128 | + i64, i64 | + x0, x1 | + /// A 128-bit vector mask with 2 lanes. +} + #[cfg(target_arch = "x86")] use coresimd::arch::x86::{__m128, __m128d, __m128i}; #[cfg(target_arch = "x86_64")] @@ -149,14 +169,17 @@ impl_from_bits!( u64x2_from_bits, test_v128 | i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); from_bits_x86!(u64x2, u64, u64x2_from_bits_x86); from_bits_arm!( @@ -171,14 +194,17 @@ impl_from_bits!( i64x2_from_bits, test_v128 | u64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); from_bits_x86!(i64x2, i64, i64x2_from_bits_x86); from_bits_arm!( @@ -193,14 +219,17 @@ impl_from_bits!( f64x2_from_bits, test_v128 | i64x2, u64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); from_bits_x86!(f64x2, f64, f64x2_from_bits_x86); from_bits_arm!( @@ -216,13 +245,16 @@ impl_from_bits!( test_v128 | u64x2, i64x2, f64x2, + m64x2, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); from_bits_x86!(u32x4, u32, u32x4_from_bits_x86); from_bits_arm!( @@ -238,13 +270,16 @@ impl_from_bits!( test_v128 | u64x2, i64x2, f64x2, + m64x2, u32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); from_bits_x86!(i32x4, i32, i32x4_from_bits_x86); from_bits_arm!( @@ -260,13 +295,16 @@ impl_from_bits!( test_v128 | u64x2, i64x2, f64x2, + m64x2, i32x4, u32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); from_bits_x86!(f32x4, f32, f32x4_from_bits_x86); from_bits_arm!( @@ -282,13 +320,16 @@ impl_from_bits!( test_v128 | u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); from_bits_x86!(u16x8, u16, 
u16x8_from_bits_x86); from_bits_arm!( @@ -304,13 +345,16 @@ impl_from_bits!( test_v128 | u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); from_bits_x86!(i16x8, i16, i16x8_from_bits_x86); from_bits_arm!( @@ -326,13 +370,16 @@ impl_from_bits!( test_v128 | u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, i8x16, - b8x16 + m8x16 ); from_bits_x86!(u8x16, u8, u8x16_from_bits_x86); from_bits_arm!( @@ -348,13 +395,16 @@ impl_from_bits!( test_v128 | u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, - b8x16 + m8x16 ); from_bits_x86!(i8x16, i8, i8x16_from_bits_x86); from_bits_arm!( @@ -370,51 +420,50 @@ impl_from!( test_v128 | f32x2, u64x2, i64x2, + m64x2, u32x2, i32x2, + m32x2, u16x2, i16x2, + m16x2, u8x2, - i8x2 -); -impl_from!( - f32x4: f32, - f32x4_from, - test_v128 | f64x4, - u64x4, - i64x4, - u32x4, - i32x4, - u16x4, - i16x4, - u8x4, - i8x4 + i8x2, + m8x2 ); impl_from!( u64x2: u64, u64x2_from, - test_v128 | f32x2, - f64x2, + test_v128 | f64x2, i64x2, + m64x2, + f32x2, i32x2, u32x2, + m32x2, i16x2, u16x2, + m16x2, i8x2, - u8x2 + u8x2, + m8x2 ); impl_from!( i64x2: i64, i64x2_from, - test_v128 | f32x2, - f64x2, + test_v128 | f64x2, u64x2, + m64x2, i32x2, u32x2, + f32x2, + m32x2, i16x2, u16x2, + m16x2, i8x2, - u8x2 + u8x2, + m8x2 ); impl_from!( u32x4: u32, @@ -422,12 +471,16 @@ impl_from!( test_v128 | f64x4, u64x4, i64x4, + m64x4, f32x4, i32x4, + m32x4, u16x4, i16x4, + m16x4, u8x4, - i8x4 + i8x4, + m8x4 ); impl_from!( i32x4: i32, @@ -435,25 +488,51 @@ impl_from!( test_v128 | f64x4, u64x4, i64x4, + m64x4, f32x4, u32x4, + m32x4, u16x4, i16x4, + m16x4, u8x4, - i8x4 + i8x4, + m8x4 ); +impl_from!( + f32x4: f32, + f32x4_from, + test_v128 | f64x4, + u64x4, + i64x4, + m64x4, + u32x4, + i32x4, + m32x4, + u16x4, + i16x4, + m16x4, + u8x4, + i8x4, + m8x4 +); + impl_from!( i16x8: i16, i16x8_from, test_v128 | f64x8, u64x8, i64x8, + m1x8, f32x8, u32x8, 
i32x8, + m32x8, u16x8, + m16x8, u8x8, - i8x8 + i8x8, + m8x8 ); impl_from!( u16x8: u16, @@ -461,10 +540,40 @@ impl_from!( test_v128 | f64x8, u64x8, i64x8, + m1x8, f32x8, u32x8, i32x8, + m32x8, i16x8, + m16x8, u8x8, - i8x8 + i8x8, + m8x8 +); + +impl_from!(m8x16: i8, m8x16_from, test_v128 | m1x16, m16x16); + +impl_from!( + m16x8: i16, + m16x8_from, + test_v128 | m1x8, + m32x8, + m8x8 +); + +impl_from!( + m32x4: i32, + m32x4_from, + test_v128 | m64x4, + m16x4, + m8x4 +); + +impl_from!( + m64x2: i64, + m64x2_from, + test_v128 | m32x2, + m16x2, + m8x2 ); diff --git a/library/stdarch/coresimd/ppsv/v16.rs b/library/stdarch/coresimd/ppsv/v16.rs index 389aaf0e6a25..8bc08452c455 100644 --- a/library/stdarch/coresimd/ppsv/v16.rs +++ b/library/stdarch/coresimd/ppsv/v16.rs @@ -1,40 +1,44 @@ //! 16-bit wide portable packed vector types. simd_i_ty! { - i8x2: 2, i8, b8x2, i8x2_tests, test_v16 | + i8x2: 2, i8, m8x2, i8x2_tests, test_v16 | i8, i8 | x0, x1 | /// A 16-bit wide vector with 2 `i8` lanes. } simd_u_ty! { - u8x2: 2, u8, b8x2, u8x2_tests, test_v16 | + u8x2: 2, u8, m8x2, u8x2_tests, test_v16 | u8, u8 | x0, x1 | /// A 16-bit wide vector with 2 `u8` lanes. } -simd_b_ty! { - b8x2: 2, i8, b8x2_tests, test_v16 | +simd_m_ty! { + m8x2: 2, i8, m8x2_tests, test_v16 | i8, i8 | x0, x1 | - /// A 16-bit wide vector with 2 `bool` lanes. + /// A 16-bit wide vector mask with 2 lanes. 
} -impl_from_bits!(i8x2: i8, i8x2_from_bits, test_v16 | u8x2, b8x2); -impl_from_bits!(u8x2: u8, u8x2_from_bits, test_v16 | i8x2, b8x2); +impl_from_bits!(i8x2: i8, i8x2_from_bits, test_v16 | u8x2, m8x2); +impl_from_bits!(u8x2: u8, u8x2_from_bits, test_v16 | i8x2, m8x2); impl_from!( i8x2: i8, i8x2_from, test_v16 | f64x2, u64x2, + m64x2, i64x2, f32x2, u32x2, i32x2, + m32x2, u16x2, - u8x2 + m16x2, + u8x2, + m8x2 ); impl_from!( u8x2: u8, @@ -42,9 +46,21 @@ impl_from!( test_v16 | f64x2, u64x2, i64x2, + m64x2, f32x2, u32x2, i32x2, + m32x2, u16x2, - i8x2 + m16x2, + i8x2, + m8x2 +); + +impl_from!( + m8x2: i8, + m8x2_from, + test_v16 | m64x2, + m32x2, + m16x2 ); diff --git a/library/stdarch/coresimd/ppsv/v256.rs b/library/stdarch/coresimd/ppsv/v256.rs index 1af9ff4aae3c..849897d4eab1 100644 --- a/library/stdarch/coresimd/ppsv/v256.rs +++ b/library/stdarch/coresimd/ppsv/v256.rs @@ -1,8 +1,7 @@ //! 256-bit wide portable packed vector types. -use coresimd::simd::{b8x16, b8x4, b8x8}; simd_i_ty! { - i8x32: 32, i8, b8x32, i8x32_tests, test_v256 | + i8x32: 32, i8, m8x32, i8x32_tests, test_v256 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7, @@ -13,7 +12,7 @@ simd_i_ty! { } simd_u_ty! { - u8x32: 32, u8, b8x32, u8x32_tests, test_v256 | + u8x32: 32, u8, m8x32, u8x32_tests, test_v256 | u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8 | x0, x1, x2, x3, x4, x5, x6, x7, @@ -23,19 +22,19 @@ simd_u_ty! { /// A 256-bit vector with 32 `u8` lanes. } -simd_b_ty! { - b8x32: 32, i8, b8x32_tests, test_v256 | +simd_m_ty! 
{ + m8x32: 32, i8, m8x32_tests, test_v256 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31 | - /// A 256-bit vector with 32 `bool` lanes. + /// A 256-bit vector mask with 32 lanes. } simd_i_ty! { - i16x16: 16, i16, b8x16, i16x16_tests, test_v256 | + i16x16: 16, i16, m16x16, i16x16_tests, test_v256 | i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 | x0, x1, x2, x3, x4, x5, x6, x7, @@ -44,7 +43,7 @@ simd_i_ty! { } simd_u_ty! { - u16x16: 16, u16, b8x16, u16x16_tests, test_v256 | + u16x16: 16, u16, m16x16, u16x16_tests, test_v256 | u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16 | x0, x1, x2, x3, x4, x5, x6, x7, @@ -52,48 +51,71 @@ simd_u_ty! { /// A 256-bit vector with 16 `u16` lanes. } +simd_m_ty! { + m16x16: 16, i16, m16x16_tests, test_v256 | + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16 | + x0, x1, x2, x3, x4, x5, x6, x7, + x8, x9, x10, x11, x12, x13, x14, x15 | + /// A 256-bit vector mask with 16 lanes. +} + simd_i_ty! { - i32x8: 8, i32, b8x8, i32x8_tests, test_v256 | + i32x8: 8, i32, m32x8, i32x8_tests, test_v256 | i32, i32, i32, i32, i32, i32, i32, i32 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 256-bit vector with 8 `i32` lanes. } simd_u_ty! { - u32x8: 8, u32, b8x8, u32x8_tests, test_v256 | + u32x8: 8, u32, m32x8, u32x8_tests, test_v256 | u32, u32, u32, u32, u32, u32, u32, u32 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 256-bit vector with 8 `u32` lanes. } simd_f_ty! { - f32x8: 8, f32, b8x8, f32x8_tests, test_v256 | + f32x8: 8, f32, m32x8, f32x8_tests, test_v256 | f32, f32, f32, f32, f32, f32, f32, f32 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 256-bit vector with 8 `f32` lanes. } +simd_m_ty! 
{ + m32x8: 8, i32, m32x8_tests, test_v256 | + i32, i32, i32, i32, i32, i32, i32, i32 | + x0, x1, x2, x3, x4, x5, x6, x7 | + /// A 256-bit vector mask with 8 lanes. +} + simd_i_ty! { - i64x4: 4, i64, b8x4, i64x4_tests, test_v256 | + i64x4: 4, i64, m64x4, i64x4_tests, test_v256 | i64, i64, i64, i64 | x0, x1, x2, x3 | /// A 256-bit vector with 4 `i64` lanes. } simd_u_ty! { - u64x4: 4, u64, b8x4, u64x4_tests, test_v256 | + u64x4: 4, u64, m64x4, u64x4_tests, test_v256 | u64, u64, u64, u64 | x0, x1, x2, x3 | /// A 256-bit vector with 4 `u64` lanes. } simd_f_ty! { - f64x4: 4, f64, b8x4, f64x4_tests, test_v256 | + f64x4: 4, f64, m64x4, f64x4_tests, test_v256 | f64, f64, f64, f64 | x0, x1, x2, x3 | /// A 256-bit vector with 4 `f64` lanes. } +simd_m_ty! { + m64x4: 4, i64, m64x4_tests, test_v256 | + i64, i64, i64, i64 | + x0, x1, x2, x3 | + /// A 256-bit vector mask with 4 lanes. +} + #[cfg(target_arch = "x86")] use coresimd::arch::x86::{__m256, __m256d, __m256i}; #[cfg(target_arch = "x86_64")] @@ -112,13 +134,16 @@ impl_from_bits!( test_v256 | u64x4, i64x4, f64x4, + m64x4, u32x8, i32x8, f32x8, + m32x8, u16x16, i16x16, + m16x16, u8x32, - b8x32 + m8x32 ); from_bits_x86!(i8x32, i8, i8x32_from_bits_x86); @@ -128,13 +153,16 @@ impl_from_bits!( test_v256 | u64x4, i64x4, f64x4, + m64x4, u32x8, i32x8, f32x8, + m32x8, u16x16, i16x16, + m16x16, i8x32, - b8x32 + m8x32 ); from_bits_x86!(u8x32, u8, u8x32_from_bits_x86); @@ -144,13 +172,16 @@ impl_from_bits!( test_v256 | u64x4, i64x4, f64x4, + m64x4, u32x8, i32x8, f32x8, + m32x8, u16x16, + m16x16, u8x32, i8x32, - b8x32 + m8x32 ); from_bits_x86!(i16x16, i16, i16x16_from_bits_x86); @@ -160,13 +191,16 @@ impl_from_bits!( test_v256 | u64x4, i64x4, f64x4, + m64x4, u32x8, i32x8, f32x8, + m32x8, i16x16, + m16x16, u8x32, i8x32, - b8x32 + m8x32 ); from_bits_x86!(u16x16, u16, u16x16_from_bits_x86); @@ -176,13 +210,16 @@ impl_from_bits!( test_v256 | u64x4, i64x4, f64x4, + m64x4, u32x8, f32x8, + m32x8, u16x16, i16x16, + m16x16, u8x32, i8x32, - b8x32 
+ m8x32 ); from_bits_x86!(i32x8, i32, i32x8_from_bits_x86); @@ -192,13 +229,16 @@ impl_from_bits!( test_v256 | u64x4, i64x4, f64x4, + m64x4, i32x8, f32x8, + m32x8, u16x16, i16x16, + m16x16, u8x32, i8x32, - b8x32 + m8x32 ); from_bits_x86!(u32x8, u32, u32x8_from_bits_x86); @@ -208,13 +248,16 @@ impl_from_bits!( test_v256 | u64x4, i64x4, f64x4, + m64x4, i32x8, u32x8, + m32x8, u16x16, i16x16, + m16x16, u8x32, i8x32, - b8x32 + m8x32 ); from_bits_x86!(f32x8, f32, f32x8_from_bits_x86); @@ -223,14 +266,17 @@ impl_from_bits!( i64x4_from_bits, test_v256 | u64x4, f64x4, + m64x4, i32x8, u32x8, f32x8, + m32x8, u16x16, i16x16, + m16x16, u8x32, i8x32, - b8x32 + m8x32 ); from_bits_x86!(i64x4, i64, i64x4_from_bits_x86); @@ -239,14 +285,17 @@ impl_from_bits!( u64x4_from_bits, test_v256 | i64x4, f64x4, + m64x4, i32x8, u32x8, f32x8, + m32x8, u16x16, i16x16, + m16x16, u8x32, i8x32, - b8x32 + m8x32 ); from_bits_x86!(u64x4, u64, u64x4_from_bits_x86); @@ -255,14 +304,17 @@ impl_from_bits!( f64x4_from_bits, test_v256 | i64x4, u64x4, + m64x4, i32x8, u32x8, f32x8, + m32x8, u16x16, i16x16, + m16x16, u8x32, i8x32, - b8x32 + m8x32 ); from_bits_x86!(f64x4, f64, f64x4_from_bits_x86); @@ -271,39 +323,51 @@ impl_from!( f64x4_from, test_v256 | u64x4, i64x4, + m64x4, u32x4, i32x4, f32x4, + m32x4, u16x4, i16x4, + m16x4, u8x4, - i8x4 + i8x4, + m8x4 ); impl_from!( i64x4: i64, i64x4_from, test_v256 | u64x4, f64x4, + m64x4, u32x4, i32x4, f32x4, + m32x4, u16x4, i16x4, + m16x4, u8x4, - i8x4 + i8x4, + m8x4 ); impl_from!( u64x4: u64, u64x4_from, test_v256 | i64x4, f64x4, + m64x4, u32x4, i32x4, f32x4, + m32x4, u16x4, i16x4, + m16x4, u8x4, - i8x4 + i8x4, + m8x4 ); impl_from!( f32x8: f32, @@ -311,12 +375,16 @@ impl_from!( test_v256 | u64x8, i64x8, f64x8, + m1x8, u32x8, i32x8, + m32x8, u16x8, i16x8, + m16x8, u8x8, - i8x8 + i8x8, + m8x8 ); impl_from!( i32x8: i32, @@ -324,12 +392,16 @@ impl_from!( test_v256 | u64x8, i64x8, f64x8, + m1x8, u32x8, f32x8, + m32x8, u16x8, i16x8, + m16x8, u8x8, - i8x8 + i8x8, + m8x8 ); 
impl_from!( u32x8: u32, @@ -337,12 +409,16 @@ impl_from!( test_v256 | u64x8, i64x8, f64x8, + m1x8, i32x8, f32x8, + m32x8, u16x8, i16x8, + m16x8, u8x8, - i8x8 + i8x8, + m8x8 ); impl_from!( i16x16: i16, @@ -350,9 +426,12 @@ impl_from!( test_v256 | u32x16, i32x16, f32x16, + m1x16, u16x16, + m16x16, u8x16, - i8x16 + i8x16, + m8x16 ); impl_from!( u16x16: u16, @@ -360,21 +439,51 @@ impl_from!( test_v256 | u32x16, i32x16, f32x16, + m1x16, i16x16, + m16x16, u8x16, - i8x16 + i8x16, + m8x16 ); impl_from!( i8x32: i8, i8x32_from, test_v256 | u16x32, i16x32, - u8x32 + u8x32, + m8x32 ); impl_from!( u8x32: u8, u8x32_from, test_v256 | u16x32, i16x32, - i8x32 + i8x32, + m8x32 +); + +impl_from!(m8x32: i8, m8x32_from, test_v256 | m1x32); + +impl_from!( + m16x16: i16, + m16x16_from, + test_v256 | m1x16, + m8x16 +); + +impl_from!( + m32x8: i32, + m32x8_from, + test_v256 | m1x8, + m16x8, + m8x8 +); + +impl_from!( + m64x4: i64, + m64x4_from, + test_v256 | m32x4, + m16x4, + m8x4 ); diff --git a/library/stdarch/coresimd/ppsv/v32.rs b/library/stdarch/coresimd/ppsv/v32.rs index fc51344bc31c..854837e9ba36 100644 --- a/library/stdarch/coresimd/ppsv/v32.rs +++ b/library/stdarch/coresimd/ppsv/v32.rs @@ -1,72 +1,82 @@ //! 32-bit wide portable packed vector types. -use coresimd::simd::b8x2; simd_i_ty! { - i16x2: 2, i16, b8x2, i16x2_tests, test_v32 | + i16x2: 2, i16, m16x2, i16x2_tests, test_v32 | i16, i16 | x0, x1 | /// A 32-bit wide vector with 2 `i16` lanes. } simd_u_ty! { - u16x2: 2, u16, b8x2, u16x2_tests, test_v32 | + u16x2: 2, u16, m16x2, u16x2_tests, test_v32 | u16, u16 | x0, x1 | /// A 32-bit wide vector with 2 `u16` lanes. } +simd_m_ty! { + m16x2: 2, i16, m16x2_tests, test_v32 | + i16, i16 | + x0, x1 | + /// A 32-bit wide vector mask with 2 lanes. +} + simd_i_ty! { - i8x4: 4, i8, b8x4, i8x4_tests, test_v32 | + i8x4: 4, i8, m8x4, i8x4_tests, test_v32 | i8, i8, i8, i8 | x0, x1, x2, x3 | /// A 32-bit wide vector with 4 `i8` lanes. } simd_u_ty! 
{ - u8x4: 4, u8, b8x4, u8x4_tests, test_v32 | + u8x4: 4, u8, m8x4, u8x4_tests, test_v32 | u8, u8, u8, u8 | x0, x1, x2, x3 | /// A 32-bit wide vector with 4 `u8` lanes. } -simd_b_ty! { - b8x4: 4, i8, b8x4_tests, test_v32 | +simd_m_ty! { + m8x4: 4, i8, m8x4_tests, test_v32 | i8, i8, i8, i8 | x0, x1, x2, x3 | - /// A 32-bit wide vector with 4 `bool` lanes. + /// A 32-bit wide vector mask with 4 lanes. } impl_from_bits!( i16x2: i16, i16x2_from_bits, test_v32 | u16x2, + m16x2, i8x4, u8x4, - b8x4 + m8x4 ); impl_from_bits!( u16x2: u16, u16x2_from_bits, test_v32 | i16x2, + m16x2, i8x4, u8x4, - b8x4 + m8x4 ); impl_from_bits!( i8x4: i8, i8x2_from_bits, test_v32 | i16x2, u16x2, + m16x2, u8x4, - b8x4 + m8x4 ); impl_from_bits!( u8x4: u8, u8x2_from_bits, test_v32 | i16x2, u16x2, + m16x2, i8x4, - b8x4 + m8x4 ); impl_from!( @@ -75,12 +85,16 @@ impl_from!( test_v32 | f64x2, u64x2, i64x2, + m64x2, f32x2, u32x2, i32x2, + m32x2, u16x2, + m16x2, u8x2, - i8x2 + i8x2, + m8x2 ); impl_from!( @@ -89,12 +103,16 @@ impl_from!( test_v32 | f64x2, u64x2, i64x2, + m64x2, f32x2, u32x2, i32x2, + m32x2, i16x2, + m16x2, u8x2, - i8x2 + i8x2, + m8x2 ); impl_from!( @@ -103,12 +121,16 @@ impl_from!( test_v32 | f64x4, u64x4, i64x4, + m64x4, u32x4, i32x4, f32x4, + m32x4, u16x4, i16x4, - u8x4 + m16x4, + u8x4, + m8x4 ); impl_from!( @@ -117,10 +139,30 @@ impl_from!( test_v32 | f64x4, u64x4, i64x4, + m64x4, u32x4, i32x4, f32x4, + m32x4, u16x4, + m16x4, i8x4, - i8x4 + m8x4 +); + +impl_from!( + m8x4: i8, + m8x4_from, + test_v32 | m64x4, + m32x4, + m16x4 +); + +impl_from!( + m16x2: i16, + m16x2_from, + test_v32 | m64x2, + m32x2, + m8x2 ); diff --git a/library/stdarch/coresimd/ppsv/v512.rs b/library/stdarch/coresimd/ppsv/v512.rs index dcef861d3daf..7fd42175a90a 100644 --- a/library/stdarch/coresimd/ppsv/v512.rs +++ b/library/stdarch/coresimd/ppsv/v512.rs @@ -1,8 +1,11 @@ //! 512-bit wide portable packed vector types. 
-use coresimd::simd::{b8x16, b8x32, b8x8}; + +// FIXME: Here the m1xN masks should map to AVX-512 m1xN registers, +// but due to lack of rustc support (shouldn't be hard to add) these masks +// are currently implemented as being 512-bit wide. simd_i_ty! { - i8x64: 64, i8, b8x64, i8x64_tests, test_v512 | + i8x64: 64, i8, m1x64, i8x64_tests, test_v512 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, @@ -19,7 +22,7 @@ simd_i_ty! { } simd_u_ty! { - u8x64: 64, u8, b8x64, u8x64_tests, test_v512 | + u8x64: 64, u8, m1x64, u8x64_tests, test_v512 | u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, u8, @@ -35,8 +38,8 @@ simd_u_ty! { /// A 512-bit vector with 64 `u8` lanes. } -simd_b_ty! { - b8x64: 64, i8, b8x64_tests, test_v512 | +simd_m_ty! { + m1x64: 64, i8, m1x64_tests, test_v512 | i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, @@ -49,11 +52,11 @@ simd_b_ty! { x40, x41, x42, x43, x44, x45, x46, x47, x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63 | - /// A 512-bit vector with 64 `bool` lanes. + /// A 64-bit vector mask with 64 lanes (FIXME: 512-bit wide). } simd_i_ty! { - i16x32: 32, i16, b8x32, i16x32_tests, test_v512 | + i16x32: 32, i16, m1x32, i16x32_tests, test_v512 | i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, @@ -66,7 +69,7 @@ simd_i_ty! { } simd_u_ty! 
{ - u16x32: 32, u16, b8x32, u16x32_tests, test_v512 | + u16x32: 32, u16, m1x32, u16x32_tests, test_v512 | u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, u16, @@ -77,8 +80,22 @@ simd_u_ty! { x24, x25, x26, x27, x28, x29, x30, x31 | /// A 512-bit vector with 32 `u16` lanes. } + +simd_m_ty! { + m1x32: 32, i16, m1x32_tests, test_v512 | + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16, + i16, i16, i16, i16, i16, i16, i16, i16 | + x0, x1, x2, x3, x4, x5, x6, x7, + x8, x9, x10, x11, x12, x13, x14, x15, + x16, x17, x18, x19, x20, x21, x22, x23, + x24, x25, x26, x27, x28, x29, x30, x31 | + /// A 32-bit vector mask with 32 lanes (FIXME: 512-bit wide). +} + simd_i_ty! { - i32x16: 16, i32, b8x16, i32x16_tests, test_v512 | + i32x16: 16, i32, m1x16, i32x16_tests, test_v512 | i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 | x0, x1, x2, x3, x4, x5, x6, x7, @@ -87,7 +104,7 @@ simd_i_ty! { } simd_u_ty! { - u32x16: 16, u32, b8x16, u32x16_tests, test_v512 | + u32x16: 16, u32, m1x16, u32x16_tests, test_v512 | u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32, u32 | x0, x1, x2, x3, x4, x5, x6, x7, @@ -96,7 +113,7 @@ simd_u_ty! { } simd_f_ty! { - f32x16: 16, f32, b8x16, f32x16_tests, test_v512 | + f32x16: 16, f32, m1x16, f32x16_tests, test_v512 | f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32 | x0, x1, x2, x3, x4, x5, x6, x7, @@ -104,40 +121,60 @@ simd_f_ty! { /// A 512-bit vector with 16 `f32` lanes. } +simd_m_ty! { + m1x16: 16, i32, m1x16_tests, test_v512 | + i32, i32, i32, i32, i32, i32, i32, i32, + i32, i32, i32, i32, i32, i32, i32, i32 | + x0, x1, x2, x3, x4, x5, x6, x7, + x8, x9, x10, x11, x12, x13, x14, x15 | + /// A 16-bit vector mask with 16 lanes (FIXME: 512-bit wide). +} + simd_i_ty! 
{ - i64x8: 8, i64, b8x8, i64x8_tests, test_v512 | + i64x8: 8, i64, m1x8, i64x8_tests, test_v512 | i64, i64, i64, i64, i64, i64, i64, i64 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 512-bit vector with 8 `i64` lanes. } simd_u_ty! { - u64x8: 8, u64, b8x8, u64x8_tests, test_v512 | + u64x8: 8, u64, m1x8, u64x8_tests, test_v512 | u64, u64, u64, u64, u64, u64, u64, u64 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 512-bit vector with 8 `u64` lanes. } simd_f_ty! { - f64x8: 8, f64, b8x8, f64x8_tests, test_v512 | + f64x8: 8, f64, m1x8, f64x8_tests, test_v512 | f64, f64, f64, f64, f64, f64, f64, f64 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 512-bit vector with 8 `f64` lanes. } +simd_m_ty! { + m1x8: 8, i64, m1x8_tests, test_v512 | + i64, i64, i64, i64, + i64, i64, i64, i64 | + x0, x1, x2, x3, x4, x5, x6, x7 | + /// A 8-bit vector mask with 8 lanes (FIXME: 512-bit wide). +} + impl_from_bits!( i8x64: i8, i8x64_from_bits, test_v512 | u64x8, i64x8, f64x8, + m1x8, // FIXME u32x16, i32x16, f32x16, + m1x16, // FIXME u16x32, i16x32, + m1x32, // FIXME u8x64, - b8x64 + m1x64 // FIXME ); impl_from_bits!( u8x64: u8, @@ -145,13 +182,16 @@ impl_from_bits!( test_v512 | u64x8, i64x8, f64x8, + m1x8, // FIXME u32x16, i32x16, f32x16, + m1x16, // FIXME u16x32, i16x32, + m1x32, // FIXME i8x64, - b8x64 + m1x64 // FIXME ); impl_from_bits!( i16x32: i16, @@ -159,13 +199,16 @@ impl_from_bits!( test_v512 | u64x8, i64x8, f64x8, + m1x8, // FIXME u32x16, i32x16, f32x16, + m1x16, // FIXME u16x32, + m1x32, // FIXME i8x64, u8x64, - b8x64 + m1x64 // FIXME ); impl_from_bits!( u16x32: u16, @@ -173,13 +216,16 @@ impl_from_bits!( test_v512 | u64x8, i64x8, f64x8, + m1x8, // FIXME u32x16, i32x16, f32x16, + m1x16, // FIXME i16x32, + m1x32, // FIXME i8x64, u8x64, - b8x64 + m1x64 // FIXME ); impl_from_bits!( i32x16: i32, @@ -187,13 +233,16 @@ impl_from_bits!( test_v512 | u64x8, i64x8, f64x8, + m1x8, // FIXME u32x16, f32x16, + m1x16, // FIXME u16x32, i16x32, + m1x32, // FIXME i8x64, u8x64, - b8x64 + m1x64 // FIXME ); 
impl_from_bits!( u32x16: u32, @@ -201,13 +250,16 @@ impl_from_bits!( test_v512 | u64x8, i64x8, f64x8, + m1x8, // FIXME i32x16, f32x16, + m1x16, // FIXME u16x32, i16x32, + m1x32, // FIXME i8x64, u8x64, - b8x64 + m1x64 // FIXME ); impl_from_bits!( f32x16: f32, @@ -215,55 +267,67 @@ impl_from_bits!( test_v512 | u64x8, i64x8, f64x8, + m1x8, // FIXME u32x16, i32x16, + m1x16, // FIXME u16x32, i16x32, + m1x32, // FIXME i8x64, u8x64, - b8x64 + m1x64 // FIXME ); impl_from_bits!( i64x8: i64, i64x8_from_bits, test_v512 | u64x8, f64x8, + m1x8, // FIXME u32x16, i32x16, f32x16, + m1x16, // FIXME u16x32, i16x32, + m1x32, // FIXME i8x64, u8x64, - b8x64 + m1x64 // FIXME ); impl_from_bits!( u64x8: u64, u64x8_from_bits, test_v512 | i64x8, f64x8, + m1x8, // FIXME u32x16, i32x16, f32x16, + m1x16, // FIXME u16x32, i16x32, + m1x32, // FIXME i8x64, u8x64, - b8x64 + m1x64 // FIXME ); impl_from_bits!( f64x8: f64, f64x8_from_bits, test_v512 | u64x8, i64x8, + m1x8, // FIXME u32x16, i32x16, f32x16, + m1x16, // FIXME u16x32, i16x32, + m1x32, // FIXME i8x64, u8x64, - b8x64 + m1x64 // FIXME ); impl_from!( @@ -271,39 +335,51 @@ impl_from!( f64x8_from, test_v512 | u64x8, i64x8, + m1x8, u32x8, i32x8, f32x8, + m32x8, u16x8, i16x8, + m16x8, u8x8, - i8x8 + i8x8, + m8x8 ); impl_from!( i64x8: i64, i64x8_from, test_v512 | u64x8, f64x8, + m1x8, u32x8, i32x8, f32x8, + m32x8, u16x8, i16x8, + m16x8, u8x8, - i8x8 + i8x8, + m8x8 ); impl_from!( u64x8: u64, u64x8_from, test_v512 | i64x8, f64x8, + m1x8, u32x8, i32x8, f32x8, + m32x8, u16x8, i16x8, + m16x8, u8x8, - i8x8 + i8x8, + m8x8 ); impl_from!( @@ -311,30 +387,39 @@ impl_from!( f32x16_from, test_v512 | u32x16, i32x16, + m1x16, u16x16, i16x16, + m16x16, u8x16, - i8x16 + i8x16, + m8x16 ); impl_from!( i32x16: i32, i32x16_from, test_v512 | u32x16, f32x16, + m1x16, u16x16, i16x16, + m16x16, u8x16, - i8x16 + i8x16, + m8x16 ); impl_from!( u32x16: u32, u32x16_from, test_v512 | i32x16, f32x16, + m1x16, u16x16, i16x16, + m16x16, u8x16, - i8x16 + i8x16, + m8x16 ); 
impl_from!( @@ -342,15 +427,36 @@ impl_from!( i16x32_from, test_v512 | u16x32, u8x32, - i8x32 + i8x32, + m1x32, + m8x32 ); impl_from!( u16x32: u16, u16x32_from, test_v512 | i16x32, u8x32, - i8x32 + i8x32, + m1x32, + m8x32 ); -impl_from!(i8x64: i8, i8x64_from, test_v512 | u8x64); -impl_from!(u8x64: u8, u8x64_from, test_v512 | i8x64); +impl_from!(i8x64: i8, i8x64_from, test_v512 | u8x64, m1x64); +impl_from!(u8x64: u8, u8x64_from, test_v512 | i8x64, m1x64); + +impl_from!(m1x32: i16, m1x32_from, test_v512 | m8x32); + +impl_from!( + m1x16: i32, + m1x16_from, + test_v512 | m16x16, + m8x16 +); + +impl_from!( + m1x8: i64, + m1x8_from, + test_v512 | m32x8, + m16x8, + m8x8 +); diff --git a/library/stdarch/coresimd/ppsv/v64.rs b/library/stdarch/coresimd/ppsv/v64.rs index 43f3ca4209f1..cfa56a234a81 100644 --- a/library/stdarch/coresimd/ppsv/v64.rs +++ b/library/stdarch/coresimd/ppsv/v64.rs @@ -1,57 +1,70 @@ //! 64-bit wide portable packed vector types. -use coresimd::simd::{b8x2, b8x4}; simd_i_ty! { - i8x8: 8, i8, b8x8, i8x8_tests, test_v64 | + i8x8: 8, i8, m8x8, i8x8_tests, test_v64 | i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 64-bit vector with 8 `i8` lanes. } simd_u_ty! { - u8x8: 8, u8, b8x8, u8x8_tests, test_v64 | + u8x8: 8, u8, m8x8, u8x8_tests, test_v64 | u8, u8, u8, u8, u8, u8, u8, u8 | x0, x1, x2, x3, x4, x5, x6, x7 | /// A 64-bit vector with 8 `u8` lanes. } -simd_b_ty! { - b8x8: 8, i8, b8x8_tests, test_v64 | +simd_m_ty! { + m8x8: 8, i8, m8x8_tests, test_v64 | i8, i8, i8, i8, i8, i8, i8, i8 | x0, x1, x2, x3, x4, x5, x6, x7 | - /// A 64-bit vector with 8 `bool` lanes. + /// A 64-bit vector mask with 8 lanes. } simd_i_ty! { - i16x4: 4, i16, b8x4, i16x4_tests, test_v64 | + i16x4: 4, i16, m16x4, i16x4_tests, test_v64 | i16, i16, i16, i16 | x0, x1, x2, x3 | /// A 64-bit vector with 4 `i16` lanes. } simd_u_ty! 
{ - u16x4: 4, u16, b8x4, u16x4_tests, test_v64 | + u16x4: 4, u16, m16x4, u16x4_tests, test_v64 | u16, u16, u16, u16 | x0, x1, x2, x3 | /// A 64-bit vector with 4 `u16` lanes. } +simd_m_ty! { + m16x4: 4, i16, m16x4_tests, test_v64 | + i16, i16, i16, i16 | + x0, x1, x2, x3 | + /// A 64-bit vector mask with 4 lanes. +} + simd_i_ty! { - i32x2: 2, i32, b8x2, i32x2_tests, test_v64 | + i32x2: 2, i32, m32x2, i32x2_tests, test_v64 | i32, i32 | x0, x1 | /// A 64-bit vector with 2 `i32` lanes. } simd_u_ty! { - u32x2: 2, u32, b8x2, u32x2_tests, test_v64 | + u32x2: 2, u32, m32x2, u32x2_tests, test_v64 | u32, u32 | x0, x1 | /// A 64-bit vector with 2 `u32` lanes. } +simd_m_ty! { + m32x2: 2, i32, m32x2_tests, test_v64 | + i32, i32 | + x0, x1 | + /// A 64-bit vector mask with 2 lanes. +} + simd_f_ty! { - f32x2: 2, f32, b8x2, f32x2_tests, test_v64 | + f32x2: 2, f32, m32x2, f32x2_tests, test_v64 | f32, f32 | x0, x1 | /// A 64-bit vector with 2 `f32` lanes. @@ -129,11 +142,13 @@ impl_from_bits!( u32x2_from_bits, test_v64 | i32x2, f32x2, + m32x2, u16x4, i16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); from_bits_x86!(u32x2, u32, u32x2_from_bits_x86); from_bits_arm!( @@ -148,11 +163,13 @@ impl_from_bits!( i32x2_from_bits, test_v64 | u32x2, f32x2, + m32x2, u16x4, i16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); from_bits_x86!(i32x2, i32, i32x2_from_bits_x86); from_bits_arm!( @@ -167,11 +184,13 @@ impl_from_bits!( f32x2_from_bits, test_v64 | i32x2, u32x2, + m32x2, u16x4, i16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); from_bits_x86!(f32x2, f32, f32x2_from_bits_x86); from_bits_arm!( @@ -186,10 +205,12 @@ impl_from_bits!( u16x4_from_bits, test_v64 | u32x2, i32x2, + m32x2, i16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); from_bits_x86!(u16x4, u16, u16x4_from_bits_x86); from_bits_arm!( @@ -204,10 +225,12 @@ impl_from_bits!( i16x4_from_bits, test_v64 | u32x2, i32x2, + m32x2, u16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); from_bits_x86!(i16x4, i16, i16x4_from_bits_x86); from_bits_arm!( @@ -222,10 +245,12 @@ 
impl_from_bits!( u8x8_from_bits, test_v64 | u32x2, i32x2, + m32x2, u16x4, i16x4, + m16x4, i8x8, - b8x8 + m8x8 ); from_bits_x86!(u8x8, u8, u8x8_from_bits_x86); from_bits_arm!( @@ -240,10 +265,12 @@ impl_from_bits!( i8x8_from_bits, test_v64 | u32x2, i32x2, + m32x2, u16x4, i16x4, + m16x4, u8x8, - b8x8 + m8x8 ); from_bits_x86!(i8x8, i8, i8x8_from_bits_x86); from_bits_arm!( @@ -259,12 +286,16 @@ impl_from!( test_v64 | f64x2, u64x2, i64x2, + m64x2, u32x2, i32x2, + m32x2, u16x2, i16x2, + m16x2, u8x2, - i8x2 + i8x2, + m8x2 ); impl_from!( @@ -273,12 +304,16 @@ impl_from!( test_v64 | f64x2, u64x2, i64x2, + m64x2, f32x2, i32x2, + m32x2, u16x2, i16x2, + m16x2, u8x2, - i8x2 + i8x2, + m8x2 ); impl_from!( @@ -287,12 +322,16 @@ impl_from!( test_v64 | f64x2, u64x2, i64x2, + m64x2, f32x2, u32x2, + m32x2, u16x2, i16x2, + m16x2, u8x2, - i8x2 + i8x2, + m8x2 ); impl_from!( @@ -301,12 +340,16 @@ impl_from!( test_v64 | f64x4, u64x4, i64x4, + m64x4, f32x4, i32x4, u32x4, + m32x4, i16x4, + m16x4, u8x4, - i8x4 + i8x4, + m8x4 ); impl_from!( @@ -315,12 +358,16 @@ impl_from!( test_v64 | f64x4, u64x4, i64x4, + m64x4, f32x4, i32x4, u32x4, + m32x4, u16x4, + m16x4, u8x4, - i8x4 + i8x4, + m8x4 ); impl_from!( i8x8: i8, @@ -328,12 +375,16 @@ impl_from!( test_v64 | f64x8, u64x8, i64x8, + m1x8, f32x8, u32x8, i32x8, + m32x8, i16x8, u16x8, - u8x8 + m16x8, + u8x8, + m8x8 ); impl_from!( u8x8: u8, @@ -341,10 +392,38 @@ impl_from!( test_v64 | f64x8, u64x8, i64x8, + m1x8, f32x8, u32x8, i32x8, + m32x8, i16x8, u16x8, - i8x8 + m16x8, + i8x8, + m8x8 +); + +impl_from!( + m8x8: i8, + m8x8_from, + test_v64 | m1x8, + m32x8, + m16x8 +); + +impl_from!( + m16x4: i16, + m16x4_from, + test_v64 | m64x4, + m32x4, + m8x4 +); + +impl_from!( + m32x2: i32, + m32x2_from, + test_v64 | m64x2, + m16x2, + m8x2 ); diff --git a/library/stdarch/coresimd/simd_llvm.rs b/library/stdarch/coresimd/simd_llvm.rs index 790ec31afb8a..52d8629a31af 100644 --- a/library/stdarch/coresimd/simd_llvm.rs +++ b/library/stdarch/coresimd/simd_llvm.rs @@ 
-45,4 +45,6 @@ extern "platform-intrinsic" { pub fn simd_reduce_xor(x: T) -> U; pub fn simd_reduce_all(x: T) -> bool; pub fn simd_reduce_any(x: T) -> bool; + + pub fn simd_select(m: M, a: T, b: T) -> T; } diff --git a/library/stdarch/coresimd/x86/mod.rs b/library/stdarch/coresimd/x86/mod.rs index 536253684411..716c935da01c 100644 --- a/library/stdarch/coresimd/x86/mod.rs +++ b/library/stdarch/coresimd/x86/mod.rs @@ -438,98 +438,120 @@ impl m256iExt for __m256i { } } -use coresimd::simd::{b8x16, b8x32, b8x8, f32x2, f32x4, f32x8, f64x2, f64x4, - i16x16, i16x4, i16x8, i32x2, i32x4, i32x8, i64x2, i64x4, - i8x16, i8x32, i8x8, u16x16, u16x4, u16x8, u32x2, u32x4, +use coresimd::simd::{f32x2, f32x4, f32x8, f64x2, f64x4, i16x16, i16x4, i16x8, + i32x2, i32x4, i32x8, i64x2, i64x4, i8x16, i8x32, i8x8, + m16x16, m16x4, m16x8, m32x2, m32x4, m32x8, m64x2, m64x4, + m8x16, m8x32, m8x8, u16x16, u16x4, u16x8, u32x2, u32x4, u32x8, u64x2, u64x4, u8x16, u8x32, u8x8}; impl_from_bits_!( __m64: u32x2, i32x2, f32x2, + m32x2, u16x4, i16x4, + m16x4, u8x8, i8x8, - b8x8 + m8x8 ); + impl_from_bits_!( __m128: u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); impl_from_bits_!( __m128i: u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); impl_from_bits_!( __m128d: u64x2, i64x2, f64x2, + m64x2, u32x4, i32x4, f32x4, + m32x4, u16x8, i16x8, + m16x8, u8x16, i8x16, - b8x16 + m8x16 ); impl_from_bits_!( __m256: u64x4, i64x4, f64x4, + m64x4, u32x8, i32x8, f32x8, + m32x8, u16x16, i16x16, + m16x16, u8x32, i8x32, - b8x32 + m8x32 ); impl_from_bits_!( __m256i: u64x4, i64x4, f64x4, + m64x4, u32x8, i32x8, f32x8, + m32x8, u16x16, i16x16, + m16x16, u8x32, i8x32, - b8x32 + m8x32 ); impl_from_bits_!( __m256d: u64x4, i64x4, f64x4, + m64x4, u32x8, i32x8, f32x8, + m32x8, u16x16, i16x16, + m16x16, u8x32, i8x32, - b8x32 + m8x32 ); mod eflags;