Add AVX 512f gather, scatter and compare intrinsics (#866)
Co-authored-by: bjorn3 <bjorn3@users.noreply.github.com>
This commit is contained in:
parent
a214956fe5
commit
5ff50904d8
8 changed files with 2509 additions and 3 deletions
|
|
@ -198,6 +198,18 @@ simd_ty!(i32x16[i32]:
|
|||
| x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
x8, x9, x10, x11, x12, x13, x14, x15);
|
||||
|
||||
simd_ty!(u32x16[u32]:
|
||||
u32, u32, u32, u32, u32, u32, u32, u32,
|
||||
u32, u32, u32, u32, u32, u32, u32, u32
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
x8, x9, x10, x11, x12, x13, x14, x15);
|
||||
|
||||
simd_ty!(f32x16[f32]:
|
||||
f32, f32, f32, f32, f32, f32, f32, f32,
|
||||
f32, f32, f32, f32, f32, f32, f32, f32
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
x8, x9, x10, x11, x12, x13, x14, x15);
|
||||
|
||||
simd_ty!(i64x8[i64]:
|
||||
i64, i64, i64, i64, i64, i64, i64, i64
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
|
|
@ -205,3 +217,7 @@ simd_ty!(i64x8[i64]:
|
|||
simd_ty!(u64x8[u64]:
|
||||
u64, u64, u64, u64, u64, u64, u64, u64
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
|
||||
simd_ty!(f64x8[f64]:
|
||||
f64, f64, f64, f64, f64, f64, f64, f64
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -92,6 +92,22 @@ macro_rules! constify_imm2 {
|
|||
};
|
||||
}
|
||||
|
||||
// For gather instructions, the only valid values for scale are 1, 2, 4 and 8.
|
||||
// This macro enforces that.
|
||||
#[allow(unused)]
|
||||
macro_rules! constify_imm8_gather {
|
||||
($imm8:expr, $expand:ident) => {
|
||||
#[allow(overflowing_literals)]
|
||||
match ($imm8) {
|
||||
1 => $expand!(1),
|
||||
2 => $expand!(2),
|
||||
4 => $expand!(4),
|
||||
8 => $expand!(8),
|
||||
_ => panic!("Only 1, 2, 4, and 8 are valid values"),
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
macro_rules! assert_approx_eq {
|
||||
($a:expr, $b:expr, $eps:expr) => {{
|
||||
|
|
|
|||
|
|
@ -350,6 +350,10 @@ pub type __mmask16 = u16;
|
|||
#[allow(non_camel_case_types)]
|
||||
pub type __mmask8 = u8;
|
||||
|
||||
/// The `_MM_CMPINT_ENUM` type used to specify comparison operations in AVX-512 intrinsics.
|
||||
#[allow(non_camel_case_types)]
|
||||
pub type _MM_CMPINT_ENUM = i32;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
#[cfg(test)]
|
||||
|
|
@ -504,11 +508,34 @@ impl m256iExt for __m256i {
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
#[unstable(feature = "stdimd_internal", issue = "none")]
|
||||
pub(crate) trait m256Ext: Sized {
|
||||
fn as_m256(self) -> __m256;
|
||||
|
||||
#[inline]
|
||||
fn as_f32x8(self) -> crate::core_arch::simd::f32x8 {
|
||||
unsafe { transmute(self.as_m256()) }
|
||||
}
|
||||
}
|
||||
|
||||
impl m256Ext for __m256 {
|
||||
#[inline]
|
||||
fn as_m256(self) -> Self {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
#[unstable(feature = "stdimd_internal", issue = "none")]
|
||||
pub(crate) trait m512iExt: Sized {
|
||||
fn as_m512i(self) -> __m512i;
|
||||
|
||||
#[inline]
|
||||
fn as_u32x16(self) -> crate::core_arch::simd::u32x16 {
|
||||
unsafe { transmute(self.as_m512i()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i32x16(self) -> crate::core_arch::simd::i32x16 {
|
||||
unsafe { transmute(self.as_m512i()) }
|
||||
|
|
@ -532,6 +559,42 @@ impl m512iExt for __m512i {
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
#[unstable(feature = "stdimd_internal", issue = "none")]
|
||||
pub(crate) trait m512Ext: Sized {
|
||||
fn as_m512(self) -> __m512;
|
||||
|
||||
#[inline]
|
||||
fn as_f32x16(self) -> crate::core_arch::simd::f32x16 {
|
||||
unsafe { transmute(self.as_m512()) }
|
||||
}
|
||||
}
|
||||
|
||||
impl m512Ext for __m512 {
|
||||
#[inline]
|
||||
fn as_m512(self) -> Self {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
#[unstable(feature = "stdimd_internal", issue = "none")]
|
||||
pub(crate) trait m512dExt: Sized {
|
||||
fn as_m512d(self) -> __m512d;
|
||||
|
||||
#[inline]
|
||||
fn as_f64x8(self) -> crate::core_arch::simd::f64x8 {
|
||||
unsafe { transmute(self.as_m512d()) }
|
||||
}
|
||||
}
|
||||
|
||||
impl m512dExt for __m512d {
|
||||
#[inline]
|
||||
fn as_m512d(self) -> Self {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
mod eflags;
|
||||
pub use self::eflags::*;
|
||||
|
||||
|
|
|
|||
|
|
@ -143,3 +143,21 @@ pub unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) {
|
|||
}
|
||||
assert_eq!(A { a }.b, A { a: b }.b)
|
||||
}
|
||||
|
||||
pub unsafe fn assert_eq_m512(a: __m512, b: __m512) {
|
||||
// TODO: This should use `_mm512_cmpeq_ps_mask`, but that isn't yet implemented.
|
||||
union A {
|
||||
a: __m512,
|
||||
b: [f32; 16],
|
||||
}
|
||||
assert_eq!(A { a }.b, A { a: b }.b)
|
||||
}
|
||||
|
||||
pub unsafe fn assert_eq_m512d(a: __m512d, b: __m512d) {
|
||||
// TODO: This should use `_mm512_cmpeq_pd_mask`, but that isn't yet implemented.
|
||||
union A {
|
||||
a: __m512d,
|
||||
b: [f64; 8],
|
||||
}
|
||||
assert_eq!(A { a }.b, A { a: b }.b)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,44 @@ use crate::{
|
|||
mem::transmute,
|
||||
};
|
||||
|
||||
/// Sets packed 64-bit integers in `dst` with the supplied values.
|
||||
///
|
||||
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_set_pd(
|
||||
e0: f64,
|
||||
e1: f64,
|
||||
e2: f64,
|
||||
e3: f64,
|
||||
e4: f64,
|
||||
e5: f64,
|
||||
e6: f64,
|
||||
e7: f64,
|
||||
) -> __m512d {
|
||||
_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
|
||||
}
|
||||
|
||||
/// Sets packed 64-bit integers in `dst` with the supplied values in
|
||||
/// reverse order.
|
||||
///
|
||||
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_setr_pd(
|
||||
e0: f64,
|
||||
e1: f64,
|
||||
e2: f64,
|
||||
e3: f64,
|
||||
e4: f64,
|
||||
e5: f64,
|
||||
e6: f64,
|
||||
e7: f64,
|
||||
) -> __m512d {
|
||||
let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
/// Sets packed 64-bit integers in `dst` with the supplied values.
|
||||
///
|
||||
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_epi64)
|
||||
|
|
@ -49,6 +87,22 @@ mod tests {
|
|||
use crate::core_arch::x86::*;
|
||||
use crate::core_arch::x86_64::*;
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_setzero_pd() {
|
||||
assert_eq_m512d(_mm512_setzero_pd(), _mm512_set1_pd(0.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_set1_pd() {
|
||||
let expected = _mm512_set_pd(2., 2., 2., 2., 2., 2., 2., 2.);
|
||||
assert_eq_m512d(expected, _mm512_set1_pd(2.));
|
||||
}
|
||||
|
||||
unsafe fn test_mm512_set1_epi64() {
|
||||
let r = _mm512_set_epi64(2, 2, 2, 2, 2, 2, 2, 2);
|
||||
assert_eq_m512i(r, _mm512_set1_epi64(2));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_cmplt_epu64_mask() {
|
||||
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
|
||||
|
|
@ -136,6 +190,40 @@ mod tests {
|
|||
assert_eq!(r, 0b01001010);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_cmpneq_epu64_mask() {
|
||||
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
|
||||
let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
|
||||
let m = _mm512_cmpneq_epu64_mask(b, a);
|
||||
assert_eq!(m, !_mm512_cmpeq_epu64_mask(b, a));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_cmpneq_epu64_mask() {
|
||||
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, -100, 100);
|
||||
let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
|
||||
let mask = 0b01111010;
|
||||
let r = _mm512_mask_cmpneq_epu64_mask(mask, b, a);
|
||||
assert_eq!(r, 0b00110010);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_cmp_epu64_mask() {
|
||||
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
|
||||
let b = _mm512_set1_epi64(-1);
|
||||
let m = _mm512_cmp_epu64_mask(a, b, _MM_CMPINT_LT);
|
||||
assert_eq!(m, 0b11001111);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_cmp_epu64_mask() {
|
||||
let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
|
||||
let b = _mm512_set1_epi64(-1);
|
||||
let mask = 0b01111010;
|
||||
let r = _mm512_mask_cmp_epu64_mask(mask, a, b, _MM_CMPINT_LT);
|
||||
assert_eq!(r, 0b01001010);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_cmplt_epi64_mask() {
|
||||
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
|
||||
|
|
@ -223,6 +311,18 @@ mod tests {
|
|||
assert_eq!(r, 0b01001010);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_set_pd() {
|
||||
let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
|
||||
assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_setr_pd() {
|
||||
let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
|
||||
assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_set_epi64() {
|
||||
let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
|
|
@ -234,4 +334,379 @@ mod tests {
|
|||
let r = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0))
|
||||
}
|
||||
|
||||
unsafe fn test_mm512_cmpneq_epi64_mask() {
|
||||
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
|
||||
let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
|
||||
let m = _mm512_cmpneq_epi64_mask(b, a);
|
||||
assert_eq!(m, !_mm512_cmpeq_epi64_mask(b, a));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_cmpneq_epi64_mask() {
|
||||
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, -100, 100);
|
||||
let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
|
||||
let mask = 0b01111010;
|
||||
let r = _mm512_mask_cmpneq_epi64_mask(mask, b, a);
|
||||
assert_eq!(r, 0b00110010)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_cmp_epi64_mask() {
|
||||
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
|
||||
let b = _mm512_set1_epi64(-1);
|
||||
let m = _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_LT);
|
||||
assert_eq!(m, 0b00000101);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_cmp_epi64_mask() {
|
||||
let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
|
||||
let b = _mm512_set1_epi64(-1);
|
||||
let mask = 0b01100110;
|
||||
let r = _mm512_mask_cmp_epi64_mask(mask, a, b, _MM_CMPINT_LT);
|
||||
assert_eq!(r, 0b00000100);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i32gather_pd() {
|
||||
let mut arr = [0f64; 128];
|
||||
for i in 0..128 {
|
||||
arr[i] = i as f64;
|
||||
}
|
||||
// A multiplier of 8 is word-addressing
|
||||
#[rustfmt::skip]
|
||||
let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let r = _mm512_i32gather_pd(index, arr.as_ptr() as *const u8, 8);
|
||||
assert_eq_m512d(r, _mm512_setr_pd(0., 16., 32., 48., 64., 80., 96., 112.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i32gather_pd() {
|
||||
let mut arr = [0f64; 128];
|
||||
for i in 0..128 {
|
||||
arr[i] = i as f64;
|
||||
}
|
||||
let src = _mm512_set1_pd(2.);
|
||||
let mask = 0b10101010;
|
||||
#[rustfmt::skip]
|
||||
let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
// A multiplier of 8 is word-addressing
|
||||
let r = _mm512_mask_i32gather_pd(src, mask, index, arr.as_ptr() as *const u8, 8);
|
||||
assert_eq_m512d(r, _mm512_setr_pd(2., 16., 2., 48., 2., 80., 2., 112.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i64gather_pd() {
|
||||
let mut arr = [0f64; 128];
|
||||
for i in 0..128 {
|
||||
arr[i] = i as f64;
|
||||
}
|
||||
// A multiplier of 8 is word-addressing
|
||||
#[rustfmt::skip]
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let r = _mm512_i64gather_pd(index, arr.as_ptr() as *const u8, 8);
|
||||
assert_eq_m512d(r, _mm512_setr_pd(0., 16., 32., 48., 64., 80., 96., 112.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i64gather_pd() {
|
||||
let mut arr = [0f64; 128];
|
||||
for i in 0..128 {
|
||||
arr[i] = i as f64;
|
||||
}
|
||||
let src = _mm512_set1_pd(2.);
|
||||
let mask = 0b10101010;
|
||||
#[rustfmt::skip]
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
// A multiplier of 8 is word-addressing
|
||||
let r = _mm512_mask_i64gather_pd(src, mask, index, arr.as_ptr() as *const u8, 8);
|
||||
assert_eq_m512d(r, _mm512_setr_pd(2., 16., 2., 48., 2., 80., 2., 112.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i64gather_ps() {
|
||||
let mut arr = [0f32; 128];
|
||||
for i in 0..128 {
|
||||
arr[i] = i as f32;
|
||||
}
|
||||
// A multiplier of 4 is word-addressing
|
||||
#[rustfmt::skip]
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let r = _mm512_i64gather_ps(index, arr.as_ptr() as *const u8, 4);
|
||||
assert_eq_m256(r, _mm256_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i64gather_ps() {
|
||||
let mut arr = [0f32; 128];
|
||||
for i in 0..128 {
|
||||
arr[i] = i as f32;
|
||||
}
|
||||
let src = _mm256_set1_ps(2.);
|
||||
let mask = 0b10101010;
|
||||
#[rustfmt::skip]
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
// A multiplier of 4 is word-addressing
|
||||
let r = _mm512_mask_i64gather_ps(src, mask, index, arr.as_ptr() as *const u8, 4);
|
||||
assert_eq_m256(r, _mm256_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i32gather_epi64() {
|
||||
let mut arr = [0i64; 128];
|
||||
for i in 0..128i64 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
// A multiplier of 8 is word-addressing
|
||||
#[rustfmt::skip]
|
||||
let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let r = _mm512_i32gather_epi64(index, arr.as_ptr() as *const u8, 8);
|
||||
assert_eq_m512i(r, _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i32gather_epi64() {
|
||||
let mut arr = [0i64; 128];
|
||||
for i in 0..128i64 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let src = _mm512_set1_epi64(2);
|
||||
let mask = 0b10101010;
|
||||
#[rustfmt::skip]
|
||||
let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
// A multiplier of 8 is word-addressing
|
||||
let r = _mm512_mask_i32gather_epi64(src, mask, index, arr.as_ptr() as *const u8, 8);
|
||||
assert_eq_m512i(r, _mm512_setr_epi64(2, 16, 2, 48, 2, 80, 2, 112));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i64gather_epi64() {
|
||||
let mut arr = [0i64; 128];
|
||||
for i in 0..128i64 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
// A multiplier of 8 is word-addressing
|
||||
#[rustfmt::skip]
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let r = _mm512_i64gather_epi64(index, arr.as_ptr() as *const u8, 8);
|
||||
assert_eq_m512i(r, _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i64gather_epi64() {
|
||||
let mut arr = [0i64; 128];
|
||||
for i in 0..128i64 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let src = _mm512_set1_epi64(2);
|
||||
let mask = 0b10101010;
|
||||
#[rustfmt::skip]
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
// A multiplier of 8 is word-addressing
|
||||
let r = _mm512_mask_i64gather_epi64(src, mask, index, arr.as_ptr() as *const u8, 8);
|
||||
assert_eq_m512i(r, _mm512_setr_epi64(2, 16, 2, 48, 2, 80, 2, 112));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i64gather_epi32() {
|
||||
let mut arr = [0i64; 128];
|
||||
for i in 0..128i64 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
// A multiplier of 8 is word-addressing
|
||||
#[rustfmt::skip]
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let r = _mm512_i64gather_epi32(index, arr.as_ptr() as *const u8, 8);
|
||||
assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i64gather_epi32() {
|
||||
let mut arr = [0i64; 128];
|
||||
for i in 0..128i64 {
|
||||
arr[i as usize] = i;
|
||||
}
|
||||
let src = _mm256_set1_epi32(2);
|
||||
let mask = 0b10101010;
|
||||
#[rustfmt::skip]
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
// A multiplier of 8 is word-addressing
|
||||
let r = _mm512_mask_i64gather_epi32(src, mask, index, arr.as_ptr() as *const u8, 8);
|
||||
assert_eq_m256i(r, _mm256_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i32scatter_pd() {
|
||||
let mut arr = [0f64; 128];
|
||||
let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
|
||||
// A multiplier of 8 is word-addressing
|
||||
_mm512_i32scatter_pd(arr.as_mut_ptr() as *mut u8, index, src, 8);
|
||||
let mut expected = [0f64; 128];
|
||||
for i in 0..8 {
|
||||
expected[i * 16] = (i + 1) as f64;
|
||||
}
|
||||
assert_eq!(&arr[..], &expected[..],);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i32scatter_pd() {
|
||||
let mut arr = [0f64; 128];
|
||||
let mask = 0b10101010;
|
||||
let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
|
||||
// A multiplier of 8 is word-addressing
|
||||
_mm512_mask_i32scatter_pd(arr.as_mut_ptr() as *mut u8, mask, index, src, 8);
|
||||
let mut expected = [0f64; 128];
|
||||
for i in 0..4 {
|
||||
expected[i * 32 + 16] = 2. * (i + 1) as f64;
|
||||
}
|
||||
assert_eq!(&arr[..], &expected[..],);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i64scatter_pd() {
|
||||
let mut arr = [0f64; 128];
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
|
||||
// A multiplier of 8 is word-addressing
|
||||
_mm512_i64scatter_pd(arr.as_mut_ptr() as *mut u8, index, src, 8);
|
||||
let mut expected = [0f64; 128];
|
||||
for i in 0..8 {
|
||||
expected[i * 16] = (i + 1) as f64;
|
||||
}
|
||||
assert_eq!(&arr[..], &expected[..],);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i64scatter_pd() {
|
||||
let mut arr = [0f64; 128];
|
||||
let mask = 0b10101010;
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
|
||||
// A multiplier of 8 is word-addressing
|
||||
_mm512_mask_i64scatter_pd(arr.as_mut_ptr() as *mut u8, mask, index, src, 8);
|
||||
let mut expected = [0f64; 128];
|
||||
for i in 0..4 {
|
||||
expected[i * 32 + 16] = 2. * (i + 1) as f64;
|
||||
}
|
||||
assert_eq!(&arr[..], &expected[..],);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i64scatter_ps() {
|
||||
let mut arr = [0f32; 128];
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let src = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
|
||||
// A multiplier of 4 is word-addressing
|
||||
_mm512_i64scatter_ps(arr.as_mut_ptr() as *mut u8, index, src, 4);
|
||||
let mut expected = [0f32; 128];
|
||||
for i in 0..8 {
|
||||
expected[i * 16] = (i + 1) as f32;
|
||||
}
|
||||
assert_eq!(&arr[..], &expected[..],);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i64scatter_ps() {
|
||||
let mut arr = [0f32; 128];
|
||||
let mask = 0b10101010;
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let src = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
|
||||
// A multiplier of 4 is word-addressing
|
||||
_mm512_mask_i64scatter_ps(arr.as_mut_ptr() as *mut u8, mask, index, src, 4);
|
||||
let mut expected = [0f32; 128];
|
||||
for i in 0..4 {
|
||||
expected[i * 32 + 16] = 2. * (i + 1) as f32;
|
||||
}
|
||||
assert_eq!(&arr[..], &expected[..],);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i32scatter_epi64() {
|
||||
let mut arr = [0i64; 128];
|
||||
let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
// A multiplier of 8 is word-addressing
|
||||
_mm512_i32scatter_epi64(arr.as_mut_ptr() as *mut u8, index, src, 8);
|
||||
let mut expected = [0i64; 128];
|
||||
for i in 0..8 {
|
||||
expected[i * 16] = (i + 1) as i64;
|
||||
}
|
||||
assert_eq!(&arr[..], &expected[..],);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i32scatter_epi64() {
|
||||
let mut arr = [0i64; 128];
|
||||
let mask = 0b10101010;
|
||||
let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
// A multiplier of 8 is word-addressing
|
||||
_mm512_mask_i32scatter_epi64(arr.as_mut_ptr() as *mut u8, mask, index, src, 8);
|
||||
let mut expected = [0i64; 128];
|
||||
for i in 0..4 {
|
||||
expected[i * 32 + 16] = 2 * (i + 1) as i64;
|
||||
}
|
||||
assert_eq!(&arr[..], &expected[..],);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i64scatter_epi64() {
|
||||
let mut arr = [0i64; 128];
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
// A multiplier of 8 is word-addressing
|
||||
_mm512_i64scatter_epi64(arr.as_mut_ptr() as *mut u8, index, src, 8);
|
||||
let mut expected = [0i64; 128];
|
||||
for i in 0..8 {
|
||||
expected[i * 16] = (i + 1) as i64;
|
||||
}
|
||||
assert_eq!(&arr[..], &expected[..],);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i64scatter_epi64() {
|
||||
let mut arr = [0i64; 128];
|
||||
let mask = 0b10101010;
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
// A multiplier of 8 is word-addressing
|
||||
_mm512_mask_i64scatter_epi64(arr.as_mut_ptr() as *mut u8, mask, index, src, 8);
|
||||
let mut expected = [0i64; 128];
|
||||
for i in 0..4 {
|
||||
expected[i * 32 + 16] = 2 * (i + 1) as i64;
|
||||
}
|
||||
assert_eq!(&arr[..], &expected[..],);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_i64scatter_epi32() {
|
||||
let mut arr = [0i32; 128];
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let src = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
// A multiplier of 4 is word-addressing
|
||||
_mm512_i64scatter_epi32(arr.as_mut_ptr() as *mut u8, index, src, 4);
|
||||
let mut expected = [0i32; 128];
|
||||
for i in 0..8 {
|
||||
expected[i * 16] = (i + 1) as i32;
|
||||
}
|
||||
assert_eq!(&arr[..], &expected[..],);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_i64scatter_epi32() {
|
||||
let mut arr = [0i32; 128];
|
||||
let mask = 0b10101010;
|
||||
let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
|
||||
let src = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
// A multiplier of 4 is word-addressing
|
||||
_mm512_mask_i64scatter_epi32(arr.as_mut_ptr() as *mut u8, mask, index, src, 4);
|
||||
let mut expected = [0i32; 128];
|
||||
for i in 0..4 {
|
||||
expected[i * 32 + 16] = 2 * (i + 1) as i32;
|
||||
}
|
||||
assert_eq!(&arr[..], &expected[..],);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -147,6 +147,7 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
|
|||
"__m512i" => quote! { &M512I },
|
||||
"__mmask8" => quote! { &MMASK8 },
|
||||
"__mmask16" => quote! { &MMASK16 },
|
||||
"_MM_CMPINT_ENUM" => quote! { &MM_CMPINT_ENUM },
|
||||
"__m64" => quote! { &M64 },
|
||||
"bool" => quote! { &BOOL },
|
||||
"f32" => quote! { &F32 },
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ static M512I: Type = Type::M512I;
|
|||
static M512D: Type = Type::M512D;
|
||||
static MMASK8: Type = Type::MMASK8;
|
||||
static MMASK16: Type = Type::MMASK16;
|
||||
static MM_CMPINT_ENUM: Type = Type::MM_CMPINT_ENUM;
|
||||
|
||||
static TUPLE: Type = Type::Tuple;
|
||||
static CPUID: Type = Type::CpuidResult;
|
||||
|
|
@ -79,6 +80,7 @@ enum Type {
|
|||
M512I,
|
||||
MMASK8,
|
||||
MMASK16,
|
||||
MM_CMPINT_ENUM,
|
||||
Tuple,
|
||||
CpuidResult,
|
||||
Never,
|
||||
|
|
@ -218,9 +220,6 @@ fn verify_all_signatures() {
|
|||
"_mm256_undefined_si256",
|
||||
"_bextr2_u32",
|
||||
"_mm_tzcnt_32",
|
||||
"_mm512_setzero_si512",
|
||||
"_mm512_setr_epi32",
|
||||
"_mm512_set1_epi64",
|
||||
"_m_paddb",
|
||||
"_m_paddw",
|
||||
"_m_paddd",
|
||||
|
|
@ -460,6 +459,10 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
|
|||
// The XML file names IFMA as "avx512ifma52", while Rust calls
|
||||
// it "avx512ifma".
|
||||
"avx512ifma52" => String::from("avx512ifma"),
|
||||
// Some AVX512f intrinsics are also supported by Knight's Corner.
|
||||
// The XML lists them as avx512f/kncni, but we are solely gating
|
||||
// them behind avx512f since we don't have a KNC feature yet.
|
||||
"avx512f/kncni" => String::from("avx512f"),
|
||||
// See: https://github.com/rust-lang/stdarch/issues/738
|
||||
// The intrinsics guide calls `f16c` `fp16c` in disagreement with
|
||||
// Intel's architecture manuals.
|
||||
|
|
@ -664,6 +667,7 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
|
|||
|
||||
(&Type::MMASK8, "__mmask8") => {}
|
||||
(&Type::MMASK16, "__mmask16") => {}
|
||||
(&Type::MM_CMPINT_ENUM, "_MM_CMPINT_ENUM") => {}
|
||||
|
||||
// This is a macro (?) in C which seems to mutate its arguments, but
|
||||
// that means that we're taking pointers to arguments in rust
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue