Implemented missing gather-scatters

This commit is contained in:
sayantn 2024-06-30 12:34:35 +05:30 committed by Amanieu d'Antras
parent 1c3b3b80c0
commit 775dcaabde
3 changed files with 1981 additions and 109 deletions

View file

@ -147,78 +147,6 @@
</p></details>
<details><summary>["AVX512F"]</summary><p>
* [ ] [`_mm512_i32logather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
* [ ] [`_mm512_i32logather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
* [ ] [`_mm512_i32loscatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
* [ ] [`_mm512_i32loscatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
* [ ] [`_mm512_mask_i32logather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
* [ ] [`_mm512_mask_i32logather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
* [ ] [`_mm512_mask_i32loscatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
* [ ] [`_mm512_mask_i32loscatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
* [ ] [`_mm512_stream_load_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_stream_load_si512)
* [ ] [`_mm_mask_load_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
* [ ] [`_mm_mask_load_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
* [ ] [`_mm_mask_store_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd)
* [ ] [`_mm_mask_store_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
* [ ] [`_mm_maskz_load_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
* [ ] [`_mm_maskz_load_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
</p></details>
<details><summary>["AVX512F", "AVX512VL"]</summary><p>
* [ ] [`_mm256_i32scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
* [ ] [`_mm256_i32scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
* [ ] [`_mm256_i32scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
* [ ] [`_mm256_i64scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
* [ ] [`_mm256_i64scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
* [ ] [`_mm256_i64scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
* [ ] [`_mm256_i64scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
* [ ] [`_mm256_mask_i32scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
* [ ] [`_mm256_mask_i32scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
* [ ] [`_mm256_mask_i32scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
* [ ] [`_mm256_mask_i32scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
* [ ] [`_mm256_mask_i64scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
* [ ] [`_mm256_mask_i64scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
* [ ] [`_mm256_mask_i64scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
* [ ] [`_mm256_mask_i64scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
* [ ] [`_mm256_mmask_i32gather_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
* [ ] [`_mm256_mmask_i32gather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
* [ ] [`_mm256_mmask_i32gather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
* [ ] [`_mm256_mmask_i32gather_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
* [ ] [`_mm256_mmask_i64gather_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
* [ ] [`_mm256_mmask_i64gather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
* [ ] [`_mm256_mmask_i64gather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
* [ ] [`_mm256_mmask_i64gather_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
* [ ] [`_mm_i32scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
* [ ] [`_mm_i32scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
* [ ] [`_mm_i32scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
* [ ] [`_mm_i32scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
* [ ] [`_mm_i64scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
* [ ] [`_mm_i64scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
* [ ] [`_mm_i64scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
* [ ] [`_mm_i64scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
* [ ] [`_mm_mask_i32scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
* [ ] [`_mm_mask_i32scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
* [ ] [`_mm_mask_i32scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
* [ ] [`_mm_mask_i32scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
* [ ] [`_mm_mask_i64scatter_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
* [ ] [`_mm_mask_i64scatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
* [ ] [`_mm_mask_i64scatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
* [ ] [`_mm_mask_i64scatter_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
* [ ] [`_mm_mmask_i32gather_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
* [ ] [`_mm_mmask_i32gather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
* [ ] [`_mm_mmask_i32gather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
* [ ] [`_mm_mmask_i32gather_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
* [ ] [`_mm_mmask_i64gather_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
* [ ] [`_mm_mmask_i64gather_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
* [ ] [`_mm_mmask_i64gather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
* [ ] [`_mm_mmask_i64gather_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
</p></details>
<details><summary>["AVX512_BF16", "AVX512F"]</summary><p>
* [ ] [`_mm512_cvtpbh_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_cvtpbh_ps)

File diff suppressed because it is too large Load diff

View file

@ -7649,20 +7649,6 @@ mod tests {
assert_eq!(&arr[..], &expected[..],);
}
// Scatters four i64 values to every 16th slot of a 64-element buffer and
// checks that all other slots stay zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_i32scatter_epi64() {
    let mut buffer = [0i64; 64];
    let offsets = _mm_setr_epi32(0, 16, 32, 48);
    let values = _mm256_setr_epi64x(1, 2, 3, 4);
    // With a scale of 8 each index step advances by one i64 element.
    _mm256_i32scatter_epi64::<8>(buffer.as_mut_ptr().cast::<u8>(), offsets, values);

    let mut want = [0i64; 64];
    for (lane, slot) in want.iter_mut().step_by(16).enumerate() {
        *slot = (lane + 1) as i64;
    }
    assert_eq!(&buffer[..], &want[..],);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_i64scatter_epi64() {
let mut arr = [0i64; 128];
@ -7721,6 +7707,566 @@ mod tests {
assert_eq!(&arr[..], &expected[..],);
}
// Gathers eight i64 values through a rotated index vector.
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_i32logather_epi64() {
    let table: [i64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
    // Upper eight index lanes are -1; the result has only eight lanes, so they
    // are presumably ignored by the `lo` variant.
    let offsets = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
    let got = _mm512_i32logather_epi64::<8>(offsets, table.as_ptr().cast());
    let want = _mm512_setr_epi64(2, 3, 4, 5, 6, 7, 8, 1);
    assert_eq_m512i(want, got);
}
// Masked gather of i64: even lanes are loaded from the table, odd lanes keep
// the value from the fallback vector.
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_i32logather_epi64() {
    let table: [i64; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
    let fallback = _mm512_setr_epi64(9, 10, 11, 12, 13, 14, 15, 16);
    let offsets = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
    let table_ptr = table.as_ptr().cast();
    let got = _mm512_mask_i32logather_epi64::<8>(fallback, 0b01010101, offsets, table_ptr);
    let want = _mm512_setr_epi64(2, 10, 4, 12, 6, 14, 8, 16);
    assert_eq_m512i(want, got);
}
// Gathers eight f64 values through a rotated index vector.
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_i32logather_pd() {
    let table: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
    // Upper eight index lanes are -1; presumably unused by the `lo` variant.
    let offsets = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
    let got = _mm512_i32logather_pd::<8>(offsets, table.as_ptr().cast());
    let want = _mm512_setr_pd(2., 3., 4., 5., 6., 7., 8., 1.);
    assert_eq_m512d(want, got);
}
// Masked gather of f64: even lanes are loaded from the table, odd lanes keep
// the value from the fallback vector.
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_i32logather_pd() {
    let table: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
    let fallback = _mm512_setr_pd(9., 10., 11., 12., 13., 14., 15., 16.);
    let offsets = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
    let table_ptr = table.as_ptr().cast();
    let got = _mm512_mask_i32logather_pd::<8>(fallback, 0b01010101, offsets, table_ptr);
    let want = _mm512_setr_pd(2., 10., 4., 12., 6., 14., 8., 16.);
    assert_eq_m512d(want, got);
}
// Scatters eight i64 values through a rotated index vector so the
// destination ends up holding 1..=8 in order.
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_i32loscatter_epi64() {
    let mut dest: [i64; 8] = [0; 8];
    let offsets = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
    let values = _mm512_setr_epi64(2, 3, 4, 5, 6, 7, 8, 1);
    _mm512_i32loscatter_epi64::<8>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1, 2, 3, 4, 5, 6, 7, 8];
    assert_eq!(want, dest);
}
// Masked scatter of i64: only even source lanes are stored, so every other
// destination slot remains zero.
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_i32loscatter_epi64() {
    let mut dest: [i64; 8] = [0; 8];
    let offsets = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
    let values = _mm512_setr_epi64(2, 3, 4, 5, 6, 7, 8, 1);
    let dest_ptr = dest.as_mut_ptr().cast();
    _mm512_mask_i32loscatter_epi64::<8>(dest_ptr, 0b01010101, offsets, values);
    let want = [0, 2, 0, 4, 0, 6, 0, 8];
    assert_eq!(want, dest);
}
// Scatters eight f64 values through a rotated index vector so the
// destination ends up holding 1.0..=8.0 in order.
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_i32loscatter_pd() {
    let mut dest: [f64; 8] = [0.; 8];
    let offsets = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
    let values = _mm512_setr_pd(2., 3., 4., 5., 6., 7., 8., 1.);
    _mm512_i32loscatter_pd::<8>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1., 2., 3., 4., 5., 6., 7., 8.];
    assert_eq!(want, dest);
}
// Masked scatter of f64: only even source lanes are stored, so every other
// destination slot remains zero.
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_i32loscatter_pd() {
    let mut dest: [f64; 8] = [0.; 8];
    let offsets = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0, -1, -1, -1, -1, -1, -1, -1, -1);
    let values = _mm512_setr_pd(2., 3., 4., 5., 6., 7., 8., 1.);
    let dest_ptr = dest.as_mut_ptr().cast();
    _mm512_mask_i32loscatter_pd::<8>(dest_ptr, 0b01010101, offsets, values);
    let want = [0., 2., 0., 4., 0., 6., 0., 8.];
    assert_eq!(want, dest);
}
// Masked gather of four i32 lanes with 32-bit indices: lanes 0 and 2 are
// loaded, lanes 1 and 3 keep the fallback value.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mmask_i32gather_epi32() {
    let table: [i32; 4] = [1, 2, 3, 4];
    let fallback = _mm_setr_epi32(5, 6, 7, 8);
    let offsets = _mm_setr_epi32(1, 2, 3, 0);
    let got = _mm_mmask_i32gather_epi32::<4>(fallback, 0b0101, offsets, table.as_ptr().cast());
    let want = _mm_setr_epi32(2, 6, 4, 8);
    assert_eq_m128i(want, got);
}
// Masked gather of two i64 lanes with 32-bit indices: lane 0 is loaded,
// lane 1 keeps the fallback value.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mmask_i32gather_epi64() {
    let table: [i64; 2] = [1, 2];
    let fallback = _mm_setr_epi64x(5, 6);
    // Only two result lanes exist; the trailing -1 indices are presumably unused.
    let offsets = _mm_setr_epi32(1, 0, -1, -1);
    let got = _mm_mmask_i32gather_epi64::<8>(fallback, 0b01, offsets, table.as_ptr().cast());
    let want = _mm_setr_epi64x(2, 6);
    assert_eq_m128i(want, got);
}
// Masked gather of two f64 lanes with 32-bit indices: lane 0 is loaded,
// lane 1 keeps the fallback value.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mmask_i32gather_pd() {
    let table: [f64; 2] = [1., 2.];
    let fallback = _mm_setr_pd(5., 6.);
    // Only two result lanes exist; the trailing -1 indices are presumably unused.
    let offsets = _mm_setr_epi32(1, 0, -1, -1);
    let got = _mm_mmask_i32gather_pd::<8>(fallback, 0b01, offsets, table.as_ptr().cast());
    let want = _mm_setr_pd(2., 6.);
    assert_eq_m128d(want, got);
}
// Masked gather of four f32 lanes with 32-bit indices: lanes 0 and 2 are
// loaded, lanes 1 and 3 keep the fallback value.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mmask_i32gather_ps() {
    let table: [f32; 4] = [1., 2., 3., 4.];
    let fallback = _mm_setr_ps(5., 6., 7., 8.);
    let offsets = _mm_setr_epi32(1, 2, 3, 0);
    let got = _mm_mmask_i32gather_ps::<4>(fallback, 0b0101, offsets, table.as_ptr().cast());
    let want = _mm_setr_ps(2., 6., 4., 8.);
    assert_eq_m128(want, got);
}
// Masked gather of i32 with two 64-bit indices: lane 0 is loaded, lane 1
// keeps the fallback value, and the upper two result lanes are expected to
// be zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mmask_i64gather_epi32() {
    let table: [i32; 2] = [1, 2];
    let fallback = _mm_setr_epi32(5, 6, 7, 8);
    let offsets = _mm_setr_epi64x(1, 0);
    let got = _mm_mmask_i64gather_epi32::<4>(fallback, 0b01, offsets, table.as_ptr().cast());
    let want = _mm_setr_epi32(2, 6, 0, 0);
    assert_eq_m128i(want, got);
}
// Masked gather of two i64 lanes with 64-bit indices: lane 0 is loaded,
// lane 1 keeps the fallback value.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mmask_i64gather_epi64() {
    let table: [i64; 2] = [1, 2];
    let fallback = _mm_setr_epi64x(5, 6);
    let offsets = _mm_setr_epi64x(1, 0);
    let got = _mm_mmask_i64gather_epi64::<8>(fallback, 0b01, offsets, table.as_ptr().cast());
    let want = _mm_setr_epi64x(2, 6);
    assert_eq_m128i(want, got);
}
// Masked gather of two f64 lanes with 64-bit indices: lane 0 is loaded,
// lane 1 keeps the fallback value.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mmask_i64gather_pd() {
    let table: [f64; 2] = [1., 2.];
    let fallback = _mm_setr_pd(5., 6.);
    let offsets = _mm_setr_epi64x(1, 0);
    let got = _mm_mmask_i64gather_pd::<8>(fallback, 0b01, offsets, table.as_ptr().cast());
    let want = _mm_setr_pd(2., 6.);
    assert_eq_m128d(want, got);
}
// Masked gather of f32 with two 64-bit indices: lane 0 is loaded, lane 1
// keeps the fallback value, and the upper two result lanes are expected to
// be zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mmask_i64gather_ps() {
    let table: [f32; 2] = [1., 2.];
    let fallback = _mm_setr_ps(5., 6., 7., 8.);
    let offsets = _mm_setr_epi64x(1, 0);
    let got = _mm_mmask_i64gather_ps::<4>(fallback, 0b01, offsets, table.as_ptr().cast());
    let want = _mm_setr_ps(2., 6., 0., 0.);
    assert_eq_m128(want, got);
}
// Masked gather of eight i32 lanes with 32-bit indices: even lanes are
// loaded, odd lanes keep the fallback value.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mmask_i32gather_epi32() {
    let table: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
    let fallback = _mm256_setr_epi32(9, 10, 11, 12, 13, 14, 15, 16);
    let offsets = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
    let table_ptr = table.as_ptr().cast();
    let got = _mm256_mmask_i32gather_epi32::<4>(fallback, 0b01010101, offsets, table_ptr);
    let want = _mm256_setr_epi32(2, 10, 4, 12, 6, 14, 8, 16);
    assert_eq_m256i(want, got);
}
// Masked gather of four i64 lanes with 32-bit indices: lanes 0 and 2 are
// loaded from `base_addr`, lanes 1 and 3 keep the value from `src`.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mmask_i32gather_epi64() {
    let base_addr: [i64; 4] = [1, 2, 3, 4];
    let src = _mm256_setr_epi64x(9, 10, 11, 12);
    // Every index stays inside `base_addr`, including the lanes the mask
    // disables, so the test never relies on a masked-off lane being skipped
    // to remain in bounds (matches the sibling i64-index tests).
    let vindex = _mm_setr_epi32(1, 2, 3, 0);
    let r = _mm256_mmask_i32gather_epi64::<8>(src, 0b0101, vindex, base_addr.as_ptr().cast());
    let expected = _mm256_setr_epi64x(2, 10, 4, 12);
    assert_eq_m256i(expected, r);
}
// Masked gather of four f64 lanes with 32-bit indices: lanes 0 and 2 are
// loaded from `base_addr`, lanes 1 and 3 keep the value from `src`.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mmask_i32gather_pd() {
    let base_addr: [f64; 4] = [1., 2., 3., 4.];
    let src = _mm256_setr_pd(9., 10., 11., 12.);
    // Every index stays inside `base_addr`, including the lanes the mask
    // disables, so the test never relies on a masked-off lane being skipped
    // to remain in bounds (matches the sibling i64-index tests).
    let vindex = _mm_setr_epi32(1, 2, 3, 0);
    let r = _mm256_mmask_i32gather_pd::<8>(src, 0b0101, vindex, base_addr.as_ptr().cast());
    let expected = _mm256_setr_pd(2., 10., 4., 12.);
    assert_eq_m256d(expected, r);
}
// Masked gather of eight f32 lanes with 32-bit indices: even lanes are
// loaded, odd lanes keep the fallback value.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mmask_i32gather_ps() {
    let table: [f32; 8] = [1., 2., 3., 4., 5., 6., 7., 8.];
    let fallback = _mm256_setr_ps(9., 10., 11., 12., 13., 14., 15., 16.);
    let offsets = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
    let table_ptr = table.as_ptr().cast();
    let got = _mm256_mmask_i32gather_ps::<4>(fallback, 0b01010101, offsets, table_ptr);
    let want = _mm256_setr_ps(2., 10., 4., 12., 6., 14., 8., 16.);
    assert_eq_m256(want, got);
}
// Masked gather of four i32 lanes with 64-bit indices: lanes 0 and 2 are
// loaded, lanes 1 and 3 keep the fallback value.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mmask_i64gather_epi32() {
    let table: [i32; 4] = [1, 2, 3, 4];
    let fallback = _mm_setr_epi32(9, 10, 11, 12);
    let offsets = _mm256_setr_epi64x(1, 2, 3, 0);
    let table_ptr = table.as_ptr().cast();
    let got = _mm256_mmask_i64gather_epi32::<4>(fallback, 0b0101, offsets, table_ptr);
    let want = _mm_setr_epi32(2, 10, 4, 12);
    assert_eq_m128i(want, got);
}
// Masked gather of four i64 lanes with 64-bit indices: lanes 0 and 2 are
// loaded, lanes 1 and 3 keep the fallback value.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mmask_i64gather_epi64() {
    let table: [i64; 4] = [1, 2, 3, 4];
    let fallback = _mm256_setr_epi64x(9, 10, 11, 12);
    let offsets = _mm256_setr_epi64x(1, 2, 3, 0);
    let table_ptr = table.as_ptr().cast();
    let got = _mm256_mmask_i64gather_epi64::<8>(fallback, 0b0101, offsets, table_ptr);
    let want = _mm256_setr_epi64x(2, 10, 4, 12);
    assert_eq_m256i(want, got);
}
// Masked gather of four f64 lanes with 64-bit indices: lanes 0 and 2 are
// loaded, lanes 1 and 3 keep the fallback value.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mmask_i64gather_pd() {
    let table: [f64; 4] = [1., 2., 3., 4.];
    let fallback = _mm256_setr_pd(9., 10., 11., 12.);
    let offsets = _mm256_setr_epi64x(1, 2, 3, 0);
    let table_ptr = table.as_ptr().cast();
    let got = _mm256_mmask_i64gather_pd::<8>(fallback, 0b0101, offsets, table_ptr);
    let want = _mm256_setr_pd(2., 10., 4., 12.);
    assert_eq_m256d(want, got);
}
// Masked gather of four f32 lanes with 64-bit indices: lanes 0 and 2 are
// loaded, lanes 1 and 3 keep the fallback value.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mmask_i64gather_ps() {
    let table: [f32; 4] = [1., 2., 3., 4.];
    let fallback = _mm_setr_ps(9., 10., 11., 12.);
    let offsets = _mm256_setr_epi64x(1, 2, 3, 0);
    let table_ptr = table.as_ptr().cast();
    let got = _mm256_mmask_i64gather_ps::<4>(fallback, 0b0101, offsets, table_ptr);
    let want = _mm_setr_ps(2., 10., 4., 12.);
    assert_eq_m128(want, got);
}
// Scatters four i32 values through rotated 32-bit indices so the
// destination ends up holding 1..=4 in order.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_i32scatter_epi32() {
    let mut dest: [i32; 4] = [0; 4];
    let offsets = _mm_setr_epi32(1, 2, 3, 0);
    let values = _mm_setr_epi32(2, 3, 4, 1);
    _mm_i32scatter_epi32::<4>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1, 2, 3, 4];
    assert_eq!(want, dest);
}
// Masked scatter of i32 with 32-bit indices: only source lanes 0 and 2 are
// stored; the remaining destination slots stay zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_i32scatter_epi32() {
    let mut dest: [i32; 4] = [0; 4];
    let offsets = _mm_setr_epi32(1, 2, 3, 0);
    let values = _mm_setr_epi32(2, 3, 4, 1);
    _mm_mask_i32scatter_epi32::<4>(dest.as_mut_ptr().cast(), 0b0101, offsets, values);
    let want = [0, 2, 0, 4];
    assert_eq!(want, dest);
}
// Scatters two i64 values through swapped 32-bit indices so the
// destination ends up as [1, 2].
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_i32scatter_epi64() {
    let mut dest: [i64; 2] = [0; 2];
    // Only two source lanes exist; the trailing -1 indices are presumably unused.
    let offsets = _mm_setr_epi32(1, 0, -1, -1);
    let values = _mm_setr_epi64x(2, 1);
    _mm_i32scatter_epi64::<8>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1, 2];
    assert_eq!(want, dest);
}
// Masked scatter of i64 with 32-bit indices: only source lane 0 is stored,
// leaving destination slot 0 at zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_i32scatter_epi64() {
    let mut dest: [i64; 2] = [0; 2];
    // Only two source lanes exist; the trailing -1 indices are presumably unused.
    let offsets = _mm_setr_epi32(1, 0, -1, -1);
    let values = _mm_setr_epi64x(2, 1);
    _mm_mask_i32scatter_epi64::<8>(dest.as_mut_ptr().cast(), 0b01, offsets, values);
    let want = [0, 2];
    assert_eq!(want, dest);
}
// Scatters two f64 values through swapped 32-bit indices so the
// destination ends up as [1.0, 2.0].
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_i32scatter_pd() {
    let mut dest: [f64; 2] = [0.; 2];
    // Only two source lanes exist; the trailing -1 indices are presumably unused.
    let offsets = _mm_setr_epi32(1, 0, -1, -1);
    let values = _mm_setr_pd(2., 1.);
    _mm_i32scatter_pd::<8>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1., 2.];
    assert_eq!(want, dest);
}
// Masked scatter of f64 with 32-bit indices: only source lane 0 is stored,
// leaving destination slot 0 at zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_i32scatter_pd() {
    let mut dest: [f64; 2] = [0.; 2];
    // Only two source lanes exist; the trailing -1 indices are presumably unused.
    let offsets = _mm_setr_epi32(1, 0, -1, -1);
    let values = _mm_setr_pd(2., 1.);
    _mm_mask_i32scatter_pd::<8>(dest.as_mut_ptr().cast(), 0b01, offsets, values);
    let want = [0., 2.];
    assert_eq!(want, dest);
}
// Scatters four f32 values through rotated 32-bit indices so the
// destination ends up holding 1.0..=4.0 in order.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_i32scatter_ps() {
    let mut dest: [f32; 4] = [0.; 4];
    let offsets = _mm_setr_epi32(1, 2, 3, 0);
    let values = _mm_setr_ps(2., 3., 4., 1.);
    _mm_i32scatter_ps::<4>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1., 2., 3., 4.];
    assert_eq!(want, dest);
}
// Masked scatter of f32 with 32-bit indices: only source lanes 0 and 2 are
// stored; the remaining destination slots stay zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_i32scatter_ps() {
    let mut dest: [f32; 4] = [0.; 4];
    let offsets = _mm_setr_epi32(1, 2, 3, 0);
    let values = _mm_setr_ps(2., 3., 4., 1.);
    _mm_mask_i32scatter_ps::<4>(dest.as_mut_ptr().cast(), 0b0101, offsets, values);
    let want = [0., 2., 0., 4.];
    assert_eq!(want, dest);
}
// Scatters two i32 values through swapped 64-bit indices so the
// destination ends up as [1, 2].
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_i64scatter_epi32() {
    let mut dest: [i32; 2] = [0; 2];
    let offsets = _mm_setr_epi64x(1, 0);
    // Only two indices exist; the upper two source lanes (-1) must not be stored.
    let values = _mm_setr_epi32(2, 1, -1, -1);
    _mm_i64scatter_epi32::<4>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1, 2];
    assert_eq!(want, dest);
}
// Masked scatter of i32 with 64-bit indices: only source lane 0 is stored,
// leaving destination slot 0 at zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_i64scatter_epi32() {
    let mut dest: [i32; 2] = [0; 2];
    let offsets = _mm_setr_epi64x(1, 0);
    // Only two indices exist; the upper two source lanes (-1) must not be stored.
    let values = _mm_setr_epi32(2, 1, -1, -1);
    _mm_mask_i64scatter_epi32::<4>(dest.as_mut_ptr().cast(), 0b01, offsets, values);
    let want = [0, 2];
    assert_eq!(want, dest);
}
// Scatters two i64 values through swapped 64-bit indices so the
// destination ends up as [1, 2].
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_i64scatter_epi64() {
    let mut dest: [i64; 2] = [0; 2];
    let offsets = _mm_setr_epi64x(1, 0);
    let values = _mm_setr_epi64x(2, 1);
    _mm_i64scatter_epi64::<8>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1, 2];
    assert_eq!(want, dest);
}
// Masked scatter of i64 with 64-bit indices: only source lane 0 is stored,
// leaving destination slot 0 at zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_i64scatter_epi64() {
    let mut dest: [i64; 2] = [0; 2];
    let offsets = _mm_setr_epi64x(1, 0);
    let values = _mm_setr_epi64x(2, 1);
    _mm_mask_i64scatter_epi64::<8>(dest.as_mut_ptr().cast(), 0b01, offsets, values);
    let want = [0, 2];
    assert_eq!(want, dest);
}
// Scatters two f64 values through swapped 64-bit indices so the
// destination ends up as [1.0, 2.0].
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_i64scatter_pd() {
    let mut dest: [f64; 2] = [0.; 2];
    let offsets = _mm_setr_epi64x(1, 0);
    let values = _mm_setr_pd(2., 1.);
    _mm_i64scatter_pd::<8>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1., 2.];
    assert_eq!(want, dest);
}
// Masked scatter of f64 with 64-bit indices: only source lane 0 is stored,
// leaving destination slot 0 at zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_i64scatter_pd() {
    let mut dest: [f64; 2] = [0.; 2];
    let offsets = _mm_setr_epi64x(1, 0);
    let values = _mm_setr_pd(2., 1.);
    _mm_mask_i64scatter_pd::<8>(dest.as_mut_ptr().cast(), 0b01, offsets, values);
    let want = [0., 2.];
    assert_eq!(want, dest);
}
// Scatters two f32 values through swapped 64-bit indices so the
// destination ends up as [1.0, 2.0].
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_i64scatter_ps() {
    let mut dest: [f32; 2] = [0.; 2];
    let offsets = _mm_setr_epi64x(1, 0);
    // Only two indices exist; the upper two source lanes (-1.0) must not be stored.
    let values = _mm_setr_ps(2., 1., -1., -1.);
    _mm_i64scatter_ps::<4>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1., 2.];
    assert_eq!(want, dest);
}
// Masked scatter of f32 with 64-bit indices: only source lane 0 is stored,
// leaving destination slot 0 at zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_i64scatter_ps() {
    let mut dest: [f32; 2] = [0.; 2];
    let offsets = _mm_setr_epi64x(1, 0);
    // Only two indices exist; the upper two source lanes (-1.0) must not be stored.
    let values = _mm_setr_ps(2., 1., -1., -1.);
    _mm_mask_i64scatter_ps::<4>(dest.as_mut_ptr().cast(), 0b01, offsets, values);
    let want = [0., 2.];
    assert_eq!(want, dest);
}
// Scatters eight i32 values through rotated 32-bit indices so the
// destination ends up holding 1..=8 in order.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_i32scatter_epi32() {
    let mut dest: [i32; 8] = [0; 8];
    let offsets = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
    let values = _mm256_setr_epi32(2, 3, 4, 5, 6, 7, 8, 1);
    _mm256_i32scatter_epi32::<4>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1, 2, 3, 4, 5, 6, 7, 8];
    assert_eq!(want, dest);
}
// Masked scatter of eight i32 lanes with 32-bit indices: only even source
// lanes are stored, so every other destination slot remains zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_i32scatter_epi32() {
    let mut dest: [i32; 8] = [0; 8];
    let offsets = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
    let values = _mm256_setr_epi32(2, 3, 4, 5, 6, 7, 8, 1);
    let dest_ptr = dest.as_mut_ptr().cast();
    _mm256_mask_i32scatter_epi32::<4>(dest_ptr, 0b01010101, offsets, values);
    let want = [0, 2, 0, 4, 0, 6, 0, 8];
    assert_eq!(want, dest);
}
// Scatters four i64 values through rotated 32-bit indices so the
// destination ends up holding 1..=4 in order.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_i32scatter_epi64() {
    let mut dest: [i64; 4] = [0; 4];
    let offsets = _mm_setr_epi32(1, 2, 3, 0);
    let values = _mm256_setr_epi64x(2, 3, 4, 1);
    _mm256_i32scatter_epi64::<8>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1, 2, 3, 4];
    assert_eq!(want, dest);
}
// Masked scatter of four i64 lanes with 32-bit indices: only source lanes 0
// and 2 are stored; the remaining destination slots stay zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_i32scatter_epi64() {
    let mut dest: [i64; 4] = [0; 4];
    let offsets = _mm_setr_epi32(1, 2, 3, 0);
    let values = _mm256_setr_epi64x(2, 3, 4, 1);
    _mm256_mask_i32scatter_epi64::<8>(dest.as_mut_ptr().cast(), 0b0101, offsets, values);
    let want = [0, 2, 0, 4];
    assert_eq!(want, dest);
}
// Scatters four f64 values through rotated 32-bit indices so the
// destination ends up holding 1.0..=4.0 in order.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_i32scatter_pd() {
    let mut dest: [f64; 4] = [0.; 4];
    let offsets = _mm_setr_epi32(1, 2, 3, 0);
    let values = _mm256_setr_pd(2., 3., 4., 1.);
    _mm256_i32scatter_pd::<8>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1., 2., 3., 4.];
    assert_eq!(want, dest);
}
// Masked scatter of four f64 lanes with 32-bit indices: only source lanes 0
// and 2 are stored; the remaining destination slots stay zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_i32scatter_pd() {
    let mut dest: [f64; 4] = [0.; 4];
    let offsets = _mm_setr_epi32(1, 2, 3, 0);
    let values = _mm256_setr_pd(2., 3., 4., 1.);
    _mm256_mask_i32scatter_pd::<8>(dest.as_mut_ptr().cast(), 0b0101, offsets, values);
    let want = [0., 2., 0., 4.];
    assert_eq!(want, dest);
}
// Scatters eight f32 values through rotated 32-bit indices so the
// destination ends up holding 1.0..=8.0 in order.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_i32scatter_ps() {
    let mut dest: [f32; 8] = [0.; 8];
    let offsets = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
    let values = _mm256_setr_ps(2., 3., 4., 5., 6., 7., 8., 1.);
    _mm256_i32scatter_ps::<4>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1., 2., 3., 4., 5., 6., 7., 8.];
    assert_eq!(want, dest);
}
// Masked scatter of eight f32 lanes with 32-bit indices: only even source
// lanes are stored, so every other destination slot remains zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_i32scatter_ps() {
    let mut dest: [f32; 8] = [0.; 8];
    let offsets = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0);
    let values = _mm256_setr_ps(2., 3., 4., 5., 6., 7., 8., 1.);
    let dest_ptr = dest.as_mut_ptr().cast();
    _mm256_mask_i32scatter_ps::<4>(dest_ptr, 0b01010101, offsets, values);
    let want = [0., 2., 0., 4., 0., 6., 0., 8.];
    assert_eq!(want, dest);
}
// Scatters four i32 values through rotated 64-bit indices so the
// destination ends up holding 1..=4 in order.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_i64scatter_epi32() {
    let mut dest: [i32; 4] = [0; 4];
    let offsets = _mm256_setr_epi64x(1, 2, 3, 0);
    let values = _mm_setr_epi32(2, 3, 4, 1);
    _mm256_i64scatter_epi32::<4>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1, 2, 3, 4];
    assert_eq!(want, dest);
}
// Masked scatter of four i32 lanes with 64-bit indices: only source lanes 0
// and 2 are stored; the remaining destination slots stay zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_i64scatter_epi32() {
    let mut dest: [i32; 4] = [0; 4];
    let offsets = _mm256_setr_epi64x(1, 2, 3, 0);
    let values = _mm_setr_epi32(2, 3, 4, 1);
    _mm256_mask_i64scatter_epi32::<4>(dest.as_mut_ptr().cast(), 0b0101, offsets, values);
    let want = [0, 2, 0, 4];
    assert_eq!(want, dest);
}
// Scatters four i64 values through rotated 64-bit indices so the
// destination ends up holding 1..=4 in order.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_i64scatter_epi64() {
    let mut dest: [i64; 4] = [0; 4];
    let offsets = _mm256_setr_epi64x(1, 2, 3, 0);
    let values = _mm256_setr_epi64x(2, 3, 4, 1);
    _mm256_i64scatter_epi64::<8>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1, 2, 3, 4];
    assert_eq!(want, dest);
}
// Masked scatter of four i64 lanes with 64-bit indices: only source lanes 0
// and 2 are stored; the remaining destination slots stay zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_i64scatter_epi64() {
    let mut dest: [i64; 4] = [0; 4];
    let offsets = _mm256_setr_epi64x(1, 2, 3, 0);
    let values = _mm256_setr_epi64x(2, 3, 4, 1);
    _mm256_mask_i64scatter_epi64::<8>(dest.as_mut_ptr().cast(), 0b0101, offsets, values);
    let want = [0, 2, 0, 4];
    assert_eq!(want, dest);
}
// Scatters four f64 values through rotated 64-bit indices so the
// destination ends up holding 1.0..=4.0 in order.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_i64scatter_pd() {
    let mut dest: [f64; 4] = [0.; 4];
    let offsets = _mm256_setr_epi64x(1, 2, 3, 0);
    let values = _mm256_setr_pd(2., 3., 4., 1.);
    _mm256_i64scatter_pd::<8>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1., 2., 3., 4.];
    assert_eq!(want, dest);
}
// Masked scatter of four f64 lanes with 64-bit indices: only source lanes 0
// and 2 are stored; the remaining destination slots stay zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_i64scatter_pd() {
    let mut dest: [f64; 4] = [0.; 4];
    let offsets = _mm256_setr_epi64x(1, 2, 3, 0);
    let values = _mm256_setr_pd(2., 3., 4., 1.);
    _mm256_mask_i64scatter_pd::<8>(dest.as_mut_ptr().cast(), 0b0101, offsets, values);
    let want = [0., 2., 0., 4.];
    assert_eq!(want, dest);
}
// Scatters four f32 values through rotated 64-bit indices so the
// destination ends up holding 1.0..=4.0 in order.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_i64scatter_ps() {
    let mut dest: [f32; 4] = [0.; 4];
    let offsets = _mm256_setr_epi64x(1, 2, 3, 0);
    let values = _mm_setr_ps(2., 3., 4., 1.);
    _mm256_i64scatter_ps::<4>(dest.as_mut_ptr().cast(), offsets, values);
    let want = [1., 2., 3., 4.];
    assert_eq!(want, dest);
}
// Masked scatter of four f32 lanes with 64-bit indices: only source lanes 0
// and 2 are stored; the remaining destination slots stay zero.
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_i64scatter_ps() {
    let mut dest: [f32; 4] = [0.; 4];
    let offsets = _mm256_setr_epi64x(1, 2, 3, 0);
    let values = _mm_setr_ps(2., 3., 4., 1.);
    _mm256_mask_i64scatter_ps::<4>(dest.as_mut_ptr().cast(), 0b0101, offsets, values);
    let want = [0., 2., 0., 4.];
    assert_eq!(want, dest);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_rol_epi64() {
#[rustfmt::skip]