Add AVX512F expandloadu (#1280)
This commit is contained in:
parent
6679b4a6d4
commit
3f075854cc
4 changed files with 746 additions and 195 deletions
|
|
@ -1,34 +1,34 @@
|
|||
<summary>["AVX512BW"]</summary><p>
|
||||
|
||||
* [x] [`_mm512_loadu_epi16`]
|
||||
* [_] [`_mm512_mask_loadu_epi16`] //need i1
|
||||
* [_] [`_mm512_maskz_loadu_epi16`] //need i1
|
||||
* [x] [`_mm512_mask_loadu_epi16`] //need i1
|
||||
* [x] [`_mm512_maskz_loadu_epi16`] //need i1
|
||||
* [x] [`_mm_loadu_epi16`]
|
||||
* [_] [`_mm_mask_loadu_epi16`] //need i1
|
||||
* [_] [`_mm_maskz_loadu_epi16`] //need i1
|
||||
* [x] [`_mm_mask_loadu_epi16`] //need i1
|
||||
* [x] [`_mm_maskz_loadu_epi16`] //need i1
|
||||
* [x] [`_mm256_loadu_epi16`]
|
||||
* [_] [`_mm256_mask_loadu_epi16`] //need i1
|
||||
* [_] [`_mm256_maskz_loadu_epi16`] //need i1
|
||||
* [x] [`_mm256_mask_loadu_epi16`] //need i1
|
||||
* [x] [`_mm256_maskz_loadu_epi16`] //need i1
|
||||
* [x] [`_mm512_loadu_epi8`]
|
||||
* [_] [`_mm512_mask_loadu_epi8`] //need i1
|
||||
* [_] [`_mm512_maskz_loadu_epi8`] //need i1
|
||||
* [x] [`_mm512_mask_loadu_epi8`] //need i1
|
||||
* [x] [`_mm512_maskz_loadu_epi8`] //need i1
|
||||
* [x] [`_mm_loadu_epi8`]
|
||||
* [_] [`_mm_mask_loadu_epi8`] //need i1
|
||||
* [_] [`_mm_maskz_loadu_epi8`] //need i1
|
||||
* [x] [`_mm_mask_loadu_epi8`] //need i1
|
||||
* [x] [`_mm_maskz_loadu_epi8`] //need i1
|
||||
* [x] [`_mm256_loadu_epi8`]
|
||||
* [_] [`_mm256_mask_loadu_epi8`] //need i1
|
||||
* [_] [`_mm256_maskz_loadu_epi8`] //need i1
|
||||
* [_] [`_mm512_mask_storeu_epi16`]
|
||||
* [x] [`_mm256_mask_loadu_epi8`] //need i1
|
||||
* [x] [`_mm256_maskz_loadu_epi8`] //need i1
|
||||
* [x] [`_mm512_mask_storeu_epi16`]
|
||||
* [x] [`_mm512_storeu_epi16`]
|
||||
* [_] [`_mm_mask_storeu_epi16`] //need i1
|
||||
* [x] [`_mm_mask_storeu_epi16`] //need i1
|
||||
* [x] [`_mm_storeu_epi16`]
|
||||
* [_] [`_mm256_mask_storeu_epi16`] //need i1
|
||||
* [x] [`_mm256_mask_storeu_epi16`] //need i1
|
||||
* [x] [`_mm256_storeu_epi16`]
|
||||
* [_] [`_mm512_mask_storeu_epi8`] //need i1
|
||||
* [x] [`_mm512_mask_storeu_epi8`] //need i1
|
||||
* [x] [`_mm512_storeu_epi8`]
|
||||
* [_] [`_mm_mask_storeu_epi8`] //need i1
|
||||
* [x] [`_mm_mask_storeu_epi8`] //need i1
|
||||
* [x] [`_mm_storeu_epi8`]
|
||||
* [_] [`_mm256_mask_storeu_epi8`] //need i1
|
||||
* [x] [`_mm256_mask_storeu_epi8`] //need i1
|
||||
* [x] [`_mm256_storeu_epi8`]
|
||||
* [x] [`_mm512_abs_epi16`]
|
||||
* [x] [`_mm512_mask_abs_epi16`]
|
||||
|
|
|
|||
|
|
@ -1665,30 +1665,30 @@
|
|||
* [x] [`_mm_maskz_expand_pd`]
|
||||
* [x] [`_mm256_mask_expand_pd`]
|
||||
* [x] [`_mm256_maskz_expand_pd`]
|
||||
* [ ] [`_mm512_mask_expandloadu_epi32`] //need i1
|
||||
* [ ] [`_mm512_maskz_expandloadu_epi32`] //need i1
|
||||
* [_] [`_mm_mask_expandloadu_epi32`] //need i1
|
||||
* [_] [`_mm_maskz_expandloadu_epi32`] //need i1
|
||||
* [_] [`_mm256_mask_expandloadu_epi32`] //need i1
|
||||
* [_] [`_mm256_maskz_expandloadu_epi32`] //need i1
|
||||
* [ ] [`_mm512_mask_expandloadu_epi64`] //need i1
|
||||
* [ ] [`_mm512_maskz_expandloadu_epi64`] //need i1
|
||||
* [_] [`_mm_mask_expandloadu_epi64`] //need i1
|
||||
* [_] [`_mm_maskz_expandloadu_epi64`] //need i1
|
||||
* [_] [`_mm256_mask_expandloadu_epi64`] //need i1
|
||||
* [_] [`_mm256_maskz_expandloadu_epi64`] //need i1
|
||||
* [ ] [`_mm512_mask_expandloadu_ps`] //need i1
|
||||
* [ ] [`_mm512_maskz_expandloadu_ps`] //need i1
|
||||
* [_] [`_mm_mask_expandloadu_ps`] //need i1
|
||||
* [_] [`_mm_maskz_expandloadu_ps`] //need i1
|
||||
* [_] [`_mm256_mask_expandloadu_ps`] //need i1
|
||||
* [_] [`_mm256_maskz_expandloadu_ps`] //need i1
|
||||
* [ ] [`_mm512_mask_expandloadu_pd`] //need i1
|
||||
* [ ] [`_mm512_maskz_expandloadu_pd`] //need i1
|
||||
* [_] [`_mm_mask_expandloadu_pd`] //need i1
|
||||
* [_] [`_mm_maskz_expandloadu_pd`] //need i1
|
||||
* [_] [`_mm256_mask_expandloadu_pd`] //need i1
|
||||
* [_] [`_mm256_maskz_expandloadu_pd`] //need i1
|
||||
* [x] [`_mm512_mask_expandloadu_epi32`] //need i1
|
||||
* [x] [`_mm512_maskz_expandloadu_epi32`] //need i1
|
||||
* [x] [`_mm_mask_expandloadu_epi32`] //need i1
|
||||
* [x] [`_mm_maskz_expandloadu_epi32`] //need i1
|
||||
* [x] [`_mm256_mask_expandloadu_epi32`] //need i1
|
||||
* [x] [`_mm256_maskz_expandloadu_epi32`] //need i1
|
||||
* [x] [`_mm512_mask_expandloadu_epi64`] //need i1
|
||||
* [x] [`_mm512_maskz_expandloadu_epi64`] //need i1
|
||||
* [x] [`_mm_mask_expandloadu_epi64`] //need i1
|
||||
* [x] [`_mm_maskz_expandloadu_epi64`] //need i1
|
||||
* [x] [`_mm256_mask_expandloadu_epi64`] //need i1
|
||||
* [x] [`_mm256_maskz_expandloadu_epi64`] //need i1
|
||||
* [x] [`_mm512_mask_expandloadu_ps`] //need i1
|
||||
* [x] [`_mm512_maskz_expandloadu_ps`] //need i1
|
||||
* [x] [`_mm_mask_expandloadu_ps`] //need i1
|
||||
* [x] [`_mm_maskz_expandloadu_ps`] //need i1
|
||||
* [x] [`_mm256_mask_expandloadu_ps`] //need i1
|
||||
* [x] [`_mm256_maskz_expandloadu_ps`] //need i1
|
||||
* [x] [`_mm512_mask_expandloadu_pd`] //need i1
|
||||
* [x] [`_mm512_maskz_expandloadu_pd`] //need i1
|
||||
* [x] [`_mm_mask_expandloadu_pd`] //need i1
|
||||
* [x] [`_mm_maskz_expandloadu_pd`] //need i1
|
||||
* [x] [`_mm256_mask_expandloadu_pd`] //need i1
|
||||
* [x] [`_mm256_maskz_expandloadu_pd`] //need i1
|
||||
* [x] [`_mm512_zextpd128_pd512`]
|
||||
* [x] [`_mm512_zextpd256_pd512`]
|
||||
* [x] [`_mm512_zextps128_ps512`]
|
||||
|
|
|
|||
|
|
@ -1,153 +0,0 @@
|
|||
<summary>["AVX512_VBMI2"]</summary><p>
|
||||
|
||||
* [x] [`_mm_mask_compress_epi16`]
|
||||
* [x] [`_mm_maskz_compress_epi16`]
|
||||
* [x] [`_mm256_mask_compress_epi16`]
|
||||
* [x] [`_mm256_maskz_compress_epi16`]
|
||||
* [x] [`_mm512_mask_compress_epi16`]
|
||||
* [x] [`_mm512_maskz_compress_epi16`]
|
||||
* [x] [`_mm_mask_compress_epi8`]
|
||||
* [x] [`_mm_maskz_compress_epi8`]
|
||||
* [x] [`_mm256_mask_compress_epi8`]
|
||||
* [x] [`_mm256_maskz_compress_epi8`]
|
||||
* [x] [`_mm512_mask_compress_epi8`]
|
||||
* [x] [`_mm512_maskz_compress_epi8`]
|
||||
* [x] [`_mm_mask_compressstoreu_epi16`] //need i1
|
||||
* [x] [`_mm256_mask_compressstoreu_epi16`] //need i1
|
||||
* [x] [`_mm512_mask_compressstoreu_epi16`] //need i1
|
||||
* [x] [`_mm_mask_compressstoreu_epi8`] //need i1
|
||||
* [x] [`_mm256_mask_compressstoreu_epi8`] //need i1
|
||||
* [x] [`_mm512_mask_compressstoreu_epi8`] //need i1
|
||||
* [x] [`_mm_mask_expand_epi16`]
|
||||
* [x] [`_mm_maskz_expand_epi16`]
|
||||
* [x] [`_mm256_mask_expand_epi16`]
|
||||
* [x] [`_mm256_maskz_expand_epi16`]
|
||||
* [x] [`_mm512_mask_expand_epi16`]
|
||||
* [x] [`_mm512_maskz_expand_epi16`]
|
||||
* [x] [`_mm_mask_expand_epi8`]
|
||||
* [x] [`_mm_maskz_expand_epi8`]
|
||||
* [x] [`_mm256_mask_expand_epi8`]
|
||||
* [x] [`_mm256_maskz_expand_epi8`]
|
||||
* [x] [`_mm512_mask_expand_epi8`]
|
||||
* [x] [`_mm512_maskz_expand_epi8`]
|
||||
* [x] [`_mm_mask_expandloadu_epi16`] //need i1
|
||||
* [x] [`_mm_maskz_expandloadu_epi16`] //need i1
|
||||
* [x] [`_mm256_mask_expandloadu_epi16`] //need i1
|
||||
* [x] [`_mm256_maskz_expandloadu_epi16`] //need i1
|
||||
* [x] [`_mm512_mask_expandloadu_epi16`] //need i1
|
||||
* [x] [`_mm512_maskz_expandloadu_epi16`] //need i1
|
||||
* [x] [`_mm_mask_expandloadu_epi8`] //need i1
|
||||
* [x] [`_mm_maskz_expandloadu_epi8`] //need i1
|
||||
* [x] [`_mm256_mask_expandloadu_epi8`] //need i1
|
||||
* [x] [`_mm256_maskz_expandloadu_epi8`] //need i1
|
||||
* [x] [`_mm512_mask_expandloadu_epi8`] //need i1
|
||||
* [x] [`_mm512_maskz_expandloadu_epi8`] //need i1
|
||||
* [x] [`_mm_mask_shldi_epi16`]
|
||||
* [x] [`_mm_maskz_shldi_epi16`]
|
||||
* [x] [`_mm_shldi_epi16`]
|
||||
* [x] [`_mm256_mask_shldi_epi16`]
|
||||
* [x] [`_mm256_maskz_shldi_epi16`]
|
||||
* [x] [`_mm256_shldi_epi16`]
|
||||
* [x] [`_mm512_mask_shldi_epi16`]
|
||||
* [x] [`_mm512_maskz_shldi_epi16`]
|
||||
* [x] [`_mm512_shldi_epi16`]
|
||||
* [x] [`_mm_mask_shldi_epi32`]
|
||||
* [x] [`_mm_maskz_shldi_epi32`]
|
||||
* [x] [`_mm_shldi_epi32`]
|
||||
* [x] [`_mm256_mask_shldi_epi32`]
|
||||
* [x] [`_mm256_maskz_shldi_epi32`]
|
||||
* [x] [`_mm256_shldi_epi32`]
|
||||
* [x] [`_mm512_mask_shldi_epi32`]
|
||||
* [x] [`_mm512_maskz_shldi_epi32`]
|
||||
* [x] [`_mm512_shldi_epi32`]
|
||||
* [x] [`_mm_mask_shldi_epi64`]
|
||||
* [x] [`_mm_maskz_shldi_epi64`]
|
||||
* [x] [`_mm_shldi_epi64`]
|
||||
* [x] [`_mm256_mask_shldi_epi64`]
|
||||
* [x] [`_mm256_maskz_shldi_epi64`]
|
||||
* [x] [`_mm256_shldi_epi64`]
|
||||
* [x] [`_mm512_mask_shldi_epi64`]
|
||||
* [x] [`_mm512_maskz_shldi_epi64`]
|
||||
* [x] [`_mm512_shldi_epi64`]
|
||||
* [x] [`_mm_mask_shldv_epi16`]
|
||||
* [x] [`_mm_maskz_shldv_epi16`]
|
||||
* [x] [`_mm_shldv_epi16`]
|
||||
* [x] [`_mm256_mask_shldv_epi16`]
|
||||
* [x] [`_mm256_maskz_shldv_epi16`]
|
||||
* [x] [`_mm256_shldv_epi16`]
|
||||
* [x] [`_mm512_mask_shldv_epi16`]
|
||||
* [x] [`_mm512_maskz_shldv_epi16`]
|
||||
* [x] [`_mm512_shldv_epi16`]
|
||||
* [x] [`_mm_mask_shldv_epi32`]
|
||||
* [x] [`_mm_maskz_shldv_epi32`]
|
||||
* [x] [`_mm_shldv_epi32`]
|
||||
* [x] [`_mm256_mask_shldv_epi32`]
|
||||
* [x] [`_mm256_maskz_shldv_epi32`]
|
||||
* [x] [`_mm256_shldv_epi32`]
|
||||
* [x] [`_mm512_mask_shldv_epi32`]
|
||||
* [x] [`_mm512_maskz_shldv_epi32`]
|
||||
* [x] [`_mm512_shldv_epi32`]
|
||||
* [x] [`_mm_mask_shldv_epi64`]
|
||||
* [x] [`_mm_maskz_shldv_epi64`]
|
||||
* [x] [`_mm_shldv_epi64`]
|
||||
* [x] [`_mm256_mask_shldv_epi64`]
|
||||
* [x] [`_mm256_maskz_shldv_epi64`]
|
||||
* [x] [`_mm256_shldv_epi64`]
|
||||
* [x] [`_mm512_mask_shldv_epi64`]
|
||||
* [x] [`_mm512_maskz_shldv_epi64`]
|
||||
* [x] [`_mm512_shldv_epi64`]
|
||||
* [x] [`_mm_mask_shrdi_epi16`]
|
||||
* [x] [`_mm_maskz_shrdi_epi16`]
|
||||
* [x] [`_mm_shrdi_epi16`]
|
||||
* [x] [`_mm256_mask_shrdi_epi16`]
|
||||
* [x] [`_mm256_maskz_shrdi_epi16`]
|
||||
* [x] [`_mm256_shrdi_epi16`]
|
||||
* [x] [`_mm512_mask_shrdi_epi16`]
|
||||
* [x] [`_mm512_maskz_shrdi_epi16`]
|
||||
* [x] [`_mm512_shrdi_epi16`]
|
||||
* [x] [`_mm_mask_shrdi_epi32`]
|
||||
* [x] [`_mm_maskz_shrdi_epi32`]
|
||||
* [x] [`_mm_shrdi_epi32`]
|
||||
* [x] [`_mm256_mask_shrdi_epi32`]
|
||||
* [x] [`_mm256_maskz_shrdi_epi32`]
|
||||
* [x] [`_mm256_shrdi_epi32`]
|
||||
* [x] [`_mm512_mask_shrdi_epi32`]
|
||||
* [x] [`_mm512_maskz_shrdi_epi32`]
|
||||
* [x] [`_mm512_shrdi_epi32`]
|
||||
* [x] [`_mm_mask_shrdi_epi64`]
|
||||
* [x] [`_mm_maskz_shrdi_epi64`]
|
||||
* [x] [`_mm_shrdi_epi64`]
|
||||
* [x] [`_mm256_mask_shrdi_epi64`]
|
||||
* [x] [`_mm256_maskz_shrdi_epi64`]
|
||||
* [x] [`_mm256_shrdi_epi64`]
|
||||
* [x] [`_mm512_mask_shrdi_epi64`]
|
||||
* [x] [`_mm512_maskz_shrdi_epi64`]
|
||||
* [x] [`_mm512_shrdi_epi64`]
|
||||
* [x] [`_mm_mask_shrdv_epi16`]
|
||||
* [x] [`_mm_maskz_shrdv_epi16`]
|
||||
* [x] [`_mm_shrdv_epi16`]
|
||||
* [x] [`_mm256_mask_shrdv_epi16`]
|
||||
* [x] [`_mm256_maskz_shrdv_epi16`]
|
||||
* [x] [`_mm256_shrdv_epi16`]
|
||||
* [x] [`_mm512_mask_shrdv_epi16`]
|
||||
* [x] [`_mm512_maskz_shrdv_epi16`]
|
||||
* [x] [`_mm512_shrdv_epi16`]
|
||||
* [x] [`_mm_mask_shrdv_epi32`]
|
||||
* [x] [`_mm_maskz_shrdv_epi32`]
|
||||
* [x] [`_mm_shrdv_epi32`]
|
||||
* [x] [`_mm256_mask_shrdv_epi32`]
|
||||
* [x] [`_mm256_maskz_shrdv_epi32`]
|
||||
* [x] [`_mm256_shrdv_epi32`]
|
||||
* [x] [`_mm512_mask_shrdv_epi32`]
|
||||
* [x] [`_mm512_maskz_shrdv_epi32`]
|
||||
* [x] [`_mm512_shrdv_epi32`]
|
||||
* [x] [`_mm_mask_shrdv_epi64`]
|
||||
* [x] [`_mm_maskz_shrdv_epi64`]
|
||||
* [x] [`_mm_shrdv_epi64`]
|
||||
* [x] [`_mm256_mask_shrdv_epi64`]
|
||||
* [x] [`_mm256_maskz_shrdv_epi64`]
|
||||
* [x] [`_mm256_shrdv_epi64`]
|
||||
* [x] [`_mm512_mask_shrdv_epi64`]
|
||||
* [x] [`_mm512_maskz_shrdv_epi64`]
|
||||
* [x] [`_mm512_shrdv_epi64`]
|
||||
</p>
|
||||
|
|
@ -31771,6 +31771,450 @@ pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d)
|
|||
);
|
||||
}
|
||||
|
||||
/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expandloadu_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_mask_expandloadu_epi32(
|
||||
src: __m512i,
|
||||
k: __mmask16,
|
||||
mem_addr: *const i32,
|
||||
) -> __m512i {
|
||||
let mut dst: __m512i = src;
|
||||
asm!(
|
||||
vpl!("vpexpandd {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(zmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expandloadu_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
|
||||
let mut dst: __m512i;
|
||||
asm!(
|
||||
vpl!("vpexpandd {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(zmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_expandloadu_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl,avx")]
|
||||
pub unsafe fn _mm256_mask_expandloadu_epi32(
|
||||
src: __m256i,
|
||||
k: __mmask8,
|
||||
mem_addr: *const i32,
|
||||
) -> __m256i {
|
||||
let mut dst: __m256i = src;
|
||||
asm!(
|
||||
vpl!("vpexpandd {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(ymm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_expandloadu_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl,avx")]
|
||||
pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
|
||||
let mut dst: __m256i;
|
||||
asm!(
|
||||
vpl!("vpexpandd {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(ymm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_expandloadu_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
|
||||
pub unsafe fn _mm_mask_expandloadu_epi32(
|
||||
src: __m128i,
|
||||
k: __mmask8,
|
||||
mem_addr: *const i32,
|
||||
) -> __m128i {
|
||||
let mut dst: __m128i = src;
|
||||
asm!(
|
||||
vpl!("vpexpandd {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(xmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_expandloadu_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
|
||||
pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
|
||||
let mut dst: __m128i;
|
||||
asm!(
|
||||
vpl!("vpexpandd {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(xmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expandloadu_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_mask_expandloadu_epi64(
|
||||
src: __m512i,
|
||||
k: __mmask8,
|
||||
mem_addr: *const i64,
|
||||
) -> __m512i {
|
||||
let mut dst: __m512i = src;
|
||||
asm!(
|
||||
vpl!("vpexpandq {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(zmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expandloadu_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
|
||||
let mut dst: __m512i;
|
||||
asm!(
|
||||
vpl!("vpexpandq {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(zmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_expandloadu_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl,avx")]
|
||||
pub unsafe fn _mm256_mask_expandloadu_epi64(
|
||||
src: __m256i,
|
||||
k: __mmask8,
|
||||
mem_addr: *const i64,
|
||||
) -> __m256i {
|
||||
let mut dst: __m256i = src;
|
||||
asm!(
|
||||
vpl!("vpexpandq {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(ymm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_expandloadu_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl,avx")]
|
||||
pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
|
||||
let mut dst: __m256i;
|
||||
asm!(
|
||||
vpl!("vpexpandq {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(ymm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_expandloadu_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
|
||||
pub unsafe fn _mm_mask_expandloadu_epi64(
|
||||
src: __m128i,
|
||||
k: __mmask8,
|
||||
mem_addr: *const i64,
|
||||
) -> __m128i {
|
||||
let mut dst: __m128i = src;
|
||||
asm!(
|
||||
vpl!("vpexpandq {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(xmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_expandloadu_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
|
||||
pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
|
||||
let mut dst: __m128i;
|
||||
asm!(
|
||||
vpl!("vpexpandq {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(xmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expandloadu_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_mask_expandloadu_ps(
|
||||
src: __m512,
|
||||
k: __mmask16,
|
||||
mem_addr: *const f32,
|
||||
) -> __m512 {
|
||||
let mut dst: __m512 = src;
|
||||
asm!(
|
||||
vpl!("vexpandps {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(zmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expandloadu_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
|
||||
let mut dst: __m512;
|
||||
asm!(
|
||||
vpl!("vexpandps {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(zmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_expandloadu_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl,avx")]
|
||||
pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
|
||||
let mut dst: __m256 = src;
|
||||
asm!(
|
||||
vpl!("vexpandps {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(ymm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_expandloadu_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl,avx")]
|
||||
pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
|
||||
let mut dst: __m256;
|
||||
asm!(
|
||||
vpl!("vexpandps {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(ymm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_expandloadu_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
|
||||
pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
|
||||
let mut dst: __m128 = src;
|
||||
asm!(
|
||||
vpl!("vexpandps {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(xmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_expandloadu_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
|
||||
pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
|
||||
let mut dst: __m128;
|
||||
asm!(
|
||||
vpl!("vexpandps {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(xmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_expandloadu_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_mask_expandloadu_pd(
|
||||
src: __m512d,
|
||||
k: __mmask8,
|
||||
mem_addr: *const f64,
|
||||
) -> __m512d {
|
||||
let mut dst: __m512d = src;
|
||||
asm!(
|
||||
vpl!("vexpandpd {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(zmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_expandloadu_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
|
||||
let mut dst: __m512d;
|
||||
asm!(
|
||||
vpl!("vexpandpd {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(zmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_expandloadu_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl,avx")]
|
||||
pub unsafe fn _mm256_mask_expandloadu_pd(
|
||||
src: __m256d,
|
||||
k: __mmask8,
|
||||
mem_addr: *const f64,
|
||||
) -> __m256d {
|
||||
let mut dst: __m256d = src;
|
||||
asm!(
|
||||
vpl!("vexpandpd {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(ymm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_expandloadu_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl,avx")]
|
||||
pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
|
||||
let mut dst: __m256d;
|
||||
asm!(
|
||||
vpl!("vexpandpd {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(ymm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_expandloadu_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
|
||||
pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
|
||||
let mut dst: __m128d = src;
|
||||
asm!(
|
||||
vpl!("vexpandpd {dst}{{{k}}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = inout(xmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_expandloadu_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
|
||||
pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
|
||||
let mut dst: __m128d;
|
||||
asm!(
|
||||
vpl!("vexpandpd {dst}{{{k}}} {{z}}"),
|
||||
p = in(reg) mem_addr,
|
||||
k = in(kreg) k,
|
||||
dst = out(xmm_reg) dst,
|
||||
options(pure, readonly, nostack)
|
||||
);
|
||||
dst
|
||||
}
|
||||
|
||||
/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_setr_pd&expand=5002)
|
||||
|
|
@ -55123,4 +55567,264 @@ mod tests {
|
|||
let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
|
||||
assert_eq_m512d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_expandloadu_epi32() {
|
||||
let src = _mm512_set1_epi32(42);
|
||||
let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010;
|
||||
let r = _mm512_mask_expandloadu_epi32(src, m, black_box(p));
|
||||
let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_maskz_expandloadu_epi32() {
|
||||
let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010;
|
||||
let r = _mm512_maskz_expandloadu_epi32(m, black_box(p));
|
||||
let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_mask_expandloadu_epi32() {
|
||||
let src = _mm256_set1_epi32(42);
|
||||
let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm256_mask_expandloadu_epi32(src, m, black_box(p));
|
||||
let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_expandloadu_epi32() {
|
||||
let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm256_maskz_expandloadu_epi32(m, black_box(p));
|
||||
let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_mask_expandloadu_epi32() {
|
||||
let src = _mm_set1_epi32(42);
|
||||
let a = &[1_i32, 2, 3, 4];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11111000;
|
||||
let r = _mm_mask_expandloadu_epi32(src, m, black_box(p));
|
||||
let e = _mm_set_epi32(1, 42, 42, 42);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_maskz_expandloadu_epi32() {
|
||||
let a = &[1_i32, 2, 3, 4];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11111000;
|
||||
let r = _mm_maskz_expandloadu_epi32(m, black_box(p));
|
||||
let e = _mm_set_epi32(1, 0, 0, 0);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_expandloadu_epi64() {
|
||||
let src = _mm512_set1_epi64(42);
|
||||
let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm512_mask_expandloadu_epi64(src, m, black_box(p));
|
||||
let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_maskz_expandloadu_epi64() {
|
||||
let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm512_maskz_expandloadu_epi64(m, black_box(p));
|
||||
let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_mask_expandloadu_epi64() {
|
||||
let src = _mm256_set1_epi64x(42);
|
||||
let a = &[1_i64, 2, 3, 4];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm256_mask_expandloadu_epi64(src, m, black_box(p));
|
||||
let e = _mm256_set_epi64x(1, 42, 42, 42);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_expandloadu_epi64() {
|
||||
let a = &[1_i64, 2, 3, 4];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm256_maskz_expandloadu_epi64(m, black_box(p));
|
||||
let e = _mm256_set_epi64x(1, 0, 0, 0);
|
||||
assert_eq_m256i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_mask_expandloadu_epi64() {
|
||||
let src = _mm_set1_epi64x(42);
|
||||
let a = &[1_i64, 2];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm_mask_expandloadu_epi64(src, m, black_box(p));
|
||||
let e = _mm_set_epi64x(42, 42);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_maskz_expandloadu_epi64() {
|
||||
let a = &[1_i64, 2];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm_maskz_expandloadu_epi64(m, black_box(p));
|
||||
let e = _mm_set_epi64x(0, 0);
|
||||
assert_eq_m128i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_expandloadu_ps() {
|
||||
let src = _mm512_set1_ps(42.);
|
||||
let a = &[
|
||||
1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
|
||||
];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010;
|
||||
let r = _mm512_mask_expandloadu_ps(src, m, black_box(p));
|
||||
let e = _mm512_set_ps(
|
||||
8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
|
||||
);
|
||||
assert_eq_m512(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_maskz_expandloadu_ps() {
|
||||
let a = &[
|
||||
1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
|
||||
];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000_11001010;
|
||||
let r = _mm512_maskz_expandloadu_ps(m, black_box(p));
|
||||
let e = _mm512_set_ps(
|
||||
8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
|
||||
);
|
||||
assert_eq_m512(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_mask_expandloadu_ps() {
|
||||
let src = _mm256_set1_ps(42.);
|
||||
let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm256_mask_expandloadu_ps(src, m, black_box(p));
|
||||
let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
|
||||
assert_eq_m256(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_expandloadu_ps() {
|
||||
let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm256_maskz_expandloadu_ps(m, black_box(p));
|
||||
let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
|
||||
assert_eq_m256(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_mask_expandloadu_ps() {
|
||||
let src = _mm_set1_ps(42.);
|
||||
let a = &[1.0f32, 2., 3., 4.];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm_mask_expandloadu_ps(src, m, black_box(p));
|
||||
let e = _mm_set_ps(1., 42., 42., 42.);
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_maskz_expandloadu_ps() {
|
||||
let a = &[1.0f32, 2., 3., 4.];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm_maskz_expandloadu_ps(m, black_box(p));
|
||||
let e = _mm_set_ps(1., 0., 0., 0.);
|
||||
assert_eq_m128(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_expandloadu_pd() {
|
||||
let src = _mm512_set1_pd(42.);
|
||||
let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm512_mask_expandloadu_pd(src, m, black_box(p));
|
||||
let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
|
||||
assert_eq_m512d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_maskz_expandloadu_pd() {
|
||||
let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm512_maskz_expandloadu_pd(m, black_box(p));
|
||||
let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
|
||||
assert_eq_m512d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_mask_expandloadu_pd() {
|
||||
let src = _mm256_set1_pd(42.);
|
||||
let a = &[1.0f64, 2., 3., 4.];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm256_mask_expandloadu_pd(src, m, black_box(p));
|
||||
let e = _mm256_set_pd(1., 42., 42., 42.);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_expandloadu_pd() {
|
||||
let a = &[1.0f64, 2., 3., 4.];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm256_maskz_expandloadu_pd(m, black_box(p));
|
||||
let e = _mm256_set_pd(1., 0., 0., 0.);
|
||||
assert_eq_m256d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_mask_expandloadu_pd() {
|
||||
let src = _mm_set1_pd(42.);
|
||||
let a = &[1.0f64, 2.];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm_mask_expandloadu_pd(src, m, black_box(p));
|
||||
let e = _mm_set_pd(42., 42.);
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f,avx512vl")]
|
||||
unsafe fn test_mm_maskz_expandloadu_pd() {
|
||||
let a = &[1.0f64, 2.];
|
||||
let p = a.as_ptr();
|
||||
let m = 0b11101000;
|
||||
let r = _mm_maskz_expandloadu_pd(m, black_box(p));
|
||||
let e = _mm_set_pd(0., 0.);
|
||||
assert_eq_m128d(r, e);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue