avx512vbmi2 (#979)

This commit is contained in:
minybot 2021-01-11 11:52:19 -05:00 committed by GitHub
parent 981e250f89
commit 6dc569d9d9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 3639 additions and 5 deletions

View file

@ -77,7 +77,7 @@ jobs:
- mips64-unknown-linux-gnuabi64
- mips64el-unknown-linux-gnuabi64
- s390x-unknown-linux-gnu
- wasm32-wasi
#- wasm32-wasi
- i586-unknown-linux-gnu
- x86_64-linux-android
- arm-linux-androideabi
@ -130,8 +130,8 @@ jobs:
disable_assert_instr: true
- target: s390x-unknown-linux-gnu
os: ubuntu-latest
- target: wasm32-wasi
os: ubuntu-latest
#- target: wasm32-wasi
# os: ubuntu-latest
- target: aarch64-unknown-linux-gnu
os: ubuntu-latest
- target: x86_64-apple-darwin

View file

@ -0,0 +1,153 @@
<summary>["AVX512_VBMI2"]</summary><p>
* [x] [`_mm_mask_compress_epi16`]
* [x] [`_mm_maskz_compress_epi16`]
* [x] [`_mm256_mask_compress_epi16`]
* [x] [`_mm256_maskz_compress_epi16`]
* [x] [`_mm512_mask_compress_epi16`]
* [x] [`_mm512_maskz_compress_epi16`]
* [x] [`_mm_mask_compress_epi8`]
* [x] [`_mm_maskz_compress_epi8`]
* [x] [`_mm256_mask_compress_epi8`]
* [x] [`_mm256_maskz_compress_epi8`]
* [x] [`_mm512_mask_compress_epi8`]
* [x] [`_mm512_maskz_compress_epi8`]
* [ ] [`_mm_mask_compressstoreu_epi16`]
* [ ] [`_mm256_mask_compressstoreu_epi16`]
* [ ] [`_mm512_mask_compressstoreu_epi16`]
* [ ] [`_mm_mask_compressstoreu_epi8`]
* [ ] [`_mm256_mask_compressstoreu_epi8`]
* [ ] [`_mm512_mask_compressstoreu_epi8`]
* [x] [`_mm_mask_expand_epi16`]
* [x] [`_mm_maskz_expand_epi16`]
* [x] [`_mm256_mask_expand_epi16`]
* [x] [`_mm256_maskz_expand_epi16`]
* [x] [`_mm512_mask_expand_epi16`]
* [x] [`_mm512_maskz_expand_epi16`]
* [x] [`_mm_mask_expand_epi8`]
* [x] [`_mm_maskz_expand_epi8`]
* [x] [`_mm256_mask_expand_epi8`]
* [x] [`_mm256_maskz_expand_epi8`]
* [x] [`_mm512_mask_expand_epi8`]
* [x] [`_mm512_maskz_expand_epi8`]
* [ ] [`_mm_mask_expandloadu_epi16`]
* [ ] [`_mm_maskz_expandloadu_epi16`]
* [ ] [`_mm256_mask_expandloadu_epi16`]
* [ ] [`_mm256_maskz_expandloadu_epi16`]
* [ ] [`_mm512_mask_expandloadu_epi16`]
* [ ] [`_mm512_maskz_expandloadu_epi16`]
* [ ] [`_mm_mask_expandloadu_epi8`]
* [ ] [`_mm_maskz_expandloadu_epi8`]
* [ ] [`_mm256_mask_expandloadu_epi8`]
* [ ] [`_mm256_maskz_expandloadu_epi8`]
* [ ] [`_mm512_mask_expandloadu_epi8`]
* [ ] [`_mm512_maskz_expandloadu_epi8`]
* [x] [`_mm_mask_shldi_epi16`]
* [x] [`_mm_maskz_shldi_epi16`]
* [x] [`_mm_shldi_epi16`]
* [x] [`_mm256_mask_shldi_epi16`]
* [x] [`_mm256_maskz_shldi_epi16`]
* [x] [`_mm256_shldi_epi16`]
* [x] [`_mm512_mask_shldi_epi16`]
* [x] [`_mm512_maskz_shldi_epi16`]
* [x] [`_mm512_shldi_epi16`]
* [x] [`_mm_mask_shldi_epi32`]
* [x] [`_mm_maskz_shldi_epi32`]
* [x] [`_mm_shldi_epi32`]
* [x] [`_mm256_mask_shldi_epi32`]
* [x] [`_mm256_maskz_shldi_epi32`]
* [x] [`_mm256_shldi_epi32`]
* [x] [`_mm512_mask_shldi_epi32`]
* [x] [`_mm512_maskz_shldi_epi32`]
* [x] [`_mm512_shldi_epi32`]
* [x] [`_mm_mask_shldi_epi64`]
* [x] [`_mm_maskz_shldi_epi64`]
* [x] [`_mm_shldi_epi64`]
* [x] [`_mm256_mask_shldi_epi64`]
* [x] [`_mm256_maskz_shldi_epi64`]
* [x] [`_mm256_shldi_epi64`]
* [x] [`_mm512_mask_shldi_epi64`]
* [x] [`_mm512_maskz_shldi_epi64`]
* [x] [`_mm512_shldi_epi64`]
* [x] [`_mm_mask_shldv_epi16`]
* [x] [`_mm_maskz_shldv_epi16`]
* [x] [`_mm_shldv_epi16`]
* [x] [`_mm256_mask_shldv_epi16`]
* [x] [`_mm256_maskz_shldv_epi16`]
* [x] [`_mm256_shldv_epi16`]
* [x] [`_mm512_mask_shldv_epi16`]
* [x] [`_mm512_maskz_shldv_epi16`]
* [x] [`_mm512_shldv_epi16`]
* [x] [`_mm_mask_shldv_epi32`]
* [x] [`_mm_maskz_shldv_epi32`]
* [x] [`_mm_shldv_epi32`]
* [x] [`_mm256_mask_shldv_epi32`]
* [x] [`_mm256_maskz_shldv_epi32`]
* [x] [`_mm256_shldv_epi32`]
* [x] [`_mm512_mask_shldv_epi32`]
* [x] [`_mm512_maskz_shldv_epi32`]
* [x] [`_mm512_shldv_epi32`]
* [x] [`_mm_mask_shldv_epi64`]
* [x] [`_mm_maskz_shldv_epi64`]
* [x] [`_mm_shldv_epi64`]
* [x] [`_mm256_mask_shldv_epi64`]
* [x] [`_mm256_maskz_shldv_epi64`]
* [x] [`_mm256_shldv_epi64`]
* [x] [`_mm512_mask_shldv_epi64`]
* [x] [`_mm512_maskz_shldv_epi64`]
* [x] [`_mm512_shldv_epi64`]
* [x] [`_mm_mask_shrdi_epi16`]
* [x] [`_mm_maskz_shrdi_epi16`]
* [x] [`_mm_shrdi_epi16`]
* [x] [`_mm256_mask_shrdi_epi16`]
* [x] [`_mm256_maskz_shrdi_epi16`]
* [x] [`_mm256_shrdi_epi16`]
* [x] [`_mm512_mask_shrdi_epi16`]
* [x] [`_mm512_maskz_shrdi_epi16`]
* [x] [`_mm512_shrdi_epi16`]
* [x] [`_mm_mask_shrdi_epi32`]
* [x] [`_mm_maskz_shrdi_epi32`]
* [x] [`_mm_shrdi_epi32`]
* [x] [`_mm256_mask_shrdi_epi32`]
* [x] [`_mm256_maskz_shrdi_epi32`]
* [x] [`_mm256_shrdi_epi32`]
* [x] [`_mm512_mask_shrdi_epi32`]
* [x] [`_mm512_maskz_shrdi_epi32`]
* [x] [`_mm512_shrdi_epi32`]
* [x] [`_mm_mask_shrdi_epi64`]
* [x] [`_mm_maskz_shrdi_epi64`]
* [x] [`_mm_shrdi_epi64`]
* [x] [`_mm256_mask_shrdi_epi64`]
* [x] [`_mm256_maskz_shrdi_epi64`]
* [x] [`_mm256_shrdi_epi64`]
* [x] [`_mm512_mask_shrdi_epi64`]
* [x] [`_mm512_maskz_shrdi_epi64`]
* [x] [`_mm512_shrdi_epi64`]
* [x] [`_mm_mask_shrdv_epi16`]
* [x] [`_mm_maskz_shrdv_epi16`]
* [x] [`_mm_shrdv_epi16`]
* [x] [`_mm256_mask_shrdv_epi16`]
* [x] [`_mm256_maskz_shrdv_epi16`]
* [x] [`_mm256_shrdv_epi16`]
* [x] [`_mm512_mask_shrdv_epi16`]
* [x] [`_mm512_maskz_shrdv_epi16`]
* [x] [`_mm512_shrdv_epi16`]
* [x] [`_mm_mask_shrdv_epi32`]
* [x] [`_mm_maskz_shrdv_epi32`]
* [x] [`_mm_shrdv_epi32`]
* [x] [`_mm256_mask_shrdv_epi32`]
* [x] [`_mm256_maskz_shrdv_epi32`]
* [x] [`_mm256_shrdv_epi32`]
* [x] [`_mm512_mask_shrdv_epi32`]
* [x] [`_mm512_maskz_shrdv_epi32`]
* [x] [`_mm512_shrdv_epi32`]
* [x] [`_mm_mask_shrdv_epi64`]
* [x] [`_mm_maskz_shrdv_epi64`]
* [x] [`_mm_shrdv_epi64`]
* [x] [`_mm256_mask_shrdv_epi64`]
* [x] [`_mm256_maskz_shrdv_epi64`]
* [x] [`_mm256_shrdv_epi64`]
* [x] [`_mm512_mask_shrdv_epi64`]
* [x] [`_mm512_maskz_shrdv_epi64`]
* [x] [`_mm512_shrdv_epi64`]
</p>

View file

@ -438,8 +438,6 @@ mod tests {
use stdarch_test::simd_test;
use crate::core_arch::x86::*;
//use crate::hint::black_box;
//use crate::mem::{self};
#[simd_test(enable = "avx512vbmi")]
unsafe fn test_mm512_permutex2var_epi8() {

File diff suppressed because it is too large Load diff

View file

@ -678,6 +678,9 @@ pub use self::avx512ifma::*;
mod avx512vbmi;
pub use self::avx512vbmi::*;
mod avx512vbmi2;
pub use self::avx512vbmi2::*;
mod avx512bitalg;
pub use self::avx512bitalg::*;

View file

@ -479,6 +479,9 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
// The XML file names VBMI as "avx512_vbmi", while Rust calls
// it "avx512vbmi".
"avx512_vbmi" => String::from("avx512vbmi"),
// The XML file names VBMI2 as "avx512_vbmi2", while Rust calls
// it "avx512vbmi2".
"avx512_vbmi2" => String::from("avx512vbmi2"),
// Some AVX512f intrinsics are also supported by Knight's Corner.
// The XML lists them as avx512f/kncni, but we are solely gating
// them behind avx512f since we don't have a KNC feature yet.