Add AVX512VPOPCNTDQ Intrinsics (#962)
Adds all 18 intrinsics belongin to AVX512VPOPCNTDQ counting the number of set bits in packed 32 / 64 bit integers. Intrinsics match _mm(256|512)?(_maskz?)?_popcnt_epi(32|64). Versions for packed 8 and 16 bit integers are part of BITALG.
This commit is contained in:
parent
a6be1973aa
commit
c1ae807d89
2 changed files with 544 additions and 0 deletions
541
library/stdarch/crates/core_arch/src/x86/avx512vpopcntdq.rs
Normal file
541
library/stdarch/crates/core_arch/src/x86/avx512vpopcntdq.rs
Normal file
|
|
@ -0,0 +1,541 @@
|
|||
//! Vectorized Population Count Instructions for Double- and Quadwords (VPOPCNTDQ)
|
||||
//!
|
||||
//! The intrinsics here correspond to those in the `immintrin.h` C header.
|
||||
//!
|
||||
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
|
||||
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
|
||||
//!
|
||||
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
|
||||
|
||||
use crate::core_arch::simd::i32x16;
|
||||
use crate::core_arch::simd::i32x4;
|
||||
use crate::core_arch::simd::i32x8;
|
||||
use crate::core_arch::simd::i64x2;
|
||||
use crate::core_arch::simd::i64x4;
|
||||
use crate::core_arch::simd::i64x8;
|
||||
use crate::core_arch::simd_llvm::simd_select_bitmask;
|
||||
use crate::core_arch::x86::__m128i;
|
||||
use crate::core_arch::x86::__m256i;
|
||||
use crate::core_arch::x86::__m512i;
|
||||
use crate::core_arch::x86::__mmask16;
|
||||
use crate::core_arch::x86::__mmask8;
|
||||
use crate::core_arch::x86::_mm256_setzero_si256;
|
||||
use crate::core_arch::x86::_mm512_setzero_si512;
|
||||
use crate::core_arch::x86::_mm_setzero_si128;
|
||||
use crate::core_arch::x86::m128iExt;
|
||||
use crate::core_arch::x86::m256iExt;
|
||||
use crate::core_arch::x86::m512iExt;
|
||||
use crate::mem::transmute;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdarch_test::assert_instr;
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.ctpop.v16i32"]
|
||||
fn popcnt_v16i32(x: i32x16) -> i32x16;
|
||||
#[link_name = "llvm.ctpop.v8i32"]
|
||||
fn popcnt_v8i32(x: i32x8) -> i32x8;
|
||||
#[link_name = "llvm.ctpop.v4i32"]
|
||||
fn popcnt_v4i32(x: i32x4) -> i32x4;
|
||||
|
||||
#[link_name = "llvm.ctpop.v8i64"]
|
||||
fn popcnt_v8i64(x: i64x8) -> i64x8;
|
||||
#[link_name = "llvm.ctpop.v4i64"]
|
||||
fn popcnt_v4i64(x: i64x4) -> i64x4;
|
||||
#[link_name = "llvm.ctpop.v2i64"]
|
||||
fn popcnt_v2i64(x: i64x2) -> i64x2;
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm512_popcnt_epi32(a: __m512i) -> __m512i {
|
||||
transmute(popcnt_v16i32(a.as_i32x16()))
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
|
||||
let zero = _mm512_setzero_si512().as_i32x16();
|
||||
transmute(simd_select_bitmask(k, popcnt_v16i32(a.as_i32x16()), zero))
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
popcnt_v16i32(a.as_i32x16()),
|
||||
src.as_i32x16(),
|
||||
))
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm256_popcnt_epi32(a: __m256i) -> __m256i {
|
||||
transmute(popcnt_v8i32(a.as_i32x8()))
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
|
||||
let zero = _mm256_setzero_si256().as_i32x8();
|
||||
transmute(simd_select_bitmask(k, popcnt_v8i32(a.as_i32x8()), zero))
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
popcnt_v8i32(a.as_i32x8()),
|
||||
src.as_i32x8(),
|
||||
))
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm_popcnt_epi32(a: __m128i) -> __m128i {
|
||||
transmute(popcnt_v4i32(a.as_i32x4()))
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
|
||||
let zero = _mm_setzero_si128().as_i32x4();
|
||||
transmute(simd_select_bitmask(k, popcnt_v4i32(a.as_i32x4()), zero))
|
||||
}
|
||||
|
||||
/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_popcnt_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntd))]
|
||||
pub unsafe fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
popcnt_v4i32(a.as_i32x4()),
|
||||
src.as_i32x4(),
|
||||
))
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm512_popcnt_epi64(a: __m512i) -> __m512i {
|
||||
transmute(popcnt_v8i64(a.as_i64x8()))
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
|
||||
let zero = _mm512_setzero_si512().as_i64x8();
|
||||
transmute(simd_select_bitmask(k, popcnt_v8i64(a.as_i64x8()), zero))
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
popcnt_v8i64(a.as_i64x8()),
|
||||
src.as_i64x8(),
|
||||
))
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm256_popcnt_epi64(a: __m256i) -> __m256i {
|
||||
transmute(popcnt_v4i64(a.as_i64x4()))
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
|
||||
let zero = _mm256_setzero_si256().as_i64x4();
|
||||
transmute(simd_select_bitmask(k, popcnt_v4i64(a.as_i64x4()), zero))
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
popcnt_v4i64(a.as_i64x4()),
|
||||
src.as_i64x4(),
|
||||
))
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm_popcnt_epi64(a: __m128i) -> __m128i {
|
||||
transmute(popcnt_v2i64(a.as_i64x2()))
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
|
||||
let zero = _mm_setzero_si128().as_i64x2();
|
||||
transmute(simd_select_bitmask(k, popcnt_v2i64(a.as_i64x2()), zero))
|
||||
}
|
||||
|
||||
/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
|
||||
///
|
||||
/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
|
||||
/// Otherwise the computation result is written into the result.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_popcnt_epi64)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
|
||||
#[cfg_attr(test, assert_instr(vpopcntq))]
|
||||
pub unsafe fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
|
||||
transmute(simd_select_bitmask(
|
||||
k,
|
||||
popcnt_v2i64(a.as_i64x2()),
|
||||
src.as_i64x2(),
|
||||
))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use stdarch_test::simd_test;
|
||||
|
||||
use crate::core_arch::x86::*;
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512f")]
|
||||
unsafe fn test_mm512_popcnt_epi32() {
|
||||
let test_data = _mm512_set_epi32(
|
||||
0,
|
||||
1,
|
||||
-1,
|
||||
2,
|
||||
7,
|
||||
0xFF_FE,
|
||||
0x7F_FF_FF_FF,
|
||||
-100,
|
||||
0x40_00_00_00,
|
||||
103,
|
||||
371,
|
||||
552,
|
||||
432_948,
|
||||
818_826_998,
|
||||
255,
|
||||
256,
|
||||
);
|
||||
let actual_result = _mm512_popcnt_epi32(test_data);
|
||||
let reference_result =
|
||||
_mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 1, 5, 6, 3, 10, 17, 8, 1);
|
||||
assert_eq_m512i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512f")]
|
||||
unsafe fn test_mm512_mask_popcnt_epi32() {
|
||||
let test_data = _mm512_set_epi32(
|
||||
0,
|
||||
1,
|
||||
-1,
|
||||
2,
|
||||
7,
|
||||
0xFF_FE,
|
||||
0x7F_FF_FF_FF,
|
||||
-100,
|
||||
0x40_00_00_00,
|
||||
103,
|
||||
371,
|
||||
552,
|
||||
432_948,
|
||||
818_826_998,
|
||||
255,
|
||||
256,
|
||||
);
|
||||
let mask = 0xFF_00;
|
||||
let actual_result = _mm512_mask_popcnt_epi32(test_data, mask, test_data);
|
||||
let reference_result = _mm512_set_epi32(
|
||||
0,
|
||||
1,
|
||||
32,
|
||||
1,
|
||||
3,
|
||||
15,
|
||||
31,
|
||||
28,
|
||||
0x40_00_00_00,
|
||||
103,
|
||||
371,
|
||||
552,
|
||||
432_948,
|
||||
818_826_998,
|
||||
255,
|
||||
256,
|
||||
);
|
||||
assert_eq_m512i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512f")]
|
||||
unsafe fn test_mm512_maskz_popcnt_epi32() {
|
||||
let test_data = _mm512_set_epi32(
|
||||
0,
|
||||
1,
|
||||
-1,
|
||||
2,
|
||||
7,
|
||||
0xFF_FE,
|
||||
0x7F_FF_FF_FF,
|
||||
-100,
|
||||
0x40_00_00_00,
|
||||
103,
|
||||
371,
|
||||
552,
|
||||
432_948,
|
||||
818_826_998,
|
||||
255,
|
||||
256,
|
||||
);
|
||||
let mask = 0xFF_00;
|
||||
let actual_result = _mm512_maskz_popcnt_epi32(mask, test_data);
|
||||
let reference_result = _mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
assert_eq_m512i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_popcnt_epi32() {
|
||||
let test_data = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
|
||||
let actual_result = _mm256_popcnt_epi32(test_data);
|
||||
let reference_result = _mm256_set_epi32(0, 1, 32, 1, 3, 15, 31, 28);
|
||||
assert_eq_m256i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_mask_popcnt_epi32() {
|
||||
let test_data = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
|
||||
let mask = 0xF0;
|
||||
let actual_result = _mm256_mask_popcnt_epi32(test_data, mask, test_data);
|
||||
let reference_result = _mm256_set_epi32(0, 1, 32, 1, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
|
||||
assert_eq_m256i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_popcnt_epi32() {
|
||||
let test_data = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
|
||||
let mask = 0xF0;
|
||||
let actual_result = _mm256_maskz_popcnt_epi32(mask, test_data);
|
||||
let reference_result = _mm256_set_epi32(0, 1, 32, 1, 0, 0, 0, 0);
|
||||
assert_eq_m256i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
|
||||
unsafe fn test_mm_popcnt_epi32() {
|
||||
let test_data = _mm_set_epi32(0, 1, -1, -100);
|
||||
let actual_result = _mm_popcnt_epi32(test_data);
|
||||
let reference_result = _mm_set_epi32(0, 1, 32, 28);
|
||||
assert_eq_m128i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
|
||||
unsafe fn test_mm_mask_popcnt_epi32() {
|
||||
let test_data = _mm_set_epi32(0, 1, -1, -100);
|
||||
let mask = 0xE;
|
||||
let actual_result = _mm_mask_popcnt_epi32(test_data, mask, test_data);
|
||||
let reference_result = _mm_set_epi32(0, 1, 32, -100);
|
||||
assert_eq_m128i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
|
||||
unsafe fn test_mm_maskz_popcnt_epi32() {
|
||||
let test_data = _mm_set_epi32(0, 1, -1, -100);
|
||||
let mask = 0xE;
|
||||
let actual_result = _mm_maskz_popcnt_epi32(mask, test_data);
|
||||
let reference_result = _mm_set_epi32(0, 1, 32, 0);
|
||||
assert_eq_m128i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512f")]
|
||||
unsafe fn test_mm512_popcnt_epi64() {
|
||||
let test_data = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
|
||||
let actual_result = _mm512_popcnt_epi64(test_data);
|
||||
let reference_result = _mm512_set_epi64(0, 1, 64, 1, 3, 15, 63, 60);
|
||||
assert_eq_m512i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512f")]
|
||||
unsafe fn test_mm512_mask_popcnt_epi64() {
|
||||
let test_data = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
|
||||
let mask = 0xF0;
|
||||
let actual_result = _mm512_mask_popcnt_epi64(test_data, mask, test_data);
|
||||
let reference_result =
|
||||
_mm512_set_epi64(0, 1, 64, 1, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
|
||||
assert_eq_m512i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512f")]
|
||||
unsafe fn test_mm512_maskz_popcnt_epi64() {
|
||||
let test_data = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
|
||||
let mask = 0xF0;
|
||||
let actual_result = _mm512_maskz_popcnt_epi64(mask, test_data);
|
||||
let reference_result = _mm512_set_epi64(0, 1, 64, 1, 0, 0, 0, 0);
|
||||
assert_eq_m512i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512vl")]
|
||||
unsafe fn test_mm256_popcnt_epi64() {
|
||||
let test_data = _mm256_set_epi64x(0, 1, -1, -100);
|
||||
let actual_result = _mm256_popcnt_epi64(test_data);
|
||||
let reference_result = _mm256_set_epi64x(0, 1, 64, 60);
|
||||
assert_eq_m256i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512vl")]
|
||||
unsafe fn test_mm256_mask_popcnt_epi64() {
|
||||
let test_data = _mm256_set_epi64x(0, 1, -1, -100);
|
||||
let mask = 0xE;
|
||||
let actual_result = _mm256_mask_popcnt_epi64(test_data, mask, test_data);
|
||||
let reference_result = _mm256_set_epi64x(0, 1, 64, -100);
|
||||
assert_eq_m256i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512vl")]
|
||||
unsafe fn test_mm256_maskz_popcnt_epi64() {
|
||||
let test_data = _mm256_set_epi64x(0, 1, -1, -100);
|
||||
let mask = 0xE;
|
||||
let actual_result = _mm256_maskz_popcnt_epi64(mask, test_data);
|
||||
let reference_result = _mm256_set_epi64x(0, 1, 64, 0);
|
||||
assert_eq_m256i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512vl")]
|
||||
unsafe fn test_mm_popcnt_epi64() {
|
||||
let test_data = _mm_set_epi64x(0, 1);
|
||||
let actual_result = _mm_popcnt_epi64(test_data);
|
||||
let reference_result = _mm_set_epi64x(0, 1);
|
||||
assert_eq_m128i(actual_result, reference_result);
|
||||
let test_data = _mm_set_epi64x(-1, -100);
|
||||
let actual_result = _mm_popcnt_epi64(test_data);
|
||||
let reference_result = _mm_set_epi64x(64, 60);
|
||||
assert_eq_m128i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512vl")]
|
||||
unsafe fn test_mm_mask_popcnt_epi64() {
|
||||
let test_data = _mm_set_epi64x(0, -100);
|
||||
let mask = 0x2;
|
||||
let actual_result = _mm_mask_popcnt_epi64(test_data, mask, test_data);
|
||||
let reference_result = _mm_set_epi64x(0, -100);
|
||||
assert_eq_m128i(actual_result, reference_result);
|
||||
let test_data = _mm_set_epi64x(-1, 1);
|
||||
let mask = 0x2;
|
||||
let actual_result = _mm_mask_popcnt_epi64(test_data, mask, test_data);
|
||||
let reference_result = _mm_set_epi64x(64, 1);
|
||||
assert_eq_m128i(actual_result, reference_result);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512vpopcntdq,avx512vl")]
|
||||
unsafe fn test_mm_maskz_popcnt_epi64() {
|
||||
let test_data = _mm_set_epi64x(0, 1);
|
||||
let mask = 0x2;
|
||||
let actual_result = _mm_maskz_popcnt_epi64(mask, test_data);
|
||||
let reference_result = _mm_set_epi64x(0, 0);
|
||||
assert_eq_m128i(actual_result, reference_result);
|
||||
let test_data = _mm_set_epi64x(-1, -100);
|
||||
let mask = 0x2;
|
||||
let actual_result = _mm_maskz_popcnt_epi64(mask, test_data);
|
||||
let reference_result = _mm_set_epi64x(64, 0);
|
||||
assert_eq_m128i(actual_result, reference_result);
|
||||
}
|
||||
}
|
||||
|
|
@ -672,6 +672,9 @@ pub use self::avx512bw::*;
|
|||
mod avx512ifma;
|
||||
pub use self::avx512ifma::*;
|
||||
|
||||
mod avx512vpopcntdq;
|
||||
pub use self::avx512vpopcntdq::*;
|
||||
|
||||
mod avx512vaes;
|
||||
pub use self::avx512vaes::*;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue