From 1edc72e82598899289047945801b88a686ec2c1f Mon Sep 17 00:00:00 2001 From: minybot Date: Mon, 24 Aug 2020 20:29:47 -0400 Subject: [PATCH] add some avx512f intrinsics (mask, rotation, shift) (#884) --- library/stdarch/crates/core_arch/avx512f.md | 107 + .../crates/core_arch/src/aarch64/neon/mod.rs | 12 +- .../crates/core_arch/src/arm/neon/mod.rs | 10 +- library/stdarch/crates/core_arch/src/simd.rs | 660 +++- .../crates/core_arch/src/x86/avx512f.rs | 2679 +++++++++++++++++ .../crates/core_arch/src/x86_64/avx512f.rs | 1063 +++++++ 6 files changed, 4436 insertions(+), 95 deletions(-) create mode 100644 library/stdarch/crates/core_arch/avx512f.md diff --git a/library/stdarch/crates/core_arch/avx512f.md b/library/stdarch/crates/core_arch/avx512f.md new file mode 100644 index 000000000000..567fd0e7ce22 --- /dev/null +++ b/library/stdarch/crates/core_arch/avx512f.md @@ -0,0 +1,107 @@ +["AVX512F"]

+ * [x] [`_mm512_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_epi32&expand=5236) + * [x] [`_mm512_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_epi64&expand=5236) + * [x] [`_mm512_and_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_si512&expand=5236) + * [x] [`_mm512_kand`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kand&expand=5236) + * [x] [`_mm512_kor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kor&expand=5236) + * [x] [`_mm512_kxor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kxor&expand=5236) + * [x] [`_kand_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kand_mask16&expand=3212) + * [x] [`_kor_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kor_mask16&expand=3239) + * [x] [`_kxor_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxor_mask16&expand=3291) + * [x] [`_mm512_mask_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_and_epi32&expand=5236) + * [x] [`_mm512_mask_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_and_epi64&expand=5236) + * [x] [`_mm512_mask_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_or_epi32&expand=5236) + * [x] [`_mm512_mask_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_or_epi64&expand=5236) + * [x] [`_mm512_mask_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi32&expand=5236) + * [x] [`_mm512_mask_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi64&expand=5236) + * [x] [`_mm512_mask_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rolv_epi32&expand=5236) + * [x] [`_mm512_mask_rolv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rolv_epi64&expand=5236) + * [x] [`_mm512_mask_ror_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ror_epi32&expand=5236) + * [x] [`_mm512_mask_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ror_epi64&expand=5236) + * [x] [`_mm512_mask_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rorv_epi32&expand=5236) + * [x] [`_mm512_mask_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rorv_epi64&expand=5236) + * [x] [`_mm512_mask_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sll_epi32&expand=5236) + * [x] [`_mm512_mask_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sll_epi64&expand=5236) + * [x] [`_mm512_mask_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi32&expand=5236) + * [x] [`_mm512_mask_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi64&expand=5236) + * [x] [`_mm512_mask_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sllv_epi32&expand=5236) + * [x] [`_mm512_mask_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sllv_epi64&expand=5236) + * [x] 
[`_mm512_mask_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sra_epi32&expand=5236) + * [x] [`_mm512_mask_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sra_epi64&expand=5236) + * [x] [`_mm512_mask_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi32&expand=5236) + * [x] [`_mm512_mask_srai_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi64&expand=5236) + * [x] [`_mm512_mask_srav_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srav_epi32&expand=5236) + * [x] [`_mm512_mask_srav_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srav_epi64&expand=5236) + * [x] [`_mm512_mask_srl_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srl_epi32&expand=5236) + * [x] [`_mm512_mask_srl_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srl_epi64&expand=5236) + * [x] [`_mm512_mask_srli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srli_epi32&expand=5236) + * [x] [`_mm512_mask_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srli_epi64&expand=5236) + * [x] [`_mm512_mask_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srlv_epi32&expand=5236) + * [x] [`_mm512_mask_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srlv_epi64&expand=5236) + * [x] [`_mm512_mask_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_xor_epi32&expand=5236) + * [x] [`_mm512_mask_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_xor_epi64&expand=5236) + * [x] [`_mm512_maskz_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_and_epi32&expand=5236) + * [x] [`_mm512_maskz_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_and_epi64&expand=5236) + * [x] [`_mm512_maskz_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_or_epi32&expand=5236) + * [x] [`_mm512_maskz_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_or_epi64&expand=5236) + * [x] [`_mm512_maskz_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rol_epi32&expand=5236) + * [x] [`_mm512_maskz_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rol_epi64&expand=5236) + * [x] [`_mm512_maskz_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rolv_epi32&expand=5236) + * [x] [`_mm512_maskz_rolv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rolv_epi64&expand=5236) + * [x] [`_mm512_maskz_ror_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ror_epi32&expand=5236) + * [x] [`_mm512_maskz_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ror_epi64&expand=5236) + * [x] [`_mm512_maskz_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rorv_epi32&expand=5236) + * [x] 
[`_mm512_maskz_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rorv_epi64&expand=5236) + * [x] [`_mm512_maskz_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sll_epi32&expand=5236) + * [x] [`_mm512_maskz_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sll_epi64&expand=5236) + * [x] [`_mm512_maskz_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi32&expand=5236) + * [x] [`_mm512_maskz_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi64&expand=5236) + * [x] [`_mm512_maskz_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sllv_epi32&expand=5236) + * [x] [`_mm512_maskz_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sllv_epi64&expand=5236) + * [x] [`_mm512_maskz_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sra_epi32&expand=5236) + * [x] [`_mm512_maskz_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sra_epi64&expand=5236) + * [x] [`_mm512_maskz_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi32&expand=5236) + * [x] [`_mm512_maskz_srai_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi64&expand=5236) + * [x] [`_mm512_maskz_srav_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srav_epi32&expand=5236) + * [x] [`_mm512_maskz_srav_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srav_epi64&expand=5236) + * [x] [`_mm512_maskz_srl_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srl_epi32&expand=5236) + * [x] [`_mm512_maskz_srl_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srl_epi64&expand=5236) + * [x] [`_mm512_maskz_srli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi32&expand=5236) + * [x] [`_mm512_maskz_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi64&expand=5236) + * [x] [`_mm512_maskz_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi32&expand=5236) + * [x] [`_mm512_maskz_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi64&expand=5236) + * [x] [`_mm512_maskz_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_xor_epi32&expand=5236) + * [x] [`_mm512_maskz_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_xor_epi64&expand=5236) + * [x] [`_mm512_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_epi32&expand=5236) + * [x] [`_mm512_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_epi64&expand=5236) + * [x] [`_mm512_or_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_si512&expand=5236) + * [x] [`_mm512_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi32&expand=5236) + * [x] [`_mm512_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi64&expand=5236) + * [x] 
[`_mm512_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rolv_epi32&expand=5236) + * [x] [`_mm512_rolv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rolv_epi64&expand=5236) + * [x] [`_mm512_ror_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ror_epi32&expand=5236) + * [x] [`_mm512_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ror_epi64&expand=5236) + * [x] [`_mm512_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rorv_epi32&expand=5236) + * [x] [`_mm512_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rorv_epi64&expand=5236) + * [x] [`_mm512_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sll_epi32&expand=5236) + * [x] [`_mm512_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sll_epi64&expand=5236) + * [x] [`_mm512_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi32&expand=5236) + * [x] [`_mm512_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi64&expand=5236) + * [x] [`_mm512_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sllv_epi32&expand=5236) + * [x] [`_mm512_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sllv_epi64&expand=5236) + * [x] [`_mm512_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sra_epi32&expand=5236) + * [x] [`_mm512_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sra_epi64&expand=5236) + * [x] [`_mm512_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi32&expand=5236) + * [x] [`_mm512_srai_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi64&expand=5236) + * [x] [`_mm512_srav_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srav_epi32&expand=5236) + * [x] [`_mm512_srav_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srav_epi64&expand=5236) + * [x] [`_mm512_srl_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srl_epi32&expand=5236) + * [x] [`_mm512_srl_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srl_epi64&expand=5236) + * [x] [`_mm512_srli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srli_epi32&expand=5236) + * [x] [`_mm512_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srli_epi64&expand=5236) + * [x] [`_mm512_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srlv_epi32&expand=5236) + * [x] [`_mm512_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srlv_epi64&expand=5236) + * [x] [`_mm512_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_epi32&expand=5236) + * [x] [`_mm512_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_epi64&expand=5236) + * [x] [`_mm512_xor_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_si512&expand=5236) +

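The checklist above tracks the three families added by this patch: mask logic (writemask/zeromask variants), rotations, and shifts. As a rough orientation, the plain, `mask_`, and `maskz_` variants of each operation relate as in the following minimal sketch; the values are illustrative and this is not code from the patch itself (it assumes a nightly toolchain with `avx512f` available and the `_mm512_set1_epi32` constructor):

```rust
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn mask_variants() {
    let a = _mm512_set1_epi32(0b0110);
    let b = _mm512_set1_epi32(0b0011);
    let src = _mm512_set1_epi32(-1);
    // Plain variant: the operation applies to all 16 lanes.
    let _and = _mm512_and_epi32(a, b);
    // Writemask variant: lanes whose mask bit is clear are copied from `src`.
    let _mask = _mm512_mask_and_epi32(src, 0b11111111_00000000, a, b);
    // Zeromask variant: lanes whose mask bit is clear are zeroed.
    let _maskz = _mm512_maskz_and_epi32(0b11111111_00000000, a, b);
}
```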
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs index 0c73e5935de4..adc653e31c04 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs @@ -88,7 +88,7 @@ extern "C" { fn vpaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; #[link_name = "llvm.aarch64.neon.addp.v16i8"] fn vpaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; - + #[link_name = "llvm.aarch64.neon.saddv.i32.v4i16"] fn vaddv_s16_(a: int16x4_t) -> i16; #[link_name = "llvm.aarch64.neon.saddv.i32.v2i32"] @@ -1826,9 +1826,13 @@ mod tests { #[simd_test(enable = "neon")] unsafe fn test_vpaddq_s8() { let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let b = i8x16::new(0, -1, -2, -3, -4, -5, -6, -7, -8, -8, -10, -11, -12, -13, -14, -15); + let b = i8x16::new( + 0, -1, -2, -3, -4, -5, -6, -7, -8, -8, -10, -11, -12, -13, -14, -15, + ); let r: i8x16 = transmute(vpaddq_s8(transmute(a), transmute(b))); - let e = i8x16::new(3, 7, 11, 15, 19, 23, 27, 31, -1, -5, -9, -13, -16, -21, -25, -29); + let e = i8x16::new( + 3, 7, 11, 15, 19, 23, 27, 31, -1, -5, -9, -13, -16, -21, -25, -29, + ); assert_eq!(r, e); } #[simd_test(enable = "neon")] @@ -2829,7 +2833,7 @@ mod tests { let e = i64x2::new(i64::MIN, i64::MAX); assert_eq!(r, e); } - + #[simd_test(enable = "neon")] unsafe fn test_vaddv_s16() { let a = i16x4::new(1, 2, 3, -4); diff --git a/library/stdarch/crates/core_arch/src/arm/neon/mod.rs b/library/stdarch/crates/core_arch/src/arm/neon/mod.rs index c006ea70d496..43fa753cc331 100644 --- a/library/stdarch/crates/core_arch/src/arm/neon/mod.rs +++ b/library/stdarch/crates/core_arch/src/arm/neon/mod.rs @@ -175,7 +175,7 @@ extern "C" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxp.v2f32")] fn vpmaxf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")] #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v4i16")] fn vpadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; @@ -299,7 +299,7 @@ pub unsafe fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] pub unsafe fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - vpadd_s8_(a,b) + vpadd_s8_(a, b) } /// Add pairwise. #[inline] @@ -308,7 +308,7 @@ pub unsafe fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] pub unsafe fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - transmute(vpadd_s16_(transmute(a),transmute(b))) + transmute(vpadd_s16_(transmute(a), transmute(b))) } /// Add pairwise. #[inline] @@ -317,7 +317,7 @@ pub unsafe fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] pub unsafe fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - transmute(vpadd_s32_(transmute(a),transmute(b))) + transmute(vpadd_s32_(transmute(a), transmute(b))) } /// Add pairwise. 
#[inline] @@ -326,7 +326,7 @@ pub unsafe fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] pub unsafe fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - transmute(vpadd_s8_(transmute(a),transmute(b))) + transmute(vpadd_s8_(transmute(a), transmute(b))) } /// Unsigned saturating extract narrow. diff --git a/library/stdarch/crates/core_arch/src/simd.rs b/library/stdarch/crates/core_arch/src/simd.rs index 202df0143ccc..4b71d6c2bff8 100644 --- a/library/stdarch/crates/core_arch/src/simd.rs +++ b/library/stdarch/crates/core_arch/src/simd.rs @@ -90,16 +90,44 @@ simd_ty!(i16x2[i16]: i16, i16 | x0, x1); // 64-bit wide types: -simd_ty!(u8x8[u8]: - u8, u8, u8, u8, u8, u8, u8, u8 - | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + u8x8[u8]: u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); simd_ty!(u16x4[u16]: u16, u16, u16, u16 | x0, x1, x2, x3); simd_ty!(u32x2[u32]: u32, u32 | x0, x1); simd_ty!(u64x1[u64]: u64 | x1); -simd_ty!(i8x8[i8]: - i8, i8, i8, i8, i8, i8, i8, i8 - | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + i8x8[i8]: i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); simd_ty!(i16x4[i16]: i16, i16, i16, i16 | x0, x1, x2, x3); simd_ty!(i32x2[i32]: i32, i32 | x0, x1); simd_ty!(i64x1[i64]: i64 | x1); @@ -108,116 +136,576 @@ simd_ty!(f32x2[f32]: f32, f32 | x0, x1); // 128-bit wide types: -simd_ty!(u8x16[u8]: - u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8 - | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +simd_ty!( + u8x16[u8]: u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + u16x8[u16]: u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 ); -simd_ty!(u16x8[u16]: - u16, u16, u16, u16, u16, u16, u16, u16 - | x0, x1, x2, x3, x4, x5, x6, x7); simd_ty!(u32x4[u32]: u32, u32, u32, u32 | x0, x1, x2, x3); simd_ty!(u64x2[u64]: u64, u64 | x0, x1); -simd_ty!(i8x16[i8]: - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8 - | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +simd_ty!( + i8x16[i8]: i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + i16x8[i16]: i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 ); -simd_ty!(i16x8[i16]: - i16, i16, i16, i16, i16, i16, i16, i16 - | x0, x1, x2, x3, x4, x5, x6, x7); simd_ty!(i32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3); simd_ty!(i64x2[i64]: i64, i64 | x0, x1); simd_ty!(f32x4[f32]: f32, f32, f32, f32 | x0, x1, x2, x3); simd_ty!(f64x2[f64]: f64, f64 | x0, x1); -simd_m_ty!(m8x16[i8]: - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8 - | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +simd_m_ty!( + m8x16[i8]: i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_m_ty!( + m16x8[i16]: i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16 | x0, + x1, + x2, + x3, 
+ x4, + x5, + x6, + x7 ); -simd_m_ty!(m16x8[i16]: - i16, i16, i16, i16, i16, i16, i16, i16 - | x0, x1, x2, x3, x4, x5, x6, x7); simd_m_ty!(m32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3); simd_m_ty!(m64x2[i64]: i64, i64 | x0, x1); // 256-bit wide types: -simd_ty!(u8x32[u8]: - u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8 - | x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15, - x16, x17, x18, x19, x20, x21, x22, x23, - x24, x25, x26, x27, x28, x29, x30, x31 +simd_ty!( + u8x32[u8]: u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8, + u8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31 ); -simd_ty!(u16x16[u16]: - u16, u16, u16, u16, u16, u16, u16, u16, - u16, u16, u16, u16, u16, u16, u16, u16 - | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +simd_ty!( + u16x16[u16]: u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16, + u16 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + u32x8[u32]: u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 ); -simd_ty!(u32x8[u32]: - u32, u32, u32, u32, u32, u32, u32, u32 - | x0, x1, x2, x3, x4, x5, x6, x7); simd_ty!(u64x4[u64]: u64, u64, u64, u64 | x0, x1, x2, x3); -simd_ty!(i8x32[i8]: - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8, - i8, i8, i8, i8, i8, i8, i8, i8 - | x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15, - x16, x17, x18, x19, x20, x21, x22, x23, - x24, x25, x26, x27, x28, x29, x30, x31 +simd_ty!( + i8x32[i8]: i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8, + i8 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15, + x16, + x17, + x18, + x19, + x20, + x21, + x22, + x23, + x24, + x25, + x26, + x27, + x28, + x29, + x30, + x31 ); -simd_ty!(i16x16[i16]: - i16, i16, i16, i16, i16, i16, i16, i16, - i16, i16, i16, i16, i16, i16, i16, i16 - | x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 +simd_ty!( + i16x16[i16]: i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16, + i16 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); +simd_ty!( + i32x8[i32]: i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 ); -simd_ty!(i32x8[i32]: - i32, i32, i32, i32, i32, i32, i32, i32 - | x0, x1, x2, x3, x4, x5, x6, x7); simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3); -simd_ty!(f32x8[f32]: - f32, f32, f32, f32, f32, f32, f32, f32 | - x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + f32x8[f32]: f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); // 512-bit wide types: -simd_ty!(i32x16[i32]: - i32, i32, i32, i32, i32, i32, i32, i32, - i32, i32, i32, i32, i32, i32, i32, i32 - | x0, x1, x2, x3, x4, x5, x6, x7, - x8, 
x9, x10, x11, x12, x13, x14, x15); +simd_ty!( + i32x16[i32]: i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32, + i32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); -simd_ty!(u32x16[u32]: - u32, u32, u32, u32, u32, u32, u32, u32, - u32, u32, u32, u32, u32, u32, u32, u32 - | x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15); +simd_ty!( + u32x16[u32]: u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32, + u32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); -simd_ty!(f32x16[f32]: - f32, f32, f32, f32, f32, f32, f32, f32, - f32, f32, f32, f32, f32, f32, f32, f32 - | x0, x1, x2, x3, x4, x5, x6, x7, - x8, x9, x10, x11, x12, x13, x14, x15); +simd_ty!( + f32x16[f32]: f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32, + f32 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7, + x8, + x9, + x10, + x11, + x12, + x13, + x14, + x15 +); -simd_ty!(i64x8[i64]: - i64, i64, i64, i64, i64, i64, i64, i64 - | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + i64x8[i64]: i64, + i64, + i64, + i64, + i64, + i64, + i64, + i64 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); -simd_ty!(u64x8[u64]: - u64, u64, u64, u64, u64, u64, u64, u64 - | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + u64x8[u64]: u64, + u64, + u64, + u64, + u64, + u64, + u64, + u64 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); -simd_ty!(f64x8[f64]: - f64, f64, f64, f64, f64, f64, f64, f64 - | x0, x1, x2, x3, x4, x5, x6, x7); +simd_ty!( + f64x8[f64]: f64, + f64, + f64, + f64, + f64, + f64, + f64, + f64 | x0, + x1, + x2, + x3, + x4, + x5, + x6, + x7 +); diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs index 4744b435dd7a..2160744a5c76 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -49,6 +49,44 @@ pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i { transmute(simd_select_bitmask(k, abs, zero)) } +/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_abs_epi64&expand=48) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpabsq))] +pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i { + let a = a.as_i64x8(); + // all-0 is a properly initialized i64x8 + let zero: i64x8 = mem::zeroed(); + let sub = simd_sub(zero, a); + let cmp: i64x8 = simd_gt(a, zero); + transmute(simd_select(cmp, a, sub)) +} + +/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
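+///
+/// A minimal illustrative sketch, not from the upstream docs (assumes
+/// `_mm512_set1_epi64` is available and `avx512f` is enabled):
+///
+/// ```ignore
+/// let a = _mm512_set1_epi64(-9);
+/// let src = _mm512_set1_epi64(1);
+/// // Low four lanes receive |a| = 9; high four lanes keep 1 from `src`.
+/// let r = _mm512_mask_abs_epi64(src, 0b0000_1111, a);
+/// ```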
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_abs_epi64&expand=49) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpabsq))] +pub unsafe fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { + let abs = _mm512_abs_epi64(a).as_i64x8(); + transmute(simd_select_bitmask(k, abs, src.as_i64x8())) +} + +/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_abs_epi64&expand=50) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpabsq))] +pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i { + let abs = _mm512_abs_epi64(a).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, abs, zero)) +} + /// Returns vector of type `__m512d` with all elements set to zero. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_pd) @@ -854,6 +892,1282 @@ pub unsafe fn _mm512_mask_i64scatter_epi32( constify_imm8_gather!(scale, call); } +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi32&expand=4685) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + transmute(vprold(a.as_i32x16(), imm8)) +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi32&expand=4683) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let rol = vprold(a.as_i32x16(), imm8); + transmute(simd_select_bitmask(k, rol, src.as_i32x16())) +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
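+///
+/// Illustrative sketch, not from the upstream docs (assumes `_mm512_set1_epi32`):
+///
+/// ```ignore
+/// let a = _mm512_set1_epi32(i32::MIN);
+/// // A rotation, not a shift: the top bit wraps around to bit 0, so selected
+/// // lanes become 1; lanes with a clear mask bit are zeroed.
+/// let r = _mm512_maskz_rol_epi32(0b00000000_11111111, a, 1);
+/// ```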
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi32&expand=4684) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprold, imm8 = 1))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let rol = vprold(a.as_i32x16(), imm8); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, rol, zero)) +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi32&expand=4721) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprold, imm8 = 233))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + transmute(vprord(a.as_i32x16(), imm8)) +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi32&expand=4719) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprold, imm8 = 123))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let ror = vprord(a.as_i32x16(), imm8); + transmute(simd_select_bitmask(k, ror, src.as_i32x16())) +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi32&expand=4720) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprold, imm8 = 123))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let ror = vprord(a.as_i32x16(), imm8); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, ror, zero)) +} + +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rol_epi64&expand=4694) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + transmute(vprolq(a.as_i64x8(), imm8)) +} + +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
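+///
+/// Illustrative sketch, not from the upstream docs (assumes `_mm512_set1_epi64`):
+///
+/// ```ignore
+/// let a = _mm512_set1_epi64(1);
+/// let src = _mm512_set1_epi64(0);
+/// // Selected lanes become 1 rotated left by 63, i.e. only bit 63 set;
+/// // the rest keep 0 from `src`.
+/// let r = _mm512_mask_rol_epi64(src, 0b0000_1111, a, 63);
+/// ```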
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rol_epi64&expand=4692) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let rol = vprolq(a.as_i64x8(), imm8); + transmute(simd_select_bitmask(k, rol, src.as_i64x8())) +} + +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rol_epi64&expand=4693) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let rol = vprolq(a.as_i64x8(), imm8); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, rol, zero)) +} + +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_ror_epi64&expand=4730) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + transmute(vprorq(a.as_i64x8(), imm8)) +} + +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_ror_epi64&expand=4728) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let ror = vprorq(a.as_i64x8(), imm8); + transmute(simd_select_bitmask(k, ror, src.as_i64x8())) +} + +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_ror_epi64&expand=4729) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i { + assert!(imm8 >= 0 && imm8 <= 255); + let ror = vprorq(a.as_i64x8(), imm8); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, ror, zero)) +} + +/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. 
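+///
+/// Illustrative sketch, not from the upstream docs (assumes `_mm512_set1_epi32`):
+///
+/// ```ignore
+/// let a = _mm512_set1_epi32(0b0110);
+/// // Every lane: 0b0110 << 2 == 0b011000 (24).
+/// let r = _mm512_slli_epi32(a, 2);
+/// ```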
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi32&expand=5310) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + transmute(vpsllid(a.as_i32x16(), imm8)) +} + +/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi32&expand=5308) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + let shf = vpsllid(a.as_i32x16(), imm8); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +} + +/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi32&expand=5309) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + let shf = vpsllid(a.as_i32x16(), imm8); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi32&expand=5522) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + transmute(vpsrlid(a.as_i32x16(), imm8)) +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi32&expand=5520) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + let shf = vpsrlid(a.as_i32x16(), imm8); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
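+///
+/// Illustrative sketch, not from the upstream docs (assumes `_mm512_set1_epi32`):
+///
+/// ```ignore
+/// let a = _mm512_set1_epi32(-16);
+/// // Logical shift: zeros come in from the top even for negative inputs,
+/// // and lanes with a clear mask bit are zeroed.
+/// let r = _mm512_maskz_srli_epi32(0b00000000_11111111, a, 4);
+/// ```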
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi32&expand=5521) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + let shf = vpsrlid(a.as_i32x16(), imm8); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) +} + +/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_slli_epi64&expand=5319) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + transmute(vpslliq(a.as_i64x8(), imm8)) +} + +/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_slli_epi64&expand=5317) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + let shf = vpslliq(a.as_i64x8(), imm8); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) +} + +/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_slli_epi64&expand=5318) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + let shf = vpslliq(a.as_i64x8(), imm8); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srli_epi64&expand=5531) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + transmute(vpsrliq(a.as_i64x8(), imm8)) +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
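+///
+/// Illustrative sketch, not from the upstream docs (assumes `_mm512_set1_epi64`):
+///
+/// ```ignore
+/// let a = _mm512_set1_epi64(256);
+/// let src = _mm512_set1_epi64(-1);
+/// // Selected lanes become 256 >> 8 == 1; the rest keep -1 from `src`.
+/// let r = _mm512_mask_srli_epi64(src, 0b0101_0101, a, 8);
+/// ```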
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srli_epi64&expand=5529) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + let shf = vpsrliq(a.as_i64x8(), imm8); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srli_epi64&expand=5530) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + let shf = vpsrliq(a.as_i64x8(), imm8); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) +} + +/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi32&expand=5280) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpslld))] +pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i { + transmute(vpslld(a.as_i32x16(), count.as_i32x4())) +} + +/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi32&expand=5278) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpslld))] +pub unsafe fn _mm512_mask_sll_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m128i, +) -> __m512i { + let shf = _mm512_sll_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +} + +/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi32&expand=5279) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpslld))] +pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { + let shf = _mm512_sll_epi32(a, count).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) +} + +/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst. 
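+///
+/// Unlike `_mm512_srli_epi32`, the shift amount is taken from the low 64 bits
+/// of an `__m128i`. Illustrative sketch, not from the upstream docs (assumes
+/// `_mm_cvtsi32_si128` and `_mm512_set1_epi32`):
+///
+/// ```ignore
+/// let a = _mm512_set1_epi32(32);
+/// let count = _mm_cvtsi32_si128(3);
+/// // Every lane: 32 >> 3 == 4.
+/// let r = _mm512_srl_epi32(a, count);
+/// ```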
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi32&expand=5492) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrld))] +pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i { + transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) +} + +/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi32&expand=5490) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrld))] +pub unsafe fn _mm512_mask_srl_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m128i, +) -> __m512i { + let shf = _mm512_srl_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +} + +/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi32&expand=5491) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrld))] +pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { + let shf = _mm512_srl_epi32(a, count).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) +} + +/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sll_epi64&expand=5289) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsllq))] +pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i { + transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) +} + +/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sll_epi64&expand=5287) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsllq))] +pub unsafe fn _mm512_mask_sll_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m128i, +) -> __m512i { + let shf = _mm512_sll_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) +} + +/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
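+///
+/// Illustrative sketch, not from the upstream docs (assumes `_mm_cvtsi32_si128`
+/// and `_mm512_set1_epi64`):
+///
+/// ```ignore
+/// let a = _mm512_set1_epi64(3);
+/// let count = _mm_cvtsi32_si128(2);
+/// // Selected lanes become 3 << 2 == 12; lanes with a clear mask bit are zeroed.
+/// let r = _mm512_maskz_sll_epi64(0b1111_0000, a, count);
+/// ```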
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sll_epi64&expand=5288) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsllq))] +pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + let shf = _mm512_sll_epi64(a, count).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) +} + +/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srl_epi64&expand=5501) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrlq))] +pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i { + transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) +} + +/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srl_epi64&expand=5499) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrlq))] +pub unsafe fn _mm512_mask_srl_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m128i, +) -> __m512i { + let shf = _mm512_srl_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) +} + +/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srl_epi64&expand=5500) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrlq))] +pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + let shf = _mm512_srl_epi64(a, count).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) +} + +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi32&expand=5407) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrad))] +pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i { + transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) +} + +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
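+///
+/// Illustrative sketch, not from the upstream docs (assumes `_mm_cvtsi32_si128`
+/// and `_mm512_set1_epi32`):
+///
+/// ```ignore
+/// let a = _mm512_set1_epi32(-8);
+/// let src = _mm512_setzero_si512();
+/// // Arithmetic shift keeps the sign: selected lanes become -8 >> 2 == -2;
+/// // the rest keep 0 from `src`.
+/// let r = _mm512_mask_sra_epi32(src, 0b00000000_11111111, a, _mm_cvtsi32_si128(2));
+/// ```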
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi32&expand=5405) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrad))] +pub unsafe fn _mm512_mask_sra_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m128i, +) -> __m512i { + let shf = _mm512_sra_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +} + +/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi32&expand=5406) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrad))] +pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i { + let shf = _mm512_sra_epi32(a, count).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) +} + +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sra_epi64&expand=5416) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsraq))] +pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i { + transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) +} + +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sra_epi64&expand=5414) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsraq))] +pub unsafe fn _mm512_mask_sra_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m128i, +) -> __m512i { + let shf = _mm512_sra_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) +} + +/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sra_epi64&expand=5415) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsraq))] +pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i { + let shf = _mm512_sra_epi64(a, count).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. 
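+///
+/// Illustrative sketch, not from the upstream docs (assumes `_mm512_set1_epi32`):
+///
+/// ```ignore
+/// let a = _mm512_set1_epi32(-1);
+/// // Sign bits are shifted in, so every lane stays -1.
+/// let r = _mm512_srai_epi32(a, 31);
+/// ```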
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi32&expand=5436) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + transmute(vpsraid(a.as_i32x16(), imm8)) +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi32&expand=5434) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + let shf = vpsraid(a.as_i32x16(), imm8); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +} + +/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi32&expand=5435) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + let shf = vpsraid(a.as_i32x16(), imm8); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srai_epi64&expand=5445) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))] +#[rustc_args_required_const(1)] +pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + transmute(vpsraiq(a.as_i64x8(), imm8)) +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srai_epi64&expand=5443) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))] +#[rustc_args_required_const(3)] +pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + let shf = vpsraiq(a.as_i64x8(), imm8); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) +} + +/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
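+///
+/// Illustrative sketch, not from the upstream docs (assumes `_mm512_set1_epi64`):
+///
+/// ```ignore
+/// let a = _mm512_set1_epi64(-64);
+/// // Selected lanes become -64 >> 4 == -4; the rest are zeroed.
+/// let r = _mm512_maskz_srai_epi64(0b0000_1111, a, 4);
+/// ```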
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srai_epi64&expand=5444) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))] +#[rustc_args_required_const(2)] +pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i { + assert!(imm8 <= 255); + let shf = vpsraiq(a.as_i64x8(), imm8); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) +} + +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi32&expand=5465) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsravd))] +pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i { + transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) +} + +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi32&expand=5463) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsravd))] +pub unsafe fn _mm512_mask_srav_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m512i, +) -> __m512i { + let shf = _mm512_srav_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +} + +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi32&expand=5464) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsravd))] +pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srav_epi32(a, count).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) +} + +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srav_epi64&expand=5474) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsravq))] +pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i { + transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) +} + +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srav_epi64&expand=5472) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsravq))] +pub unsafe fn _mm512_mask_srav_epi64( + src: __m512i, + k: __mmask8, + a: __m512i, + count: __m512i, +) -> __m512i { + let shf = _mm512_srav_epi64(a, count).as_i64x8(); + transmute(simd_select_bitmask(k, shf, src.as_i64x8())) +} + +/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srav_epi64&expand=5473) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsravq))] +pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srav_epi64(a, count).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, shf, zero)) +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi32&expand=4703) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprolvd))] +pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i { + transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi32&expand=4701) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprolvd))] +pub unsafe fn _mm512_mask_rolv_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { + let rol = _mm512_rolv_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, rol, src.as_i32x16())) +} + +/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi32&expand=4702) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprolvd))] +pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let rol = _mm512_rolv_epi32(a, b).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, rol, zero)) +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. 
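+///
+/// A minimal sketch of the per-lane semantics (element values are illustrative and a
+/// splat helper like `_mm512_set1_epi32` is assumed): rotate counts are taken modulo 32
+/// and rotation is lossless, so a matching `_mm512_rolv_epi32` undoes it:
+///
+/// ```text
+/// let x = _mm512_set1_epi32(0b1011);
+/// let n = _mm512_set1_epi32(3);
+/// let r = _mm512_rorv_epi32(_mm512_rolv_epi32(x, n), n);
+/// // r == x in every lane
+/// ```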
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi32&expand=4739) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprorvd))] +pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i { + transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi32&expand=4737) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprorvd))] +pub unsafe fn _mm512_mask_rorv_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + b: __m512i, +) -> __m512i { + let ror = _mm512_rorv_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, ror, src.as_i32x16())) +} + +/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi32&expand=4738) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprorvd))] +pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let ror = _mm512_rorv_epi32(a, b).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, ror, zero)) +} + +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rolv_epi64&expand=4712) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprolvq))] +pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i { + transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) +} + +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rolv_epi64&expand=4710) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprolvq))] +pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let rol = _mm512_rolv_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, rol, src.as_i64x8())) +} + +/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rolv_epi64&expand=4711) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprolvq))] +pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let rol = _mm512_rolv_epi64(a, b).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, rol, zero)) +} + +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_rorv_epi64&expand=4748) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprorvq))] +pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i { + transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) +} + +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_rorv_epi64&expand=4746) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprorvq))] +pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let ror = _mm512_rorv_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, ror, src.as_i64x8())) +} + +/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_rorv_epi64&expand=4747) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vprorvq))] +pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let ror = _mm512_rorv_epi64(a, b).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, ror, zero)) +} + +/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi32&expand=5342) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsllvd))] +pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i { + transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) +} + +/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi32&expand=5340) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsllvd))] +pub unsafe fn _mm512_mask_sllv_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m512i, +) -> __m512i { + let shf = _mm512_sllv_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +} + +/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi32&expand=5341) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsllvd))] +pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_sllv_epi32(a, count).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) +} + +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi32&expand=5554) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrlvd))] +pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i { + transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) +} + +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi32&expand=5552) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrlvd))] +pub unsafe fn _mm512_mask_srlv_epi32( + src: __m512i, + k: __mmask16, + a: __m512i, + count: __m512i, +) -> __m512i { + let shf = _mm512_srlv_epi32(a, count).as_i32x16(); + transmute(simd_select_bitmask(k, shf, src.as_i32x16())) +} + +/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi32&expand=5553) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpsrlvd))] +pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i { + let shf = _mm512_srlv_epi32(a, count).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, shf, zero)) +} + +/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst. 
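+///
+/// A minimal sketch (element values are illustrative and a splat helper like
+/// `_mm512_set1_epi64` is assumed): unlike the rotates above, variable shifts do not
+/// reduce the count modulo the element width, so any per-lane count above 63 empties
+/// that lane:
+///
+/// ```text
+/// let a = _mm512_set1_epi64(1);
+/// let n = _mm512_set1_epi64(64);
+/// let r = _mm512_sllv_epi64(a, n);     // 0 in every lane, not 1
+/// ```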
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_sllv_epi64&expand=5351)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpsllvq))]
+pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
+    transmute(vpsllvq(a.as_i64x8(), count.as_i64x8()))
+}
+
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_sllv_epi64&expand=5349)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpsllvq))]
+pub unsafe fn _mm512_mask_sllv_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
+    let shf = _mm512_sllv_epi64(a, count).as_i64x8();
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
+}
+
+/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_sllv_epi64&expand=5350)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpsllvq))]
+pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+    let shf = _mm512_sllv_epi64(a, count).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
+}
+
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_srlv_epi64&expand=5563)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpsrlvq))]
+pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
+    transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8()))
+}
+
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_srlv_epi64&expand=5561)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpsrlvq))]
+pub unsafe fn _mm512_mask_srlv_epi64(
+    src: __m512i,
+    k: __mmask8,
+    a: __m512i,
+    count: __m512i,
+) -> __m512i {
+    let shf = _mm512_srlv_epi64(a, count).as_i64x8();
+    transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
+}
+
+/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_srlv_epi64&expand=5562)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpsrlvq))]
+pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
+    let shf = _mm512_srlv_epi64(a, count).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, shf, zero))
+}
+
+/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi32&expand=272)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi32&expand=273)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpandd))]
+pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let and = _mm512_and_epi32(a, b).as_i32x16();
+    transmute(simd_select_bitmask(k, and, src.as_i32x16()))
+}
+
+/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_epi32&expand=274)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpandd))]
+pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let and = _mm512_and_epi32(a, b).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, and, zero))
+}
+
+/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_epi64&expand=279)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_and(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_and_epi64&expand=280)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let and = _mm512_and_epi64(a, b).as_i64x8();
+    transmute(simd_select_bitmask(k, and, src.as_i64x8()))
+}
+
+/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_and_epi64&expand=274)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
+    let and = _mm512_and_epi64(a, b).as_i64x8();
+    let zero = _mm512_setzero_si512().as_i64x8();
+    transmute(simd_select_bitmask(k, and, zero))
+}
+
+/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_and_si512&expand=302)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpandq))]
+pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi32&expand=4042)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vporq))]
+pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
+}
+
+/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi32&expand=4040)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpord))]
+pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let or = _mm512_or_epi32(a, b).as_i32x16();
+    transmute(simd_select_bitmask(k, or, src.as_i32x16()))
+}
+
+/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi32&expand=4041)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vpord))]
+pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
+    let or = _mm512_or_epi32(a, b).as_i32x16();
+    let zero = _mm512_setzero_si512().as_i32x16();
+    transmute(simd_select_bitmask(k, or, zero))
+}
+
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_epi64&expand=4051)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vporq))]
+pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
+    transmute(simd_or(a.as_i64x8(), b.as_i64x8()))
+}
+
+/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_or_epi64&expand=4049) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vporq))] +pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let or = _mm512_or_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, or, src.as_i64x8())) +} + +/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_or_epi64&expand=4050) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vporq))] +pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let or = _mm512_or_epi64(a, b).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, or, zero)) +} + +/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_or_si512&expand=4072) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vporq))] +pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i { + transmute(simd_or(a.as_i32x16(), b.as_i32x16())) +} + +/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi32&expand=6142) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpxorq))] +pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i { + transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) +} + +/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi32&expand=6140) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpxord))] +pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let xor = _mm512_xor_epi32(a, b).as_i32x16(); + transmute(simd_select_bitmask(k, xor, src.as_i32x16())) +} + +/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi32&expand=6141) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpxord))] +pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i { + let xor = _mm512_xor_epi32(a, b).as_i32x16(); + let zero = _mm512_setzero_si512().as_i32x16(); + transmute(simd_select_bitmask(k, xor, zero)) +} + +/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst. 
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_epi64&expand=6151) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpxorq))] +pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i { + transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) +} + +/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_mask_xor_epi64&expand=6149) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpxorq))] +pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let xor = _mm512_xor_epi64(a, b).as_i64x8(); + transmute(simd_select_bitmask(k, xor, src.as_i64x8())) +} + +/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_maskz_xor_epi64&expand=6150) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpxorq))] +pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i { + let xor = _mm512_xor_epi64(a, b).as_i64x8(); + let zero = _mm512_setzero_si512().as_i64x8(); + transmute(simd_select_bitmask(k, xor, zero)) +} + +/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_xor_si512&expand=6172) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(vpxorq))] +pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i { + transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) +} + +/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kand_mask16&expand=3212) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw +pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { + transmute(kandw(a, b)) +} + +/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kand&expand=3210) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw +pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 { + transmute(kandw(a, b)) +} + +/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kor_mask16&expand=3239) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw +pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { + transmute(korw(a, b)) +} + +/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k. 
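+///
+/// A minimal sketch (mask values are illustrative): `__mmask16` is an ordinary 16-bit
+/// integer, so the mask-word ops compose like plain bitwise ops:
+///
+/// ```text
+/// let lo: __mmask16 = 0b00000000_11111111;
+/// let hi: __mmask16 = 0b11111111_00000000;
+/// let all = _mm512_kor(lo, hi);        // 0xFFFF
+/// let none = _mm512_kand(lo, hi);      // 0x0000
+/// let back = _mm512_kxor(all, lo);     // hi again
+/// ```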
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kor&expand=3237) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw +pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 { + transmute(korw(a, b)) +} + +/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=kxor_mask16&expand=3291) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw +pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 { + transmute(kxorw(a, b)) +} + +/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=512_kxor&expand=3289) +#[inline] +#[target_feature(enable = "avx512f")] +#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw +pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 { + transmute(kxorw(a, b)) +} + /// Sets packed 32-bit integers in `dst` with the supplied values. /// /// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_ps) @@ -2474,6 +3788,73 @@ extern "C" { fn vpcmpud(a: i32x16, b: i32x16, op: i32, m: i16) -> i16; #[link_name = "llvm.x86.avx512.mask.cmp.d.512"] fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16; + + #[link_name = "llvm.x86.avx512.mask.prol.d.512"] + fn vprold(a: i32x16, i8: i32) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.pror.d.512"] + fn vprord(a: i32x16, i8: i32) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.prol.q.512"] + fn vprolq(a: i64x8, i8: i32) -> i64x8; + #[link_name = "llvm.x86.avx512.mask.pror.q.512"] + fn vprorq(a: i64x8, i8: i32) -> i64x8; + + #[link_name = "llvm.x86.avx512.mask.prolv.d.512"] + fn vprolvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.prorv.d.512"] + fn vprorvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.mask.prolv.q.512"] + fn vprolvq(a: i64x8, b: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.mask.prorv.q.512"] + fn vprorvq(a: i64x8, b: i64x8) -> i64x8; + + #[link_name = "llvm.x86.avx512.psllv.d.512"] + fn vpsllvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.psrlv.d.512"] + fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.psllv.q.512"] + fn vpsllvq(a: i64x8, b: i64x8) -> i64x8; + #[link_name = "llvm.x86.avx512.psrlv.q.512"] + fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8; + + #[link_name = "llvm.x86.avx512.pslli.d.512"] + fn vpsllid(a: i32x16, imm8: u32) -> i32x16; + #[link_name = "llvm.x86.avx512.psrli.d.512"] + fn vpsrlid(a: i32x16, imm8: u32) -> i32x16; + #[link_name = "llvm.x86.avx512.pslli.q.512"] + fn vpslliq(a: i64x8, imm8: u32) -> i64x8; + #[link_name = "llvm.x86.avx512.psrli.q.512"] + fn vpsrliq(a: i64x8, imm8: u32) -> i64x8; + + #[link_name = "llvm.x86.avx512.psll.d.512"] + fn vpslld(a: i32x16, count: i32x4) -> i32x16; + #[link_name = "llvm.x86.avx512.psrl.d.512"] + fn vpsrld(a: i32x16, count: i32x4) -> i32x16; + #[link_name = "llvm.x86.avx512.psll.q.512"] + fn vpsllq(a: i64x8, count: i64x2) -> i64x8; + #[link_name = "llvm.x86.avx512.psrl.q.512"] + fn vpsrlq(a: i64x8, count: i64x2) -> i64x8; + + #[link_name = 
"llvm.x86.avx512.psra.d.512"] + fn vpsrad(a: i32x16, count: i32x4) -> i32x16; + #[link_name = "llvm.x86.avx512.psra.q.512"] + fn vpsraq(a: i64x8, count: i64x2) -> i64x8; + + #[link_name = "llvm.x86.avx512.psrai.d.512"] + fn vpsraid(a: i32x16, imm8: u32) -> i32x16; + #[link_name = "llvm.x86.avx512.psrai.q.512"] + fn vpsraiq(a: i64x8, imm8: u32) -> i64x8; + + #[link_name = "llvm.x86.avx512.psrav.d.512"] + fn vpsravd(a: i32x16, count: i32x16) -> i32x16; + #[link_name = "llvm.x86.avx512.psrav.q.512"] + fn vpsravq(a: i64x8, count: i64x8) -> i64x8; + + #[link_name = "llvm.x86.avx512.kand.w"] + fn kandw(ma: u16, mb: u16) -> u16; + #[link_name = "llvm.x86.avx512.kor.w"] + fn korw(ma: u16, mb: u16) -> u16; + #[link_name = "llvm.x86.avx512.kxor.w"] + fn kxorw(ma: u16, mb: u16) -> u16; } #[cfg(test)] @@ -3452,4 +4833,1302 @@ mod tests { let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.)); } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rol_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_rol_epi32(a, 1); + let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rol_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_mask_rol_epi32(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rol_epi32(a, 0b11111111_11111111, a, 1); + let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rol_epi32() { + let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); + let r = _mm512_maskz_rol_epi32(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rol_epi32(0b00000000_11111111, a, 1); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_ror_epi32() { + let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let r = _mm512_ror_epi32(a, 1); + let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_ror_epi32() { + let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let r = _mm512_mask_ror_epi32(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_ror_epi32(a, 0b11111111_11111111, a, 1); + let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_ror_epi32() { + let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0); + let r = _mm512_maskz_ror_epi32(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_ror_epi32(0b00000000_11111111, a, 1); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_slli_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_slli_epi32(a, 1); + let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = 
"avx512f")] + unsafe fn test_mm512_mask_slli_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r = _mm512_mask_slli_epi32(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_slli_epi32(a, 0b11111111_11111111, a, 1); + let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_slli_epi32() { + let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); + let r = _mm512_maskz_slli_epi32(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_slli_epi32(0b00000000_11111111, a, 1); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srli_epi32() { + let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let r = _mm512_srli_epi32(a, 1); + let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srli_epi32() { + let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let r = _mm512_mask_srli_epi32(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srli_epi32(a, 0b11111111_11111111, a, 1); + let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srli_epi32() { + let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0); + let r = _mm512_maskz_srli_epi32(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srli_epi32(0b00000000_11111111, a, 1); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rolv_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_rolv_epi32(a, b); + + let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rolv_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_mask_rolv_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b); + + let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rolv_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_maskz_rolv_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b); + + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rorv_epi32() { + let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = 
_mm512_rorv_epi32(a, b); + + let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rorv_epi32() { + let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let b = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_mask_rorv_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b); + + let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rorv_epi32() { + let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0); + let b = _mm512_set_epi32(2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_maskz_rorv_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b); + + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sllv_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_sllv_epi32(a, count); + + let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sllv_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_mask_sllv_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count); + + let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sllv_epi32() { + let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31); + let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_maskz_sllv_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count); + + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srlv_epi32() { + let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_srlv_epi32(a, count); + + let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srlv_epi32() { + let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let count = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_mask_srlv_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count); + + let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srlv_epi32() { + let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 0); + let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + + let r = _mm512_maskz_srlv_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count); + + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sll_epi32() { + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_sll_epi32(a, count); + let e = _mm512_set_epi32( + 0, + 1 << 2, + 1 << 3, + 1 << 4, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sll_epi32() { + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_mask_sll_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32( + 0, + 1 << 2, + 1 << 3, + 1 << 4, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sll_epi32() { + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 31, + ); + let count = _mm_set_epi32(2, 0, 0, 2); + let r = _mm512_maskz_sll_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srl_epi32() { + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_srl_epi32(a, count); + let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srl_epi32() { + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_mask_srl_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srl_epi32() { + let a = _mm512_set_epi32( + 1 << 31, + 1 << 0, + 1 << 1, + 1 << 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1 << 31, + ); + let count = _mm_set_epi32(2, 0, 0, 2); + let r = _mm512_maskz_srl_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sra_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + let count = _mm_set_epi32(1, 0, 0, 2); + let r = _mm512_sra_epi32(a, 
count); + let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sra_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16); + let count = _mm_set_epi32(0, 0, 0, 2); + let r = _mm512_mask_sra_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sra_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14); + let count = _mm_set_epi32(2, 0, 0, 2); + let r = _mm512_maskz_sra_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srav_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r = _mm512_srav_epi32(a, count); + let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srav_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16); + let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); + let r = _mm512_mask_srav_epi32(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count); + let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srav_epi32() { + let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14); + let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2); + let r = _mm512_maskz_srav_epi32(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srai_epi32() { + let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15); + let r = _mm512_srai_epi32(a, 2); + let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srai_epi32() { + let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15); + let r = _mm512_mask_srai_epi32(a, 0, a, 2); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srai_epi32(a, 0b11111111_11111111, a, 2); + let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srai_epi32() { + let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15); + let r = _mm512_maskz_srai_epi32(0, a, 2); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srai_epi32(0b00000000_11111111, a, 2); + let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + 
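// A hedged, illustrative companion test (the name and element values are
+    // hypothetical, not from the original patch): results of the mask-word ops
+    // `_mm512_kand`/`_mm512_kor` drive a masked shift, exercising the
+    // writemask/zeromask contract shared by the masked intrinsics above.
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mask_ops_drive_masked_shift() {
+        let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
+        let lo: __mmask16 = 0b00000000_11111111;
+        let hi: __mmask16 = 0b11111111_00000000;
+        // kor selects every lane, so the zeromask form matches the plain shift.
+        let r = _mm512_maskz_srai_epi32(_mm512_kor(lo, hi), a, 2);
+        assert_eq_m512i(r, _mm512_srai_epi32(a, 2));
+        // kand selects no lane, so every element is zeroed.
+        let r = _mm512_maskz_srai_epi32(_mm512_kand(lo, hi), a, 2);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+    }
+
+    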
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_and_epi32() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
+        );
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(
+            1 << 1, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
+        );
+        let r = _mm512_and_epi32(a, b);
+        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_and_epi32() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
+        );
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(
+            1 << 1, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
+        );
+        let r = _mm512_mask_and_epi32(a, 0, a, b);
+        assert_eq_m512i(r, a);
+
+        // Mask bit 15 is clear, so element 15 is taken from `a` unchanged.
+        let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
+        #[rustfmt::skip]
+        let e = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 3,
+        );
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_and_epi32() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
+        );
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(
+            1 << 1, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
+        );
+        let r = _mm512_maskz_and_epi32(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+
+        let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
+        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_and_si512() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
+        );
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(
+            1 << 1, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
+        );
+        // Use the `si512` form here so the alias itself gets test coverage.
+        let r = _mm512_and_si512(a, b);
+        let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_or_epi32() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
+        );
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(
+            1 << 1, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
+        );
+        let r = _mm512_or_epi32(a, b);
+        #[rustfmt::skip]
+        let e = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
+        );
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_or_epi32() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
+        );
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(
+            1 << 1, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
+        );
+        let r = _mm512_mask_or_epi32(a, 0, a, b);
+        assert_eq_m512i(r, a);
+
+        let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
+        #[rustfmt::skip]
+        let e = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
+        );
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_or_epi32() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
+        );
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(
+            1 << 1, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
+        );
+        let r = _mm512_maskz_or_epi32(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+
+        let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
+        #[rustfmt::skip]
+        let e = _mm512_set_epi32(
+            0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
+        );
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_or_si512() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
+        );
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(
+            1 << 1, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
+        );
+        // Use the `si512` form here so the alias itself gets test coverage.
+        let r = _mm512_or_si512(a, b);
+        #[rustfmt::skip]
+        let e = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
+        );
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_xor_epi32() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
+        );
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(
+            1 << 1, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
+        );
+        let r = _mm512_xor_epi32(a, b);
+        #[rustfmt::skip]
+        let e = _mm512_set_epi32(
+            1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4,
+        );
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_xor_epi32() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
+        );
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(
+            1 << 1, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
+        );
+        let r = _mm512_mask_xor_epi32(a, 0, a, b);
+        assert_eq_m512i(r, a);
+
+        let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
+        #[rustfmt::skip]
+        let e = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4,
+        );
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_xor_epi32() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
+        );
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(
+            1 << 1, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
+        );
+        let r = _mm512_maskz_xor_epi32(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+
+        let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
+        let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_xor_si512() {
+        #[rustfmt::skip]
+        let a = _mm512_set_epi32(
+            1 << 1 | 1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 3,
+        );
+        #[rustfmt::skip]
+        let b = _mm512_set_epi32(
+            1 << 1, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 3 | 1 << 4,
+        );
+        // Use the `si512` form here so the alias itself gets test coverage.
+        let r = _mm512_xor_si512(a, b);
+        #[rustfmt::skip]
+        let e = _mm512_set_epi32(
+            1 << 2, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4,
+        );
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe
fn test_mm512_kand() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b11001100_00110011; + let r = _mm512_kand(a, b); + let e: u16 = 0b11001100_00110011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_kand_mask16() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b11001100_00110011; + let r = _kand_mask16(a, b); + let e: u16 = 0b11001100_00110011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_kor() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _mm512_kor(a, b); + let e: u16 = 0b11101110_00111011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_kor_mask16() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _kor_mask16(a, b); + let e: u16 = 0b11101110_00111011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_kxor() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _mm512_kxor(a, b); + let e: u16 = 0b11100010_00111000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_kxor_mask16() { + let a: u16 = 0b11001100_00110011; + let b: u16 = 0b00101110_00001011; + let r = _kxor_mask16(a, b); + let e: u16 = 0b11100010_00111000; + assert_eq!(r, e); + } } diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs b/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs index ce2970ee517f..aa6857ecce4c 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs @@ -49,6 +49,42 @@ mod tests { use crate::core_arch::x86::*; use crate::core_arch::x86_64::*; + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_abs_epi64() { + let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); + let r = _mm512_abs_epi64(a); + let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 32); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_abs_epi64() { + #[rustfmt::skip] + let a = _mm512_setr_epi64( + 0, 1, -1, i64::MAX, + i64::MIN, 100, -100, -32 + ); + let r = _mm512_mask_abs_epi64(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_abs_epi64(a, 0b11111111, a); + let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 32); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_abs_epi64() { + #[rustfmt::skip] + let a = _mm512_setr_epi64( + 0, 1, -1, i64::MAX, + i64::MIN, 100, -100, -32 + ); + let r = _mm512_maskz_abs_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_abs_epi64(0b01111111, a); + let e = _mm512_setr_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 0); + assert_eq_m512i(r, e); + } + #[simd_test(enable = "avx512f")] unsafe fn test_mm512_setzero_pd() { assert_eq_m512d(_mm512_setzero_pd(), _mm512_set1_pd(0.)); @@ -854,4 +890,1031 @@ mod tests { } assert_eq!(&arr[..], &expected[..],); } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rol_epi64() { + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let r = _mm512_rol_epi64(a, 1); + let e = _mm512_set_epi64( + 1 << 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rol_epi64() { + let a = _mm512_set_epi64( 
+ 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let r = _mm512_mask_rol_epi64(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rol_epi64(a, 0b11111111, a, 1); + let e = _mm512_set_epi64( + 1 << 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rol_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 63, + ); + let r = _mm512_maskz_rol_epi64(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rol_epi64(0b00001111, a, 1); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 1 << 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_ror_epi64() { + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let r = _mm512_ror_epi64(a, 1); + let e = _mm512_set_epi64( + 1 << 63, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_ror_epi64() { + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let r = _mm512_mask_ror_epi64(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_ror_epi64(a, 0b11111111, a, 1); + let e = _mm512_set_epi64( + 1 << 63, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_ror_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 0, + ); + let r = _mm512_maskz_ror_epi64(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_ror_epi64(0b00001111, a, 1); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 1 << 63); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_slli_epi64() { + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let r = _mm512_slli_epi64(a, 1); + let e = _mm512_set_epi64( + 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_slli_epi64() { + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let r = _mm512_mask_slli_epi64(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_slli_epi64(a, 0b11111111, a, 1); + let e = _mm512_set_epi64( + 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_slli_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 63, + ); + let r = _mm512_maskz_slli_epi64(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_slli_epi64(0b00001111, a, 1); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srli_epi64() { + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 
32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let r = _mm512_srli_epi64(a, 1); + let e = _mm512_set_epi64( + 0, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srli_epi64() { + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let r = _mm512_mask_srli_epi64(a, 0, a, 1); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srli_epi64(a, 0b11111111, a, 1); + let e = _mm512_set_epi64( + 0, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srli_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 0, + ); + let r = _mm512_maskz_srli_epi64(0, a, 1); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srli_epi64(0b00001111, a, 1); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rolv_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_rolv_epi64(a, b); + let e = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 34, + 1 << 35, + 1 << 36, + 1 << 37, + 1 << 38, + 1 << 39, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rolv_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_rolv_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rolv_epi64(a, 0b11111111, a, b); + let e = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 34, + 1 << 35, + 1 << 36, + 1 << 37, + 1 << 38, + 1 << 39, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rolv_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 62, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 2); + let r = _mm512_maskz_rolv_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rolv_epi64(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 36, 1 << 37, 1 << 38, 1 << 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rorv_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_rorv_epi64(a, b); + let e = _mm512_set_epi64( + 1 << 32, + 1 << 63, + 1 << 30, + 1 << 29, + 1 << 28, + 1 << 27, + 1 << 26, + 1 << 25, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rorv_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_rorv_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + + let r = _mm512_mask_rorv_epi64(a, 0b11111111, a, b); + let e = _mm512_set_epi64( + 1 << 32, + 1 << 63, + 1 << 30, + 1 << 29, + 1 << 28, + 1 << 27, + 1 << 26, + 1 << 25, + ); + 
assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rorv_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 0, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 2); + let r = _mm512_maskz_rorv_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_rorv_epi64(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 28, 1 << 27, 1 << 26, 1 << 62); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sllv_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let count = _mm512_set_epi64(0, 2, 2, 3, 4, 5, 6, 7); + let r = _mm512_sllv_epi64(a, count); + let e = _mm512_set_epi64( + 1 << 32, + 0, + 1 << 34, + 1 << 35, + 1 << 36, + 1 << 37, + 1 << 38, + 1 << 39, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sllv_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_sllv_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sllv_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64( + 1 << 32, + 1 << 33, + 0, + 1 << 35, + 1 << 36, + 1 << 37, + 1 << 38, + 1 << 39, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sllv_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 63, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 1); + let r = _mm512_maskz_sllv_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sllv_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 36, 1 << 37, 1 << 38, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srlv_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_srlv_epi64(a, count); + let e = _mm512_set_epi64( + 1 << 32, + 0, + 1 << 30, + 1 << 29, + 1 << 28, + 1 << 27, + 1 << 26, + 1 << 25, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srlv_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_srlv_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srlv_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64( + 1 << 32, + 0, + 1 << 30, + 1 << 29, + 1 << 28, + 1 << 27, + 1 << 26, + 1 << 25, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srlv_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 0, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_maskz_srlv_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srlv_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 28, 1 << 27, 1 << 26, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_sll_epi64() { + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_sll_epi64(a, count); + let e = _mm512_set_epi64( + 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); + assert_eq_m512i(r, e); + + let count = _mm_set_epi64x(1, 0); + let r = _mm512_sll_epi64(a, count); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sll_epi64() { + let a = _mm512_set_epi64( + 1 << 63, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_mask_sll_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sll_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64( + 0, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + 1 << 33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sll_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 63, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_maskz_sll_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sll_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srl_epi64() { + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_srl_epi64(a, count); + let e = _mm512_set_epi64( + 0, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srl_epi64() { + let a = _mm512_set_epi64( + 1 << 0, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_mask_srl_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srl_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64( + 0, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srl_epi64() { + let a = _mm512_set_epi64( + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 32, + 1 << 0, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_maskz_srl_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srl_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sra_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm_set_epi64x(0, 2); + let r = _mm512_sra_epi64(a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sra_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm_set_epi64x(0, 2); + let r = _mm512_mask_sra_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_sra_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + 
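// The shift-right tests in this group cover three forms: `_mm512_sra_epi64`
+    // reads a single shift count from the low 64 bits of an `__m128i`,
+    // `_mm512_srav_epi64` shifts each lane by its own count, and
+    // `_mm512_srai_epi64` takes an immediate count. All three are arithmetic
+    // shifts, so the sign bit is replicated: for example `-16 >> 2 == -4`.
+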
+ #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sra_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm_set_epi64x(0, 2); + let r = _mm512_maskz_sra_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_sra_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srav_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1); + let r = _mm512_srav_epi64(a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srav_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1); + let r = _mm512_mask_srav_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srav_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srav_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1); + let r = _mm512_maskz_srav_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srav_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 3, -8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srai_epi64() { + let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16); + let r = _mm512_srai_epi64(a, 2); + let e = _mm512_set_epi64(0, -1, 3, 0, 0, 0, 0, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srai_epi64() { + let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16); + let r = _mm512_mask_srai_epi64(a, 0, a, 2); + assert_eq_m512i(r, a); + + let r = _mm512_mask_srai_epi64(a, 0b11111111, a, 2); + let e = _mm512_set_epi64(0, -1, 3, 0, 0, 0, 0, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srai_epi64() { + let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16); + let r = _mm512_maskz_srai_epi64(0, a, 2); + assert_eq_m512i(r, _mm512_setzero_si512()); + + let r = _mm512_maskz_srai_epi64(0b00001111, a, 2); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_and_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_and_epi64(a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_and_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_mask_and_epi64(a, 0, a, b); + let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + assert_eq_m512i(r, e); + + let r = _mm512_mask_and_epi64(a, 0b01111111, a, b); + let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_maskz_and_epi64() {
+        let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let r = _mm512_maskz_and_epi64(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+
+        let r = _mm512_maskz_and_epi64(0b00001111, a, b);
+        let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_and_si512() {
+        let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        // Use the `si512` form here so the alias itself gets test coverage.
+        let r = _mm512_and_si512(a, b);
+        let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_or_epi64() {
+        let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let r = _mm512_or_epi64(a, b);
+        #[rustfmt::skip]
+        let e = _mm512_set_epi64(
+            1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0,
+            0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3,
+        );
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_or_epi64() {
+        let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let r = _mm512_mask_or_epi64(a, 0, a, b);
+        assert_eq_m512i(r, a);
+
+        let r = _mm512_mask_or_epi64(a, 0b11111111, a, b);
+        #[rustfmt::skip]
+        let e = _mm512_set_epi64(
+            1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0,
+            0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3,
+        );
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_or_epi64() {
+        let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let r = _mm512_maskz_or_epi64(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+
+        let r = _mm512_maskz_or_epi64(0b00001111, a, b);
+        let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_or_si512() {
+        let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        // Use the `si512` form here so the alias itself gets test coverage.
+        let r = _mm512_or_si512(a, b);
+        #[rustfmt::skip]
+        let e = _mm512_set_epi64(
+            1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0,
+            0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3,
+        );
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_xor_epi64() {
+        let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let r = _mm512_xor_epi64(a, b);
+        let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_xor_epi64() {
+        let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let r = _mm512_mask_xor_epi64(a, 0, a, b);
+        assert_eq_m512i(r, a);
+
+        let r = _mm512_mask_xor_epi64(a, 0b11111111, a, b);
+        let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_xor_epi64() {
+        let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let r = _mm512_maskz_xor_epi64(0, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+
+        // The low four lanes of `a` and `b` are equal, so their XOR is zero too.
+        let r = _mm512_maskz_xor_epi64(0b00001111, a, b);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_xor_si512() {
+        let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+        // Use the `si512` form here so the alias itself gets test coverage.
+        let r = _mm512_xor_si512(a, b);
+        let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0);
+        assert_eq_m512i(r, e);
+    }
 }