Add some AVX512F intrinsics (mask, rotation, shift) (#884)
This commit is contained in:
parent
67217c5d11
commit
1edc72e825
6 changed files with 4436 additions and 95 deletions
107
library/stdarch/crates/core_arch/avx512f.md
Normal file
107
library/stdarch/crates/core_arch/avx512f.md
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
<summary>["AVX512F"]</summary><p>
|
||||
* [x] [`_mm512_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_epi32&expand=5236)
|
||||
* [x] [`_mm512_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_epi64&expand=5236)
|
||||
* [x] [`_mm512_and_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_and_si512&expand=5236)
|
||||
* [x] [`_mm512_kand`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kand&expand=5236)
|
||||
* [x] [`_mm512_kor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kor&expand=5236)
|
||||
* [x] [`_mm512_kxor`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_kxor&expand=5236)
|
||||
  * [x] [`_kand_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kand_mask16&expand=3212)
|
||||
  * [x] [`_kor_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kor_mask16&expand=3239)
|
||||
  * [x] [`_kxor_mask16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_kxor_mask16&expand=3291)
|
||||
* [x] [`_mm512_mask_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_and_epi32&expand=5236)
|
||||
* [x] [`_mm512_mask_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_and_epi64&expand=5236)
|
||||
* [x] [`_mm512_mask_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_or_epi32&expand=5236)
|
||||
* [x] [`_mm512_mask_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_or_epi64&expand=5236)
|
||||
* [x] [`_mm512_mask_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi32&expand=5236)
|
||||
* [x] [`_mm512_mask_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rol_epi64&expand=5236)
|
||||
* [x] [`_mm512_mask_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rolv_epi32&expand=5236)
|
||||
* [x] [`_mm512_mask_rolv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rolv_epi64&expand=5236)
|
||||
* [x] [`_mm512_mask_ror_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ror_epi32&expand=5236)
|
||||
* [x] [`_mm512_mask_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_ror_epi64&expand=5236)
|
||||
* [x] [`_mm512_mask_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rorv_epi32&expand=5236)
|
||||
* [x] [`_mm512_mask_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_rorv_epi64&expand=5236)
|
||||
* [x] [`_mm512_mask_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sll_epi32&expand=5236)
|
||||
* [x] [`_mm512_mask_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sll_epi64&expand=5236)
|
||||
* [x] [`_mm512_mask_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi32&expand=5236)
|
||||
* [x] [`_mm512_mask_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_slli_epi64&expand=5236)
|
||||
* [x] [`_mm512_mask_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sllv_epi32&expand=5236)
|
||||
* [x] [`_mm512_mask_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sllv_epi64&expand=5236)
|
||||
* [x] [`_mm512_mask_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sra_epi32&expand=5236)
|
||||
* [x] [`_mm512_mask_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_sra_epi64&expand=5236)
|
||||
* [x] [`_mm512_mask_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi32&expand=5236)
|
||||
* [x] [`_mm512_mask_srai_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srai_epi64&expand=5236)
|
||||
* [x] [`_mm512_mask_srav_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srav_epi32&expand=5236)
|
||||
* [x] [`_mm512_mask_srav_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srav_epi64&expand=5236)
|
||||
* [x] [`_mm512_mask_srl_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srl_epi32&expand=5236)
|
||||
* [x] [`_mm512_mask_srl_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srl_epi64&expand=5236)
|
||||
* [x] [`_mm512_mask_srli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srli_epi32&expand=5236)
|
||||
* [x] [`_mm512_mask_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srli_epi64&expand=5236)
|
||||
* [x] [`_mm512_mask_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srlv_epi32&expand=5236)
|
||||
* [x] [`_mm512_mask_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_srlv_epi64&expand=5236)
|
||||
* [x] [`_mm512_mask_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_xor_epi32&expand=5236)
|
||||
* [x] [`_mm512_mask_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_xor_epi64&expand=5236)
|
||||
* [x] [`_mm512_maskz_and_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_and_epi32&expand=5236)
|
||||
* [x] [`_mm512_maskz_and_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_and_epi64&expand=5236)
|
||||
* [x] [`_mm512_maskz_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_or_epi32&expand=5236)
|
||||
* [x] [`_mm512_maskz_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_or_epi64&expand=5236)
|
||||
* [x] [`_mm512_maskz_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rol_epi32&expand=5236)
|
||||
* [x] [`_mm512_maskz_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rol_epi64&expand=5236)
|
||||
* [x] [`_mm512_maskz_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rolv_epi32&expand=5236)
|
||||
* [x] [`_mm512_maskz_rolv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rolv_epi64&expand=5236)
|
||||
* [x] [`_mm512_maskz_ror_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ror_epi32&expand=5236)
|
||||
* [x] [`_mm512_maskz_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_ror_epi64&expand=5236)
|
||||
* [x] [`_mm512_maskz_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rorv_epi32&expand=5236)
|
||||
* [x] [`_mm512_maskz_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_rorv_epi64&expand=5236)
|
||||
* [x] [`_mm512_maskz_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sll_epi32&expand=5236)
|
||||
* [x] [`_mm512_maskz_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sll_epi64&expand=5236)
|
||||
* [x] [`_mm512_maskz_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi32&expand=5236)
|
||||
* [x] [`_mm512_maskz_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_slli_epi64&expand=5236)
|
||||
* [x] [`_mm512_maskz_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sllv_epi32&expand=5236)
|
||||
* [x] [`_mm512_maskz_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sllv_epi64&expand=5236)
|
||||
* [x] [`_mm512_maskz_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sra_epi32&expand=5236)
|
||||
* [x] [`_mm512_maskz_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_sra_epi64&expand=5236)
|
||||
* [x] [`_mm512_maskz_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi32&expand=5236)
|
||||
* [x] [`_mm512_maskz_srai_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srai_epi64&expand=5236)
|
||||
* [x] [`_mm512_maskz_srav_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srav_epi32&expand=5236)
|
||||
* [x] [`_mm512_maskz_srav_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srav_epi64&expand=5236)
|
||||
* [x] [`_mm512_maskz_srl_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srl_epi32&expand=5236)
|
||||
* [x] [`_mm512_maskz_srl_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srl_epi64&expand=5236)
|
||||
* [x] [`_mm512_maskz_srli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi32&expand=5236)
|
||||
* [x] [`_mm512_maskz_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srli_epi64&expand=5236)
|
||||
* [x] [`_mm512_maskz_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi32&expand=5236)
|
||||
* [x] [`_mm512_maskz_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_srlv_epi64&expand=5236)
|
||||
* [x] [`_mm512_maskz_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_xor_epi32&expand=5236)
|
||||
* [x] [`_mm512_maskz_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_xor_epi64&expand=5236)
|
||||
* [x] [`_mm512_or_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_epi32&expand=5236)
|
||||
* [x] [`_mm512_or_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_epi64&expand=5236)
|
||||
* [x] [`_mm512_or_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_or_si512&expand=5236)
|
||||
* [x] [`_mm512_rol_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi32&expand=5236)
|
||||
* [x] [`_mm512_rol_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rol_epi64&expand=5236)
|
||||
* [x] [`_mm512_rolv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rolv_epi32&expand=5236)
|
||||
* [x] [`_mm512_rolv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rolv_epi64&expand=5236)
|
||||
* [x] [`_mm512_ror_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ror_epi32&expand=5236)
|
||||
* [x] [`_mm512_ror_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_ror_epi64&expand=5236)
|
||||
* [x] [`_mm512_rorv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rorv_epi32&expand=5236)
|
||||
* [x] [`_mm512_rorv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_rorv_epi64&expand=5236)
|
||||
* [x] [`_mm512_sll_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sll_epi32&expand=5236)
|
||||
* [x] [`_mm512_sll_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sll_epi64&expand=5236)
|
||||
* [x] [`_mm512_slli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi32&expand=5236)
|
||||
* [x] [`_mm512_slli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_slli_epi64&expand=5236)
|
||||
* [x] [`_mm512_sllv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sllv_epi32&expand=5236)
|
||||
* [x] [`_mm512_sllv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sllv_epi64&expand=5236)
|
||||
* [x] [`_mm512_sra_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sra_epi32&expand=5236)
|
||||
* [x] [`_mm512_sra_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_sra_epi64&expand=5236)
|
||||
* [x] [`_mm512_srai_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi32&expand=5236)
|
||||
* [x] [`_mm512_srai_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srai_epi64&expand=5236)
|
||||
* [x] [`_mm512_srav_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srav_epi32&expand=5236)
|
||||
* [x] [`_mm512_srav_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srav_epi64&expand=5236)
|
||||
* [x] [`_mm512_srl_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srl_epi32&expand=5236)
|
||||
* [x] [`_mm512_srl_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srl_epi64&expand=5236)
|
||||
* [x] [`_mm512_srli_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srli_epi32&expand=5236)
|
||||
* [x] [`_mm512_srli_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srli_epi64&expand=5236)
|
||||
* [x] [`_mm512_srlv_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srlv_epi32&expand=5236)
|
||||
* [x] [`_mm512_srlv_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_srlv_epi64&expand=5236)
|
||||
* [x] [`_mm512_xor_epi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_epi32&expand=5236)
|
||||
* [x] [`_mm512_xor_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_epi64&expand=5236)
|
||||
* [x] [`_mm512_xor_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_xor_si512&expand=5236)
|
||||
</p>
|
||||
|
|
@ -88,7 +88,7 @@ extern "C" {
|
|||
fn vpaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
|
||||
#[link_name = "llvm.aarch64.neon.addp.v16i8"]
|
||||
fn vpaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
|
||||
|
||||
|
||||
#[link_name = "llvm.aarch64.neon.saddv.i32.v4i16"]
|
||||
fn vaddv_s16_(a: int16x4_t) -> i16;
|
||||
#[link_name = "llvm.aarch64.neon.saddv.i32.v2i32"]
|
||||
|
|
@ -1826,9 +1826,13 @@ mod tests {
|
|||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vpaddq_s8() {
|
||||
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let b = i8x16::new(0, -1, -2, -3, -4, -5, -6, -7, -8, -8, -10, -11, -12, -13, -14, -15);
|
||||
let b = i8x16::new(
|
||||
0, -1, -2, -3, -4, -5, -6, -7, -8, -8, -10, -11, -12, -13, -14, -15,
|
||||
);
|
||||
let r: i8x16 = transmute(vpaddq_s8(transmute(a), transmute(b)));
|
||||
let e = i8x16::new(3, 7, 11, 15, 19, 23, 27, 31, -1, -5, -9, -13, -16, -21, -25, -29);
|
||||
let e = i8x16::new(
|
||||
3, 7, 11, 15, 19, 23, 27, 31, -1, -5, -9, -13, -16, -21, -25, -29,
|
||||
);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
#[simd_test(enable = "neon")]
|
||||
|
|
@ -2829,7 +2833,7 @@ mod tests {
|
|||
let e = i64x2::new(i64::MIN, i64::MAX);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vaddv_s16() {
|
||||
let a = i16x4::new(1, 2, 3, -4);
|
||||
|
|
|
|||
|
|
@ -175,7 +175,7 @@ extern "C" {
|
|||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxp.v2f32")]
|
||||
fn vpmaxf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t;
|
||||
|
||||
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v4i16")]
|
||||
fn vpadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
|
||||
|
|
@ -299,7 +299,7 @@ pub unsafe fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
|
||||
pub unsafe fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
|
||||
vpadd_s8_(a,b)
|
||||
vpadd_s8_(a, b)
|
||||
}
|
||||
/// Add pairwise.
|
||||
#[inline]
|
||||
|
|
@ -308,7 +308,7 @@ pub unsafe fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
|
||||
pub unsafe fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
|
||||
transmute(vpadd_s16_(transmute(a),transmute(b)))
|
||||
transmute(vpadd_s16_(transmute(a), transmute(b)))
|
||||
}
|
||||
/// Add pairwise.
|
||||
#[inline]
|
||||
|
|
@ -317,7 +317,7 @@ pub unsafe fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
|
||||
pub unsafe fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
|
||||
transmute(vpadd_s32_(transmute(a),transmute(b)))
|
||||
transmute(vpadd_s32_(transmute(a), transmute(b)))
|
||||
}
|
||||
/// Add pairwise.
|
||||
#[inline]
|
||||
|
|
@ -326,7 +326,7 @@ pub unsafe fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))]
|
||||
pub unsafe fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
|
||||
transmute(vpadd_s8_(transmute(a),transmute(b)))
|
||||
transmute(vpadd_s8_(transmute(a), transmute(b)))
|
||||
}
|
||||
|
||||
/// Unsigned saturating extract narrow.
|
||||
|
|
|
|||
|
|
@ -90,16 +90,44 @@ simd_ty!(i16x2[i16]: i16, i16 | x0, x1);
|
|||
|
||||
// 64-bit wide types:
|
||||
|
||||
simd_ty!(u8x8[u8]:
|
||||
u8, u8, u8, u8, u8, u8, u8, u8
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
simd_ty!(
|
||||
u8x8[u8]: u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
simd_ty!(u16x4[u16]: u16, u16, u16, u16 | x0, x1, x2, x3);
|
||||
simd_ty!(u32x2[u32]: u32, u32 | x0, x1);
|
||||
simd_ty!(u64x1[u64]: u64 | x1);
|
||||
|
||||
simd_ty!(i8x8[i8]:
|
||||
i8, i8, i8, i8, i8, i8, i8, i8
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
simd_ty!(
|
||||
i8x8[i8]: i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
simd_ty!(i16x4[i16]: i16, i16, i16, i16 | x0, x1, x2, x3);
|
||||
simd_ty!(i32x2[i32]: i32, i32 | x0, x1);
|
||||
simd_ty!(i64x1[i64]: i64 | x1);
|
||||
|
|
@ -108,116 +136,576 @@ simd_ty!(f32x2[f32]: f32, f32 | x0, x1);
|
|||
|
||||
// 128-bit wide types:
|
||||
|
||||
simd_ty!(u8x16[u8]:
|
||||
u8, u8, u8, u8, u8, u8, u8, u8,
|
||||
u8, u8, u8, u8, u8, u8, u8, u8
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
|
||||
simd_ty!(
|
||||
u8x16[u8]: u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
simd_ty!(
|
||||
u16x8[u16]: u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
simd_ty!(u16x8[u16]:
|
||||
u16, u16, u16, u16, u16, u16, u16, u16
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
simd_ty!(u32x4[u32]: u32, u32, u32, u32 | x0, x1, x2, x3);
|
||||
simd_ty!(u64x2[u64]: u64, u64 | x0, x1);
|
||||
|
||||
simd_ty!(i8x16[i8]:
|
||||
i8, i8, i8, i8, i8, i8, i8, i8,
|
||||
i8, i8, i8, i8, i8, i8, i8, i8
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
|
||||
simd_ty!(
|
||||
i8x16[i8]: i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
simd_ty!(
|
||||
i16x8[i16]: i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
simd_ty!(i16x8[i16]:
|
||||
i16, i16, i16, i16, i16, i16, i16, i16
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
simd_ty!(i32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3);
|
||||
simd_ty!(i64x2[i64]: i64, i64 | x0, x1);
|
||||
|
||||
simd_ty!(f32x4[f32]: f32, f32, f32, f32 | x0, x1, x2, x3);
|
||||
simd_ty!(f64x2[f64]: f64, f64 | x0, x1);
|
||||
|
||||
simd_m_ty!(m8x16[i8]:
|
||||
i8, i8, i8, i8, i8, i8, i8, i8,
|
||||
i8, i8, i8, i8, i8, i8, i8, i8
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
|
||||
simd_m_ty!(
|
||||
m8x16[i8]: i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
simd_m_ty!(
|
||||
m16x8[i16]: i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
simd_m_ty!(m16x8[i16]:
|
||||
i16, i16, i16, i16, i16, i16, i16, i16
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
simd_m_ty!(m32x4[i32]: i32, i32, i32, i32 | x0, x1, x2, x3);
|
||||
simd_m_ty!(m64x2[i64]: i64, i64 | x0, x1);
|
||||
|
||||
// 256-bit wide types:
|
||||
|
||||
simd_ty!(u8x32[u8]:
|
||||
u8, u8, u8, u8, u8, u8, u8, u8,
|
||||
u8, u8, u8, u8, u8, u8, u8, u8,
|
||||
u8, u8, u8, u8, u8, u8, u8, u8,
|
||||
u8, u8, u8, u8, u8, u8, u8, u8
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
x8, x9, x10, x11, x12, x13, x14, x15,
|
||||
x16, x17, x18, x19, x20, x21, x22, x23,
|
||||
x24, x25, x26, x27, x28, x29, x30, x31
|
||||
simd_ty!(
|
||||
u8x32[u8]: u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8,
|
||||
u8 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15,
|
||||
x16,
|
||||
x17,
|
||||
x18,
|
||||
x19,
|
||||
x20,
|
||||
x21,
|
||||
x22,
|
||||
x23,
|
||||
x24,
|
||||
x25,
|
||||
x26,
|
||||
x27,
|
||||
x28,
|
||||
x29,
|
||||
x30,
|
||||
x31
|
||||
);
|
||||
simd_ty!(u16x16[u16]:
|
||||
u16, u16, u16, u16, u16, u16, u16, u16,
|
||||
u16, u16, u16, u16, u16, u16, u16, u16
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
|
||||
simd_ty!(
|
||||
u16x16[u16]: u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16,
|
||||
u16 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
simd_ty!(
|
||||
u32x8[u32]: u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
simd_ty!(u32x8[u32]:
|
||||
u32, u32, u32, u32, u32, u32, u32, u32
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
simd_ty!(u64x4[u64]: u64, u64, u64, u64 | x0, x1, x2, x3);
|
||||
|
||||
simd_ty!(i8x32[i8]:
|
||||
i8, i8, i8, i8, i8, i8, i8, i8,
|
||||
i8, i8, i8, i8, i8, i8, i8, i8,
|
||||
i8, i8, i8, i8, i8, i8, i8, i8,
|
||||
i8, i8, i8, i8, i8, i8, i8, i8
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
x8, x9, x10, x11, x12, x13, x14, x15,
|
||||
x16, x17, x18, x19, x20, x21, x22, x23,
|
||||
x24, x25, x26, x27, x28, x29, x30, x31
|
||||
simd_ty!(
|
||||
i8x32[i8]: i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8,
|
||||
i8 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15,
|
||||
x16,
|
||||
x17,
|
||||
x18,
|
||||
x19,
|
||||
x20,
|
||||
x21,
|
||||
x22,
|
||||
x23,
|
||||
x24,
|
||||
x25,
|
||||
x26,
|
||||
x27,
|
||||
x28,
|
||||
x29,
|
||||
x30,
|
||||
x31
|
||||
);
|
||||
simd_ty!(i16x16[i16]:
|
||||
i16, i16, i16, i16, i16, i16, i16, i16,
|
||||
i16, i16, i16, i16, i16, i16, i16, i16
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15
|
||||
simd_ty!(
|
||||
i16x16[i16]: i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16,
|
||||
i16 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
simd_ty!(
|
||||
i32x8[i32]: i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
simd_ty!(i32x8[i32]:
|
||||
i32, i32, i32, i32, i32, i32, i32, i32
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3);
|
||||
|
||||
simd_ty!(f32x8[f32]:
|
||||
f32, f32, f32, f32, f32, f32, f32, f32 |
|
||||
x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
simd_ty!(
|
||||
f32x8[f32]: f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
|
||||
// 512-bit wide types:
|
||||
|
||||
simd_ty!(i32x16[i32]:
|
||||
i32, i32, i32, i32, i32, i32, i32, i32,
|
||||
i32, i32, i32, i32, i32, i32, i32, i32
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
x8, x9, x10, x11, x12, x13, x14, x15);
|
||||
simd_ty!(
|
||||
i32x16[i32]: i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32,
|
||||
i32 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
|
||||
simd_ty!(u32x16[u32]:
|
||||
u32, u32, u32, u32, u32, u32, u32, u32,
|
||||
u32, u32, u32, u32, u32, u32, u32, u32
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
x8, x9, x10, x11, x12, x13, x14, x15);
|
||||
simd_ty!(
|
||||
u32x16[u32]: u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32,
|
||||
u32 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
|
||||
simd_ty!(f32x16[f32]:
|
||||
f32, f32, f32, f32, f32, f32, f32, f32,
|
||||
f32, f32, f32, f32, f32, f32, f32, f32
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
x8, x9, x10, x11, x12, x13, x14, x15);
|
||||
simd_ty!(
|
||||
f32x16[f32]: f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32,
|
||||
f32 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7,
|
||||
x8,
|
||||
x9,
|
||||
x10,
|
||||
x11,
|
||||
x12,
|
||||
x13,
|
||||
x14,
|
||||
x15
|
||||
);
|
||||
|
||||
simd_ty!(i64x8[i64]:
|
||||
i64, i64, i64, i64, i64, i64, i64, i64
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
simd_ty!(
|
||||
i64x8[i64]: i64,
|
||||
i64,
|
||||
i64,
|
||||
i64,
|
||||
i64,
|
||||
i64,
|
||||
i64,
|
||||
i64 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
|
||||
simd_ty!(u64x8[u64]:
|
||||
u64, u64, u64, u64, u64, u64, u64, u64
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
simd_ty!(
|
||||
u64x8[u64]: u64,
|
||||
u64,
|
||||
u64,
|
||||
u64,
|
||||
u64,
|
||||
u64,
|
||||
u64,
|
||||
u64 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
|
||||
simd_ty!(f64x8[f64]:
|
||||
f64, f64, f64, f64, f64, f64, f64, f64
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
simd_ty!(
|
||||
f64x8[f64]: f64,
|
||||
f64,
|
||||
f64,
|
||||
f64,
|
||||
f64,
|
||||
f64,
|
||||
f64,
|
||||
f64 | x0,
|
||||
x1,
|
||||
x2,
|
||||
x3,
|
||||
x4,
|
||||
x5,
|
||||
x6,
|
||||
x7
|
||||
);
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue