diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index d2f558a90eea..5aed80071fd5 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -3640,6 +3640,132 @@ pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t { transmute(a) } +/// Signed shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sshll2, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_high_n_s8(a: int8x16_t) -> int16x8_t { + static_assert!(N : i32 where N >= 0 && N <= 8); + let b: int8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + vshll_n_s8::(b) +} + +/// Signed shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sshll2, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_high_n_s16(a: int16x8_t) -> int32x4_t { + static_assert!(N : i32 where N >= 0 && N <= 16); + let b: int16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); + vshll_n_s16::(b) +} + +/// Signed shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sshll2, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_high_n_s32(a: int32x4_t) -> int64x2_t { + static_assert!(N : i32 where N >= 0 && N <= 32); + let b: int32x2_t = simd_shuffle2(a, a, [2, 3]); + vshll_n_s32::(b) +} + +/// Unsigned shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ushll2, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_high_n_u8(a: uint8x16_t) -> uint16x8_t { + static_assert!(N : i32 where N >= 0 && N <= 8); + let b: uint8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + vshll_n_u8::(b) +} + +/// Unsigned shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ushll2, N = 2))] 
+#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_high_n_u16(a: uint16x8_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 0 && N <= 16); + let b: uint16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]); + vshll_n_u16::(b) +} + +/// Unsigned shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ushll2, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_high_n_u32(a: uint32x4_t) -> uint64x2_t { + static_assert!(N : i32 where N >= 0 && N <= 32); + let b: uint32x2_t = simd_shuffle2(a, a, [2, 3]); + vshll_n_u32::(b) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vshrn_high_n_s16(a: int8x8_t, b: int16x8_t) -> int8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_shuffle16(a, vshrn_n_s16::(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vshrn_high_n_s32(a: int16x4_t, b: int32x4_t) -> int16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_shuffle8(a, vshrn_n_s32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shuffle4(a, vshrn_n_s64::(b), [0, 1, 2, 3]) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vshrn_high_n_u16(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_shuffle16(a, vshrn_n_u16::(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 
14, 15]) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vshrn_high_n_u32(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_shuffle8(a, vshrn_n_u32::(b), [0, 1, 2, 3, 4, 5, 6, 7]) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(shrn2, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shuffle4(a, vshrn_n_u64::(b), [0, 1, 2, 3]) +} + /// Transpose vectors #[inline] #[target_feature(enable = "neon")] @@ -7619,6 +7745,108 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vshll_high_n_s8() { + let a: i8x16 = i8x16::new(0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: i16x8 = transmute(vshll_high_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_high_n_s16() { + let a: i16x8 = i16x8::new(0, 0, 1, 2, 1, 2, 3, 4); + let e: i32x4 = i32x4::new(4, 8, 12, 16); + let r: i32x4 = transmute(vshll_high_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_high_n_s32() { + let a: i32x4 = i32x4::new(0, 0, 1, 2); + let e: i64x2 = i64x2::new(4, 8); + let r: i64x2 = transmute(vshll_high_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_high_n_u8() { + let a: u8x16 = u8x16::new(0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: u16x8 = transmute(vshll_high_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_high_n_u16() { + let a: u16x8 = 
u16x8::new(0, 0, 1, 2, 1, 2, 3, 4); + let e: u32x4 = u32x4::new(4, 8, 12, 16); + let r: u32x4 = transmute(vshll_high_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_high_n_u32() { + let a: u32x4 = u32x4::new(0, 0, 1, 2); + let e: u64x2 = u64x2::new(4, 8); + let r: u64x2 = transmute(vshll_high_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_high_n_s16() { + let a: i8x8 = i8x8::new(1, 2, 5, 6, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(20, 24, 28, 32, 52, 56, 60, 64); + let e: i8x16 = i8x16::new(1, 2, 5, 6, 5, 6, 7, 8, 5, 6, 7, 8, 13, 14, 15, 16); + let r: i8x16 = transmute(vshrn_high_n_s16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_high_n_s32() { + let a: i16x4 = i16x4::new(1, 2, 5, 6); + let b: i32x4 = i32x4::new(20, 24, 28, 32); + let e: i16x8 = i16x8::new(1, 2, 5, 6, 5, 6, 7, 8); + let r: i16x8 = transmute(vshrn_high_n_s32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_high_n_s64() { + let a: i32x2 = i32x2::new(1, 2); + let b: i64x2 = i64x2::new(20, 24); + let e: i32x4 = i32x4::new(1, 2, 5, 6); + let r: i32x4 = transmute(vshrn_high_n_s64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_high_n_u16() { + let a: u8x8 = u8x8::new(1, 2, 5, 6, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(20, 24, 28, 32, 52, 56, 60, 64); + let e: u8x16 = u8x16::new(1, 2, 5, 6, 5, 6, 7, 8, 5, 6, 7, 8, 13, 14, 15, 16); + let r: u8x16 = transmute(vshrn_high_n_u16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_high_n_u32() { + let a: u16x4 = u16x4::new(1, 2, 5, 6); + let b: u32x4 = u32x4::new(20, 24, 28, 32); + let e: u16x8 = u16x8::new(1, 2, 5, 6, 5, 6, 7, 8); + let r: u16x8 = 
transmute(vshrn_high_n_u32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_high_n_u64() { + let a: u32x2 = u32x2::new(1, 2); + let b: u64x2 = u64x2::new(20, 24); + let e: u32x4 = u32x4::new(1, 2, 5, 6); + let r: u32x4 = transmute(vshrn_high_n_u64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vtrn1_s8() { let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14); diff --git a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs index 8114d3d50205..7a8c294e78db 100644 --- a/library/stdarch/crates/core_arch/src/arm/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm/neon/generated.rs @@ -7590,6 +7590,790 @@ pub unsafe fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { transmute(a) } +/// Signed Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))] +pub unsafe fn vshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v8i8")] + fn vshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vshl_s8_(a, b) +} + +/// Signed Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))] +pub unsafe fn vshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v16i8")] + #[cfg_attr(target_arch = 
"aarch64", link_name = "llvm.aarch64.neon.sshl.v16i8")] + fn vshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vshlq_s8_(a, b) +} + +/// Signed Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))] +pub unsafe fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v4i16")] + fn vshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vshl_s16_(a, b) +} + +/// Signed Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))] +pub unsafe fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v8i16")] + fn vshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vshlq_s16_(a, b) +} + +/// Signed Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))] +pub unsafe fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v2i32")] + fn vshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } 
+vshl_s32_(a, b) +} + +/// Signed Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))] +pub unsafe fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v4i32")] + fn vshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vshlq_s32_(a, b) +} + +/// Signed Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))] +pub unsafe fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v1i64")] + fn vshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } +vshl_s64_(a, b) +} + +/// Signed Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))] +pub unsafe fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v2i64")] + fn vshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } +vshlq_s64_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +pub unsafe fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v8i8")] + fn vshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } +vshl_u8_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +pub unsafe fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v16i8")] + fn vshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } +vshlq_u8_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +pub unsafe fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v4i16")] + fn vshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + } +vshl_u16_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] 
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +pub unsafe fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v8i16")] + fn vshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } +vshlq_u16_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +pub unsafe fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v2i32")] + fn vshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + } +vshl_u32_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +pub unsafe fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v4i32")] + fn vshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } +vshlq_u32_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +pub unsafe fn vshl_u64(a: 
uint64x1_t, b: int64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v1i64")] + fn vshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } +vshl_u64_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +pub unsafe fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v2i64")] + fn vshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } +vshlq_u64_(a, b) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshl_n_s8(a: int8x8_t) -> int8x8_t { + static_assert_imm3!(N); + simd_shl(a, vdup_n_s8(N.try_into().unwrap())) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshlq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert_imm3!(N); + simd_shl(a, vdupq_n_s8(N.try_into().unwrap())) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshl_n_s16(a: int16x4_t) -> int16x4_t { + static_assert_imm4!(N); + simd_shl(a, vdup_n_s16(N.try_into().unwrap())) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshlq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert_imm4!(N); + simd_shl(a, vdupq_n_s16(N.try_into().unwrap())) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshl_n_s32(a: int32x2_t) -> int32x2_t { + static_assert_imm5!(N); + simd_shl(a, vdup_n_s32(N.try_into().unwrap())) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshlq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert_imm5!(N); + simd_shl(a, vdupq_n_s32(N.try_into().unwrap())) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshl_n_u8(a: uint8x8_t) -> uint8x8_t { + 
static_assert_imm3!(N); + simd_shl(a, vdup_n_u8(N.try_into().unwrap())) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshlq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_imm3!(N); + simd_shl(a, vdupq_n_u8(N.try_into().unwrap())) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshl_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert_imm4!(N); + simd_shl(a, vdup_n_u16(N.try_into().unwrap())) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshlq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_imm4!(N); + simd_shl(a, vdupq_n_u16(N.try_into().unwrap())) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshl_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert_imm5!(N); + simd_shl(a, vdup_n_u32(N.try_into().unwrap())) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshlq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert_imm5!(N); + simd_shl(a, vdupq_n_u32(N.try_into().unwrap())) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshl_n_s64(a: int64x1_t) -> int64x1_t { + static_assert_imm6!(N); + simd_shl(a, vdup_n_s64(N.try_into().unwrap())) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshlq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert_imm6!(N); + simd_shl(a, vdupq_n_s64(N.try_into().unwrap())) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshl_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert_imm6!(N); + simd_shl(a, vdup_n_u64(N.try_into().unwrap())) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshlq_n_u64(a: 
uint64x2_t) -> uint64x2_t { + static_assert_imm6!(N); + simd_shl(a, vdupq_n_u64(N.try_into().unwrap())) +} + +/// Signed shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_n_s8(a: int8x8_t) -> int16x8_t { + static_assert!(N : i32 where N >= 0 && N <= 8); + simd_shl(simd_cast(a), vdupq_n_s16(N.try_into().unwrap())) +} + +/// Signed shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_n_s16(a: int16x4_t) -> int32x4_t { + static_assert!(N : i32 where N >= 0 && N <= 16); + simd_shl(simd_cast(a), vdupq_n_s32(N.try_into().unwrap())) +} + +/// Signed shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_n_s32(a: int32x2_t) -> int64x2_t { + static_assert!(N : i32 where N >= 0 && N <= 32); + simd_shl(simd_cast(a), vdupq_n_s64(N.try_into().unwrap())) +} + +/// Unsigned shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_n_u8(a: uint8x8_t) -> 
uint16x8_t { + static_assert!(N : i32 where N >= 0 && N <= 8); + simd_shl(simd_cast(a), vdupq_n_u16(N.try_into().unwrap())) +} + +/// Unsigned shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_n_u16(a: uint16x4_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 0 && N <= 16); + simd_shl(simd_cast(a), vdupq_n_u32(N.try_into().unwrap())) +} + +/// Unsigned shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshll_n_u32(a: uint32x2_t) -> uint64x2_t { + static_assert!(N : i32 where N >= 0 && N <= 32); + simd_shl(simd_cast(a), vdupq_n_u64(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshr_n_s8(a: int8x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_shr(a, vdup_n_s8(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrq_n_s8(a: int8x16_t) -> int8x16_t { + 
static_assert!(N : i32 where N >= 1 && N <= 8); + simd_shr(a, vdupq_n_s8(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshr_n_s16(a: int16x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_shr(a, vdup_n_s16(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_shr(a, vdupq_n_s16(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshr_n_s32(a: int32x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shr(a, vdup_n_s32(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shr(a, 
vdupq_n_s32(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshr_n_s64(a: int64x1_t) -> int64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_shr(a, vdup_n_s64(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_shr(a, vdupq_n_s64(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshr_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_shr(a, vdup_n_u8(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_shr(a, vdupq_n_u8(N.try_into().unwrap())) +} + +/// Shift right +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshr_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_shr(a, vdup_n_u16(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_shr(a, vdupq_n_u16(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshr_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shr(a, vdup_n_u32(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_shr(a, vdupq_n_u32(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshr_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_shr(a, vdup_n_u64(N.try_into().unwrap())) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_shr(a, vdupq_n_u64(N.try_into().unwrap())) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_cast(simd_shr(a, vdupq_n_s16(N.try_into().unwrap()))) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_cast(simd_shr(a, vdupq_n_s32(N.try_into().unwrap()))) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_cast(simd_shr(a, vdupq_n_s64(N.try_into().unwrap()))) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_cast(simd_shr(a, vdupq_n_u16(N.try_into().unwrap()))) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_cast(simd_shr(a, vdupq_n_u32(N.try_into().unwrap()))) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_cast(simd_shr(a, vdupq_n_u64(N.try_into().unwrap()))) +} + /// Unsigned Absolute difference and Accumulate Long #[inline] #[target_feature(enable = "neon")] @@ -13550,6 +14334,502 @@ mod test { 
assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vshl_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: i8x8 = transmute(vshl_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let e: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let r: i8x16 = transmute(vshlq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let e: i16x4 = i16x4::new(4, 8, 12, 16); + let r: i16x4 = transmute(vshl_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: i16x8 = transmute(vshlq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(2, 2); + let e: i32x2 = i32x2::new(4, 8); + let r: i32x2 = transmute(vshl_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let e: i32x4 = i32x4::new(4, 8, 12, 16); + let r: i32x4 = transmute(vshlq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_s64() { + let a: i64x1 = i64x1::new(1); + let b: i64x1 = 
i64x1::new(2); + let e: i64x1 = i64x1::new(4); + let r: i64x1 = transmute(vshl_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_s64() { + let a: i64x2 = i64x2::new(1, 2); + let b: i64x2 = i64x2::new(2, 2); + let e: i64x2 = i64x2::new(4, 8); + let r: i64x2 = transmute(vshlq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: u8x8 = transmute(vshl_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let e: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let r: u8x16 = transmute(vshlq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let e: u16x4 = u16x4::new(4, 8, 12, 16); + let r: u16x4 = transmute(vshl_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: u16x8 = transmute(vshlq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: i32x2 = i32x2::new(2, 2); + let e: u32x2 = u32x2::new(4, 8); + let r: u32x2 = transmute(vshl_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = 
"neon")] + unsafe fn test_vshlq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let e: u32x4 = u32x4::new(4, 8, 12, 16); + let r: u32x4 = transmute(vshlq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_u64() { + let a: u64x1 = u64x1::new(1); + let b: i64x1 = i64x1::new(2); + let e: u64x1 = u64x1::new(4); + let r: u64x1 = transmute(vshl_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_u64() { + let a: u64x2 = u64x2::new(1, 2); + let b: i64x2 = i64x2::new(2, 2); + let e: u64x2 = u64x2::new(4, 8); + let r: u64x2 = transmute(vshlq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_n_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: i8x8 = transmute(vshl_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let r: i8x16 = transmute(vshlq_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_n_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let e: i16x4 = i16x4::new(4, 8, 12, 16); + let r: i16x4 = transmute(vshl_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: i16x8 = transmute(vshlq_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_n_s32() { + let a: i32x2 = i32x2::new(1, 2); + let e: i32x2 = i32x2::new(4, 8); + let r: i32x2 = 
transmute(vshl_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let e: i32x4 = i32x4::new(4, 8, 12, 16); + let r: i32x4 = transmute(vshlq_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_n_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: u8x8 = transmute(vshl_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let r: u8x16 = transmute(vshlq_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_n_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(4, 8, 12, 16); + let r: u16x4 = transmute(vshl_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: u16x8 = transmute(vshlq_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_n_u32() { + let a: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(4, 8); + let r: u32x2 = transmute(vshl_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(4, 8, 12, 16); + let r: u32x4 = transmute(vshlq_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_n_s64() { + let a: i64x1 = i64x1::new(1); + let e: i64x1 = i64x1::new(4); + let r: i64x1 = 
transmute(vshl_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_s64() { + let a: i64x2 = i64x2::new(1, 2); + let e: i64x2 = i64x2::new(4, 8); + let r: i64x2 = transmute(vshlq_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_n_u64() { + let a: u64x1 = u64x1::new(1); + let e: u64x1 = u64x1::new(4); + let r: u64x1 = transmute(vshl_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_u64() { + let a: u64x2 = u64x2::new(1, 2); + let e: u64x2 = u64x2::new(4, 8); + let r: u64x2 = transmute(vshlq_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_n_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: i16x8 = transmute(vshll_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_n_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let e: i32x4 = i32x4::new(4, 8, 12, 16); + let r: i32x4 = transmute(vshll_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_n_s32() { + let a: i32x2 = i32x2::new(1, 2); + let e: i64x2 = i64x2::new(4, 8); + let r: i64x2 = transmute(vshll_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_n_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: u16x8 = transmute(vshll_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_n_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(4, 8, 12, 16); + let r: u32x4 = transmute(vshll_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_n_u32() { + 
let a: u32x2 = u32x2::new(1, 2); + let e: u64x2 = u64x2::new(4, 8); + let r: u64x2 = transmute(vshll_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshr_n_s8() { + let a: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vshr_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_s8() { + let a: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let e: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: i8x16 = transmute(vshrq_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshr_n_s16() { + let a: i16x4 = i16x4::new(4, 8, 12, 16); + let e: i16x4 = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vshr_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_s16() { + let a: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i16x8 = transmute(vshrq_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshr_n_s32() { + let a: i32x2 = i32x2::new(4, 8); + let e: i32x2 = i32x2::new(1, 2); + let r: i32x2 = transmute(vshr_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_s32() { + let a: i32x4 = i32x4::new(4, 8, 12, 16); + let e: i32x4 = i32x4::new(1, 2, 3, 4); + let r: i32x4 = transmute(vshrq_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshr_n_s64() { + let a: i64x1 = i64x1::new(4); + let e: i64x1 = i64x1::new(1); + let r: i64x1 = transmute(vshr_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_s64() { + let a: i64x2 = i64x2::new(4, 8); + 
let e: i64x2 = i64x2::new(1, 2); + let r: i64x2 = transmute(vshrq_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshr_n_u8() { + let a: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vshr_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_u8() { + let a: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let e: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: u8x16 = transmute(vshrq_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshr_n_u16() { + let a: u16x4 = u16x4::new(4, 8, 12, 16); + let e: u16x4 = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vshr_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_u16() { + let a: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u16x8 = transmute(vshrq_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshr_n_u32() { + let a: u32x2 = u32x2::new(4, 8); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vshr_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_u32() { + let a: u32x4 = u32x4::new(4, 8, 12, 16); + let e: u32x4 = u32x4::new(1, 2, 3, 4); + let r: u32x4 = transmute(vshrq_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshr_n_u64() { + let a: u64x1 = u64x1::new(4); + let e: u64x1 = u64x1::new(1); + let r: u64x1 = transmute(vshr_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_u64() { + let a: u64x2 = u64x2::new(4, 8); + let e: u64x2 = u64x2::new(1, 2); + 
let r: u64x2 = transmute(vshrq_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_n_s16() { + let a: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vshrn_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_n_s32() { + let a: i32x4 = i32x4::new(4, 8, 12, 16); + let e: i16x4 = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vshrn_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_n_s64() { + let a: i64x2 = i64x2::new(4, 8); + let e: i32x2 = i32x2::new(1, 2); + let r: i32x2 = transmute(vshrn_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_n_u16() { + let a: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vshrn_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_n_u32() { + let a: u32x4 = u32x4::new(4, 8, 12, 16); + let e: u16x4 = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vshrn_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_n_u64() { + let a: u64x2 = u64x2::new(4, 8); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vshrn_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vabal_u8() { let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); diff --git a/library/stdarch/crates/core_arch/src/arm/neon/mod.rs b/library/stdarch/crates/core_arch/src/arm/neon/mod.rs index cdb0f82c9f6f..a0bfb2f556d9 100644 --- a/library/stdarch/crates/core_arch/src/arm/neon/mod.rs +++ b/library/stdarch/crates/core_arch/src/arm/neon/mod.rs @@ -10,6 +10,7 @@ use crate::mem::align_of; use crate::{ core_arch::simd::*, 
core_arch::simd_llvm::*, hint::unreachable_unchecked, mem::transmute, }; +use core::convert::TryInto; #[cfg(test)] use stdarch_test::assert_instr; @@ -4411,70 +4412,6 @@ pub unsafe fn vmovq_n_f32(value: f32) -> float32x4_t { vdupq_n_f32(value) } -/// Unsigned shift right -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", IMM3 = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("ushr", IMM3 = 1))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vshrq_n_u8(a: uint8x16_t) -> uint8x16_t { - if IMM3 < 0 || IMM3 > 7 { - unreachable_unchecked(); - } else { - uint8x16_t( - a.0 >> IMM3, - a.1 >> IMM3, - a.2 >> IMM3, - a.3 >> IMM3, - a.4 >> IMM3, - a.5 >> IMM3, - a.6 >> IMM3, - a.7 >> IMM3, - a.8 >> IMM3, - a.9 >> IMM3, - a.10 >> IMM3, - a.11 >> IMM3, - a.12 >> IMM3, - a.13 >> IMM3, - a.14 >> IMM3, - a.15 >> IMM3, - ) - } -} - -/// Shift right -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshl.s8", IMM3 = 1))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, IMM3 = 1))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vshlq_n_u8(a: uint8x16_t) -> uint8x16_t { - if IMM3 < 0 || IMM3 > 7 { - unreachable_unchecked(); - } else { - uint8x16_t( - a.0 << IMM3, - a.1 << IMM3, - a.2 << IMM3, - a.3 << IMM3, - a.4 << IMM3, - a.5 << IMM3, - a.6 << IMM3, - a.7 << IMM3, - a.8 << IMM3, - a.9 << IMM3, - a.10 << IMM3, - a.11 << IMM3, - a.12 << IMM3, - a.13 << IMM3, - a.14 << IMM3, - a.15 << IMM3, - ) - } -} - /// Extract vector from pair of vectors #[inline] #[target_feature(enable = "neon")] @@ -5908,22 +5845,6 @@ mod tests { assert_eq!(r, 2); } - #[simd_test(enable = "neon")] - unsafe fn test_vshrq_n_u8() { - let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e = u8x16::new(0, 
0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4); - let r: u8x16 = transmute(vshrq_n_u8::<2>(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vshlq_n_u8() { - let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); - let e = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); - let r: u8x16 = transmute(vshlq_n_u8::<2>(transmute(a))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vext_s64() { let a: i64x1 = i64x1::new(0); diff --git a/library/stdarch/crates/stdarch-gen/neon.spec b/library/stdarch/crates/stdarch-gen/neon.spec index 50a9f02a95bb..460b236bd7e5 100644 --- a/library/stdarch/crates/stdarch-gen/neon.spec +++ b/library/stdarch/crates/stdarch-gen/neon.spec @@ -2058,6 +2058,124 @@ aarch64 = str generate float32x2_t:float64x1_t, float64x1_t:float32x2_t generate float32x4_t:float64x2_t, float64x2_t:float32x4_t +/// Signed Shift left +name = vshl +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 + +arm = vshl +link-arm = vshifts._EXT_ +aarch64 = sshl +link-aarch64 = sshl._EXT_ +generate int*_t, int64x*_t + +/// Unsigned Shift left +name = vshl +out-suffix +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 + +arm = vshl +link-arm = vshiftu._EXT_ +aarch64 = ushl +link-aarch64 = ushl._EXT_ +generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t +generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t + +/// Shift left +name = vshl +n-suffix +constn = N +multi_fn = static_assert_imm-out_bits_exp_len-N +multi_fn = simd_shl, a, 
{vdup-nself-noext, N.try_into().unwrap()} +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +n = 2 +validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 + +arm = vshl +aarch64 = shl +generate int*_t, uint*_t, int64x*_t, uint64x*_t + +/// Signed shift left long +name = vshll +n-suffix +constn = N +multi_fn = static_assert-N-0-bits +multi_fn = simd_shl, {simd_cast, a}, {vdup-nout-noext, N.try_into().unwrap()} +a = 1, 2, 3, 4, 5, 6, 7, 8 +n = 2 +validate 4, 8, 12, 16, 20, 24, 28, 32 + +arm = vshll.s +aarch64 = sshll +generate int8x8_t:int16x8_t, int16x4_t:int32x4_t, int32x2_t:int64x2_t +aarch64 = ushll +generate uint8x8_t:uint16x8_t, uint16x4_t:uint32x4_t, uint32x2_t:uint64x2_t + +/// Signed shift left long +name = vshll_high_n +no-q +constn = N +multi_fn = static_assert-N-0-bits +multi_fn = simd_shuffle-out_len-noext, b:half, a, a, {asc-halflen-halflen} +multi_fn = vshll_n-noqself-::, b +a = 0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8 +n = 2 +validate 4, 8, 12, 16, 20, 24, 28, 32 + +aarch64 = sshll2 +generate int8x16_t:int16x8_t, int16x8_t:int32x4_t, int32x4_t:int64x2_t +aarch64 = ushll2 +generate uint8x16_t:uint16x8_t, uint16x8_t:uint32x4_t, uint32x4_t:uint64x2_t + +/// Shift right +name = vshr +n-suffix +constn = N +multi_fn = static_assert-N-1-bits +multi_fn = simd_shr, a, {vdup-nself-noext, N.try_into().unwrap()} +a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 +n = 2 +validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + +arm = vshr.s +aarch64 = sshr +generate int*_t, int64x*_t +aarch64 = ushr +generate uint*_t, uint64x*_t + +/// Shift right narrow +name = vshrn_n +no-q +constn = N +multi_fn = static_assert-N-1-halfbits +multi_fn = simd_cast, {simd_shr, a, {vdup-nself-noext, N.try_into().unwrap()}} +a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 +n = 2 +validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + +arm = vshrn. 
+aarch64 = shrn +generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t +generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t + +/// Shift right narrow +name = vshrn_high_n +no-q +constn = N +multi_fn = static_assert-N-1-halfbits +multi_fn = simd_shuffle-out_len-noext, a, {vshrn_n-noqself-::, b}, {asc-0-out_len} +a = 1, 2, 5, 6, 5, 6, 7, 8 +b = 20, 24, 28, 32, 52, 56, 60, 64 +n = 2 +validate 1, 2, 5, 6, 5, 6, 7, 8, 5, 6, 7, 8, 13, 14, 15, 16 + +aarch64 = shrn2 +generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t +generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t + /// Transpose vectors name = vtrn1 multi_fn = simd_shuffle-in_len-noext, a, b, {transpose-1-in_len} diff --git a/library/stdarch/crates/stdarch-gen/src/main.rs b/library/stdarch/crates/stdarch-gen/src/main.rs index adf8f4a21957..45e49eb3d248 100644 --- a/library/stdarch/crates/stdarch-gen/src/main.rs +++ b/library/stdarch/crates/stdarch-gen/src/main.rs @@ -84,6 +84,16 @@ fn type_len(t: &str) -> usize { } } +fn type_bits(t: &str) -> usize { + match t { + "int8x8_t" | "int8x16_t" | "uint8x8_t" | "uint8x16_t" | "poly8x8_t" | "poly8x16_t" => 8, + "int16x4_t" | "int16x8_t" | "uint16x4_t" | "uint16x8_t" | "poly16x4_t" | "poly16x8_t" => 16, + "int32x2_t" | "int32x4_t" | "uint32x2_t" | "uint32x4_t" => 32, + "int64x1_t" | "int64x2_t" | "uint64x1_t" | "uint64x2_t" | "poly64x1_t" | "poly64x2_t" => 64, + _ => panic!("unknown type: {}", t), + } +} + fn type_exp_len(t: &str) -> usize { match t { "int8x8_t" => 3, @@ -118,6 +128,16 @@ fn type_exp_len(t: &str) -> usize { } } +fn type_bits_exp_len(t: &str) -> usize { + match t { + "int8x8_t" | "int8x16_t" | "uint8x8_t" | "uint8x16_t" | "poly8x8_t" | "poly8x16_t" => 3, + "int16x4_t" | "int16x8_t" | "uint16x4_t" | "uint16x8_t" | "poly16x4_t" | "poly16x8_t" => 4, + "int32x2_t" | "int32x4_t" | "uint32x2_t" | "uint32x4_t" => 5, + 
"int64x1_t" | "int64x2_t" | "uint64x1_t" | "uint64x2_t" | "poly64x1_t" | "poly64x2_t" => 6, + _ => panic!("unknown type: {}", t), + } +} + fn type_to_suffix(t: &str) -> &str { match t { "int8x8_t" => "_s8", @@ -152,6 +172,40 @@ fn type_to_suffix(t: &str) -> &str { } } +fn type_to_n_suffix(t: &str) -> &str { + match t { + "int8x8_t" => "_n_s8", + "int8x16_t" => "q_n_s8", + "int16x4_t" => "_n_s16", + "int16x8_t" => "q_n_s16", + "int32x2_t" => "_n_s32", + "int32x4_t" => "q_n_s32", + "int64x1_t" => "_n_s64", + "int64x2_t" => "q_n_s64", + "uint8x8_t" => "_n_u8", + "uint8x16_t" => "q_n_u8", + "uint16x4_t" => "_n_u16", + "uint16x8_t" => "q_n_u16", + "uint32x2_t" => "_n_u32", + "uint32x4_t" => "q_n_u32", + "uint64x1_t" => "_n_u64", + "uint64x2_t" => "q_n_u64", + "float16x4_t" => "_n_f16", + "float16x8_t" => "q_n_f16", + "float32x2_t" => "_n_f32", + "float32x4_t" => "q_n_f32", + "float64x1_t" => "_n_f64", + "float64x2_t" => "q_n_f64", + "poly8x8_t" => "_n_p8", + "poly8x16_t" => "q_n_p8", + "poly16x4_t" => "_n_p16", + "poly16x8_t" => "q_n_p16", + "poly64x1_t" => "_n_p64", + "poly64x2_t" => "q_n_p64", + _ => panic!("unknown type: {}", t), + } +} + fn type_to_signed_suffix(t: &str) -> &str { match t { "int8x8_t" | "uint8x8_t" | "poly8x8_t" => "_s8", @@ -243,6 +297,8 @@ enum Suffix { Double, NoQ, NoQDouble, + NSuffix, + OutSuffix, } #[derive(Clone, Copy)] @@ -673,6 +729,8 @@ fn gen_aarch64( current_name, type_to_noq_double_suffixes(out_t, in_t[1]) ), + NSuffix => format!("{}{}", current_name, type_to_n_suffix(in_t[1])), + OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)), }; let current_fn = if let Some(current_fn) = current_fn.clone() { if link_aarch64.is_some() { @@ -988,6 +1046,8 @@ fn gen_arm( current_name, type_to_noq_double_suffixes(out_t, in_t[1]) ), + NSuffix => format!("{}{}", current_name, type_to_n_suffix(in_t[1])), + OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)), }; let current_aarch64 = current_aarch64 .clone() @@ -1298,11 
+1358,13 @@ fn get_call( let start = match &*fn_format[1] { "0" => 0, "n" => n.unwrap(), + "halflen" => type_half_len_str(in_t[1]).parse::().unwrap(), s => s.parse::().unwrap(), }; let len = match &*fn_format[2] { "out_len" => type_len(out_t), "in_len" => type_len(in_t[1]), + "halflen" => type_half_len_str(in_t[1]).parse::().unwrap(), _ => 0, }; return asc(start, len); @@ -1311,11 +1373,33 @@ fn get_call( let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect(); let len = match &*fn_format[1] { "out_exp_len" => type_exp_len(out_t), + "out_bits_exp_len" => type_bits_exp_len(out_t), "in_exp_len" => type_exp_len(in_t[1]), + "in_bits_exp_len" => type_bits_exp_len(in_t[1]), _ => 0, }; - let sa = format!(r#"static_assert_imm{}!({});"#, len, fn_format[2]); - return sa; + return format!(r#"static_assert_imm{}!({});"#, len, fn_format[2]); + } + if fn_name.starts_with("static_assert") { + let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect(); + let lim1 = if fn_format[2] == "bits" { + type_bits(in_t[1]).to_string() + } else if fn_format[2] == "halfbits" { + (type_bits(in_t[1]) / 2).to_string() + } else { + fn_format[2].clone() + }; + let lim2 = if fn_format[3] == "bits" { + type_bits(in_t[1]).to_string() + } else if fn_format[3] == "halfbits" { + (type_bits(in_t[1]) / 2).to_string() + } else { + fn_format[3].clone() + }; + return format!( + r#"static_assert!({} : i32 where {} >= {} && {} <= {});"#, + fn_format[1], fn_format[1], lim1, fn_format[1], lim2 + ); } if fn_name.starts_with("matchn") { let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect(); @@ -1430,6 +1514,8 @@ fn get_call( }; if fn_format[1] == "self" { fn_name.push_str(type_to_suffix(in_t[1])); + } else if fn_format[1] == "nself" { + fn_name.push_str(type_to_n_suffix(in_t[1])); } else if fn_format[1] == "signed" { fn_name.push_str(type_to_signed_suffix(in_t[1])); } else if fn_format[1] == "unsigned" { @@ -1445,11 +1531,16 @@ fn get_call( 
fn_name.push_str(&type_len(in_t[1]).to_string()); } else if fn_format[1] == "out_len" { fn_name.push_str(&type_len(out_t).to_string()); + } else if fn_format[1] == "nout" { + fn_name.push_str(type_to_n_suffix(out_t)); } else { fn_name.push_str(&fn_format[1]); }; if fn_format[2] == "ext" { fn_name.push_str("_"); + } else if fn_format[2] == "noext" { + } else { + fn_name.push_str(&fn_format[2]); } } if param_str.is_empty() { @@ -1595,6 +1686,10 @@ mod test { suffix = NoQ; } else if line.starts_with("noq-double-suffixes") { suffix = NoQDouble; + } else if line.starts_with("n-suffix") { + suffix = NSuffix; + } else if line.starts_with("out-suffix") { + suffix = OutSuffix; } else if line.starts_with("a = ") { a = line[4..].split(',').map(|v| v.trim().to_string()).collect(); } else if line.starts_with("b = ") {