diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
index ed5b0b5423b1..165caf9c3a40 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@@ -4605,6 +4605,140 @@ pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
     transmute(a)
 }
 
+/// Signed rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(srshl))]
+pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 {
+    transmute(vrshl_s64(transmute(a), transmute(b)))
+}
+
+/// Unsigned rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(urshl))]
+pub unsafe fn vrshld_u64(a: u64, b: i64) -> u64 {
+    transmute(vrshl_u64(transmute(a), transmute(b)))
+}
+
+/// Signed rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrd_n_s64<const N: i32>(a: i64) -> i64 {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    vrshld_s64(a, -N as i64)
+}
+
+/// Unsigned rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    vrshld_u64(a, -N as i64)
+}
+
+/// Rounding shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shuffle16(a, vrshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Rounding shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shuffle8(a, vrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Rounding shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_shuffle4(a, vrshrn_n_s64::<N>(b), [0, 1, 2, 3])
+}
+
+/// Rounding shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    simd_shuffle16(a, vrshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Rounding shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    simd_shuffle8(a, vrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Rounding shift right narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    simd_shuffle4(a, vrshrn_n_u64::<N>(b), [0, 1, 2, 3])
+}
+
+/// Signed rounding shift right and accumulate.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    let b: int64x1_t = vrshr_n_s64::<N>(transmute(b));
+    transmute(simd_add(transmute(a), b))
+}
+
+/// Unsigned rounding shift right and accumulate.
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    let b: uint64x1_t = vrshr_n_u64::<N>(transmute(b));
+    transmute(simd_add(transmute(a), b))
+}
+
+/// Signed Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sshl))]
+pub unsafe fn vshld_s64(a: i64, b: i64) -> i64 {
+    transmute(vshl_s64(transmute(a), transmute(b)))
+}
+
+/// Unsigned Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(ushl))]
+pub unsafe fn vshld_u64(a: u64, b: i64) -> u64 {
+    transmute(vshl_u64(transmute(a), transmute(b)))
+}
+
 /// Signed shift left long
 #[inline]
 #[target_feature(enable = "neon")]
@@ -9872,6 +10006,130 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrshld_s64() {
+        let a: i64 = 1;
+        let b: i64 = 2;
+        let e: i64 = 4;
+        let r: i64 = transmute(vrshld_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrshld_u64() {
+        let a: u64 = 1;
+        let b: i64 = 2;
+        let e: u64 = 4;
+        let r: u64 = transmute(vrshld_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrshrd_n_s64() {
+        let a: i64 = 4;
+        let e: i64 = 1;
+        let r: i64 = transmute(vrshrd_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrshrd_n_u64() {
+        let a: u64 = 4;
+        let e: u64 = 1;
+        let r: u64 = transmute(vrshrd_n_u64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrshrn_high_n_s16() {
+        let a: i8x8 = i8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
+        let b: i16x8 = i16x8::new(32, 36, 40, 44, 48, 52, 56, 60);
+        let e: i8x16 = i8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15);
+        let r: i8x16 = transmute(vrshrn_high_n_s16::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrshrn_high_n_s32() {
+        let a: i16x4 = i16x4::new(0, 1, 8, 9);
+        let b: i32x4 = i32x4::new(32, 36, 40, 44);
+        let e: i16x8 = i16x8::new(0, 1, 8, 9, 8, 9, 10, 11);
+        let r: i16x8 = transmute(vrshrn_high_n_s32::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrshrn_high_n_s64() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let b: i64x2 = i64x2::new(32, 36);
+        let e: i32x4 = i32x4::new(0, 1, 8, 9);
+        let r: i32x4 = transmute(vrshrn_high_n_s64::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrshrn_high_n_u16() {
+        let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
+        let b: u16x8 = u16x8::new(32, 36, 40, 44, 48, 52, 56, 60);
+        let e: u8x16 = u8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15);
+        let r: u8x16 = transmute(vrshrn_high_n_u16::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrshrn_high_n_u32() {
+        let a: u16x4 = u16x4::new(0, 1, 8, 9);
+        let b: u32x4 = u32x4::new(32, 36, 40, 44);
+        let e: u16x8 = u16x8::new(0, 1, 8, 9, 8, 9, 10, 11);
+        let r: u16x8 = transmute(vrshrn_high_n_u32::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrshrn_high_n_u64() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let b: u64x2 = u64x2::new(32, 36);
+        let e: u32x4 = u32x4::new(0, 1, 8, 9);
+        let r: u32x4 = transmute(vrshrn_high_n_u64::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrsrad_n_s64() {
+        let a: i64 = 1;
+        let b: i64 = 4;
+        let e: i64 = 2;
+        let r: i64 = transmute(vrsrad_n_s64::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrsrad_n_u64() {
+        let a: u64 = 1;
+        let b: u64 = 4;
+        let e: u64 = 2;
+        let r: u64 = transmute(vrsrad_n_u64::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vshld_s64() {
+        let a: i64 = 1;
+        let b: i64 = 2;
+        let e: i64 = 4;
+        let r: i64 = transmute(vshld_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vshld_u64() {
+        let a: u64 = 1;
+        let b: i64 = 2;
+        let e: u64 = 4;
+        let r: u64 = transmute(vshld_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vshll_high_n_s8() {
         let a: i8x16 = i8x16::new(0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8);
diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
index 71e6b83a63c4..ce2b0485a7a2 100644
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
@@ -2447,6 +2447,66 @@ pub unsafe fn vqtbx4q_p8(a: poly8x16_t, t: poly8x16x4_t, idx: uint8x16_t) -> pol
     ))
 }
 
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshld_n_s64<const N: i32>(a: i64) -> i64 {
+    static_assert_imm6!(N);
+    a << N
+}
+
+/// Shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshld_n_u64<const N: i32>(a: u64) -> u64 {
+    static_assert_imm6!(N);
+    a << N
+}
+
+/// Signed shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrd_n_s64<const N: i32>(a: i64) -> i64 {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    a >> N
+}
+
+/// Unsigned shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vshrd_n_u64<const N: i32>(a: u64) -> u64 {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    a >> N
+}
+
+/// Signed shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    a + (b >> N)
+}
+
+/// Unsigned shift right and accumulate
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(nop, N = 2))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn vsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    a + (b >> N)
+}
+
 /// Shift Left and Insert (immediate)
 #[inline]
 #[target_feature(enable = "neon")]
@@ -3512,6 +3572,56 @@ mod tests {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vshld_n_s64() {
+        let a: i64 = 1;
+        let e: i64 = 4;
+        let r: i64 = vshld_n_s64::<2>(a);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vshld_n_u64() {
+        let a: u64 = 1;
+        let e: u64 = 4;
+        let r: u64 = vshld_n_u64::<2>(a);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vshrd_n_s64() {
+        let a: i64 = 4;
+        let e: i64 = 1;
+        let r: i64 = vshrd_n_s64::<2>(a);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vshrd_n_u64() {
+        let a: u64 = 4;
+        let e: u64 = 1;
+        let r: u64 = vshrd_n_u64::<2>(a);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsrad_n_s64() {
+        let a: i64 = 1;
+        let b: i64 = 4;
+        let e: i64 = 2;
+        let r: i64 = vsrad_n_s64::<2>(a, b);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsrad_n_u64() {
+        let a: u64 = 1;
+        let b: u64 = 4;
+        let e: u64 = 2;
+        let r: u64 = vsrad_n_u64::<2>(a, b);
+        assert_eq!(r, e);
+    }
+
     macro_rules! test_vcombine {
         ($test_id:ident => $fn_id:ident ([$($a:expr),*], [$($b:expr),*])) => {
             #[allow(unused_assignments)]
diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
index 3bfd86f03c1a..fcf8ad4eaa20 100644
--- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs
@@ -9452,6 +9452,778 @@ pub unsafe fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t {
     transmute(a)
 }
 
+/// Signed rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+pub unsafe fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v8i8")]
+        fn vrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+vrshl_s8_(a, b)
+}
+
+/// Signed rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+pub unsafe fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v16i8")]
+        fn vrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+vrshlq_s8_(a, b)
+}
+
+/// Signed rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+pub unsafe fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v4i16")]
+        fn vrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+vrshl_s16_(a, b)
+}
+
+/// Signed rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+pub unsafe fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v8i16")]
+        fn vrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+vrshlq_s16_(a, b)
+}
+
+/// Signed rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+pub unsafe fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v2i32")]
+        fn vrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+vrshl_s32_(a, b)
+}
+
+/// Signed rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+pub unsafe fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v4i32")]
+        fn vrshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+vrshlq_s32_(a, b)
+}
+
+/// Signed rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+pub unsafe fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v1i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v1i64")]
+        fn vrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+vrshl_s64_(a, b)
+}
+
+/// Signed rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+pub unsafe fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v2i64")]
+        fn vrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+vrshlq_s64_(a, b)
+}
+
+/// Unsigned rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+pub unsafe fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v8i8")]
+        fn vrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
+    }
+vrshl_u8_(a, b)
+}
+
+/// Unsigned rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+pub unsafe fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v16i8")]
+        fn vrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
+    }
+vrshlq_u8_(a, b)
+}
+
+/// Unsigned rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+pub unsafe fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v4i16")]
+        fn vrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
+    }
+vrshl_u16_(a, b)
+}
+
+/// Unsigned rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+pub unsafe fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v8i16")]
+        fn vrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
+    }
+vrshlq_u16_(a, b)
+}
+
+/// Unsigned rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+pub unsafe fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v2i32")]
+        fn vrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
+    }
+vrshl_u32_(a, b)
+}
+
+/// Unsigned rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+pub unsafe fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+    #[allow(improper_ctypes)]
+    extern "C" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v4i32")]
"llvm.aarch64.neon.urshl.v4i32")] + fn vrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } +vrshlq_u32_(a, b) +} + +/// Unsigned rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] +pub unsafe fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v1i64")] + fn vrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } +vrshl_u64_(a, b) +} + +/// Unsigned rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] +pub unsafe fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v2i64")] + fn vrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } +vrshlq_u64_(a, b) +} + +/// Signed rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshr_n_s8(a: int8x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + vrshl_s8(a, vdup_n_s8((-N).try_into().unwrap())) +} + +/// Signed rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + vrshlq_s8(a, vdupq_n_s8((-N).try_into().unwrap())) +} + +/// Signed rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshr_n_s16(a: int16x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + vrshl_s16(a, vdup_n_s16((-N).try_into().unwrap())) +} + +/// Signed rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + vrshlq_s16(a, vdupq_n_s16((-N).try_into().unwrap())) +} + +/// Signed rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshr_n_s32(a: int32x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + vrshl_s32(a, vdup_n_s32((-N).try_into().unwrap())) +} + +/// Signed rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + vrshlq_s32(a, vdupq_n_s32((-N).try_into().unwrap())) +} + +/// Signed rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshr_n_s64(a: int64x1_t) -> int64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + vrshl_s64(a, vdup_n_s64((-N).try_into().unwrap())) +} + +/// Signed rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + vrshlq_s64(a, vdupq_n_s64((-N).try_into().unwrap())) +} + +/// Unsigned rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshr_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + vrshl_u8(a, vdup_n_s8((-N).try_into().unwrap())) +} + +/// Unsigned rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + vrshlq_u8(a, vdupq_n_s8((-N).try_into().unwrap())) +} + +/// Unsigned rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshr_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + vrshl_u16(a, vdup_n_s16((-N).try_into().unwrap())) +} + +/// Unsigned rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] 
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + vrshlq_u16(a, vdupq_n_s16((-N).try_into().unwrap())) +} + +/// Unsigned rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshr_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + vrshl_u32(a, vdup_n_s32((-N).try_into().unwrap())) +} + +/// Unsigned rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + vrshlq_u32(a, vdupq_n_s32((-N).try_into().unwrap())) +} + +/// Unsigned rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshr_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + vrshl_u64(a, vdup_n_s64((-N).try_into().unwrap())) +} + +/// Unsigned rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + vrshlq_u64(a, vdupq_n_s64((-N).try_into().unwrap())) +} + +/// Rounding shift right narrow +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v8i8")] + fn vrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t; + } +vrshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16)) +} + +/// Rounding shift right narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v8i8")] + fn vrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t; + } +vrshrn_n_s16_(a, N) +} + +/// Rounding shift right narrow +#[inline] +#[cfg(target_arch = "arm")] 
+#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v4i16")] + fn vrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t; + } +vrshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32)) +} + +/// Rounding shift right narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v4i16")] + fn vrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t; + } +vrshrn_n_s32_(a, N) +} + +/// Rounding shift right narrow +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v2i32")] + fn vrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t; + } +vrshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64)) +} + +/// Rounding shift right narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v2i32")] + fn vrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t; + } +vrshrn_n_s64_(a, N) +} + +/// Rounding shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + transmute(vrshrn_n_s16::(transmute(a))) +} + +/// Rounding shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + transmute(vrshrn_n_s32::(transmute(a))) +} + +/// Rounding shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + 
static_assert!(N : i32 where N >= 1 && N <= 32); + transmute(vrshrn_n_s64::(transmute(a))) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vrshr_n_s8::(b)) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vrshrq_n_s8::(b)) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vrshr_n_s16::(b)) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vrshrq_n_s16::(b)) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_add(a, vrshr_n_s32::(b)) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_add(a, vrshrq_n_s32::(b)) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N : i32 
where N >= 1 && N <= 64); + simd_add(a, vrshr_n_s64::(b)) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vrshrq_n_s64::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vrshr_n_u8::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vrshrq_n_u8::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vrshr_n_u16::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vrshrq_n_u16::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_add(a, vrshr_n_u32::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N : i32 where 
N >= 1 && N <= 32); + simd_add(a, vrshrq_n_u32::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vrshr_n_u64::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vrsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vrshrq_n_u64::(b)) +} + /// Signed Shift left #[inline] #[target_feature(enable = "neon")] @@ -10236,6 +11008,198 @@ pub unsafe fn vshrn_n_u64(a: uint64x2_t) -> uint32x2_t { simd_cast(simd_shr(a, vdupq_n_u64(N.try_into().unwrap()))) } +/// Signed shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vshr_n_s8::(b)) +} + +/// Signed shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vshrq_n_s8::(b)) +} + +/// Signed shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vshr_n_s16::(b)) +} + +/// Signed shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vshrq_n_s16::(b)) +} + +/// Signed shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))] 
+#[rustc_legacy_const_generics(2)] +pub unsafe fn vsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_add(a, vshr_n_s32::(b)) +} + +/// Signed shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_add(a, vshrq_n_s32::(b)) +} + +/// Signed shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vshr_n_s64::(b)) +} + +/// Signed shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vshrq_n_s64::(b)) +} + +/// Unsigned shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vshr_n_u8::(b)) +} + +/// Unsigned shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vshrq_n_u8::(b)) +} + +/// Unsigned shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vshr_n_u16::(b)) +} + +/// Unsigned shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + 
static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vshrq_n_u16::(b)) +} + +/// Unsigned shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_add(a, vshr_n_u32::(b)) +} + +/// Unsigned shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_add(a, vshrq_n_u32::(b)) +} + +/// Unsigned shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vshr_n_u64::(b)) +} + +/// Unsigned shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vshrq_n_u64::(b)) +} + /// Unsigned Absolute difference and Accumulate Long #[inline] #[target_feature(enable = "neon")] @@ -17148,6 +18112,470 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vrshl_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: i8x8 = transmute(vrshl_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshlq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let e: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let r: i8x16 = transmute(vrshlq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshl_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let e: i16x4 = i16x4::new(4, 8, 12, 16); + let r: i16x4 = transmute(vrshl_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshlq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: i16x8 = transmute(vrshlq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + 
#[simd_test(enable = "neon")] + unsafe fn test_vrshl_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(2, 2); + let e: i32x2 = i32x2::new(4, 8); + let r: i32x2 = transmute(vrshl_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshlq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let e: i32x4 = i32x4::new(4, 8, 12, 16); + let r: i32x4 = transmute(vrshlq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshl_s64() { + let a: i64x1 = i64x1::new(1); + let b: i64x1 = i64x1::new(2); + let e: i64x1 = i64x1::new(4); + let r: i64x1 = transmute(vrshl_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshlq_s64() { + let a: i64x2 = i64x2::new(1, 2); + let b: i64x2 = i64x2::new(2, 2); + let e: i64x2 = i64x2::new(4, 8); + let r: i64x2 = transmute(vrshlq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshl_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: u8x8 = transmute(vrshl_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshlq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let e: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let r: u8x16 = transmute(vrshlq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshl_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let e: u16x4 = u16x4::new(4, 8, 12, 16); + let r: u16x4 = transmute(vrshl_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshlq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: u16x8 = transmute(vrshlq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshl_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: i32x2 = i32x2::new(2, 2); + let e: u32x2 = u32x2::new(4, 8); + let r: u32x2 = transmute(vrshl_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshlq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let e: u32x4 = u32x4::new(4, 8, 12, 16); + let r: u32x4 = transmute(vrshlq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshl_u64() { + let a: u64x1 = u64x1::new(1); + let b: i64x1 = i64x1::new(2); + let e: u64x1 = u64x1::new(4); + let r: u64x1 = transmute(vrshl_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshlq_u64() { + let a: u64x2 = u64x2::new(1, 2); + let b: i64x2 = i64x2::new(2, 2); + let e: u64x2 = u64x2::new(4, 8); + let r: u64x2 = transmute(vrshlq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshr_n_s8() { + let a: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 
28, 32); + let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vrshr_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrq_n_s8() { + let a: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let e: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: i8x16 = transmute(vrshrq_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshr_n_s16() { + let a: i16x4 = i16x4::new(4, 8, 12, 16); + let e: i16x4 = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vrshr_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrq_n_s16() { + let a: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i16x8 = transmute(vrshrq_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshr_n_s32() { + let a: i32x2 = i32x2::new(4, 8); + let e: i32x2 = i32x2::new(1, 2); + let r: i32x2 = transmute(vrshr_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrq_n_s32() { + let a: i32x4 = i32x4::new(4, 8, 12, 16); + let e: i32x4 = i32x4::new(1, 2, 3, 4); + let r: i32x4 = transmute(vrshrq_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshr_n_s64() { + let a: i64x1 = i64x1::new(4); + let e: i64x1 = i64x1::new(1); + let r: i64x1 = transmute(vrshr_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrq_n_s64() { + let a: i64x2 = i64x2::new(4, 8); + let e: i64x2 = i64x2::new(1, 2); + let r: i64x2 = transmute(vrshrq_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshr_n_u8() { + let a: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vrshr_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrq_n_u8() { + let a: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let e: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: u8x16 = transmute(vrshrq_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshr_n_u16() { + let a: u16x4 = u16x4::new(4, 8, 12, 16); + let e: u16x4 = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vrshr_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrq_n_u16() { + let a: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u16x8 = transmute(vrshrq_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshr_n_u32() { + let a: u32x2 = u32x2::new(4, 8); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vrshr_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrq_n_u32() { + let a: u32x4 = u32x4::new(4, 8, 12, 16); + let e: u32x4 = u32x4::new(1, 2, 3, 4); + let r: u32x4 = transmute(vrshrq_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshr_n_u64() { + let a: u64x1 = u64x1::new(4); + let e: u64x1 = u64x1::new(1); + let r: u64x1 = 
transmute(vrshr_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrq_n_u64() { + let a: u64x2 = u64x2::new(4, 8); + let e: u64x2 = u64x2::new(1, 2); + let r: u64x2 = transmute(vrshrq_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrn_n_s16() { + let a: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vrshrn_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrn_n_s32() { + let a: i32x4 = i32x4::new(4, 8, 12, 16); + let e: i16x4 = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vrshrn_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrn_n_s64() { + let a: i64x2 = i64x2::new(4, 8); + let e: i32x2 = i32x2::new(1, 2); + let r: i32x2 = transmute(vrshrn_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrn_n_u16() { + let a: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vrshrn_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrn_n_u32() { + let a: u32x4 = u32x4::new(4, 8, 12, 16); + let e: u16x4 = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vrshrn_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrn_n_u64() { + let a: u64x2 = u64x2::new(4, 8); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vrshrn_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsra_n_s8() { + let a: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let b: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i8x8 = i8x8::new(2, 3, 4, 5, 6, 7, 8, 9); + let r: i8x8 = transmute(vrsra_n_s8::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_s8() { + let a: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let e: i8x16 = i8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); + let r: i8x16 = transmute(vrsraq_n_s8::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsra_n_s16() { + let a: i16x4 = i16x4::new(1, 1, 1, 1); + let b: i16x4 = i16x4::new(4, 8, 12, 16); + let e: i16x4 = i16x4::new(2, 3, 4, 5); + let r: i16x4 = transmute(vrsra_n_s16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_s16() { + let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let b: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i16x8 = i16x8::new(2, 3, 4, 5, 6, 7, 8, 9); + let r: i16x8 = transmute(vrsraq_n_s16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsra_n_s32() { + let a: i32x2 = i32x2::new(1, 1); + let b: i32x2 = i32x2::new(4, 8); + let e: i32x2 = i32x2::new(2, 3); + let r: i32x2 = transmute(vrsra_n_s32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_s32() { + let a: i32x4 = i32x4::new(1, 1, 1, 1); + let b: i32x4 = i32x4::new(4, 8, 12, 16); + let e: i32x4 = i32x4::new(2, 3, 4, 
5); + let r: i32x4 = transmute(vrsraq_n_s32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsra_n_s64() { + let a: i64x1 = i64x1::new(1); + let b: i64x1 = i64x1::new(4); + let e: i64x1 = i64x1::new(2); + let r: i64x1 = transmute(vrsra_n_s64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_s64() { + let a: i64x2 = i64x2::new(1, 1); + let b: i64x2 = i64x2::new(4, 8); + let e: i64x2 = i64x2::new(2, 3); + let r: i64x2 = transmute(vrsraq_n_s64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsra_n_u8() { + let a: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let b: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u8x8 = u8x8::new(2, 3, 4, 5, 6, 7, 8, 9); + let r: u8x8 = transmute(vrsra_n_u8::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_u8() { + let a: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let e: u8x16 = u8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); + let r: u8x16 = transmute(vrsraq_n_u8::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsra_n_u16() { + let a: u16x4 = u16x4::new(1, 1, 1, 1); + let b: u16x4 = u16x4::new(4, 8, 12, 16); + let e: u16x4 = u16x4::new(2, 3, 4, 5); + let r: u16x4 = transmute(vrsra_n_u16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_u16() { + let a: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let b: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u16x8 = u16x8::new(2, 3, 4, 5, 6, 7, 8, 9); + let r: u16x8 = transmute(vrsraq_n_u16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsra_n_u32() { + let a: u32x2 = u32x2::new(1, 1); + let b: u32x2 = u32x2::new(4, 8); + let e: u32x2 = u32x2::new(2, 3); + let r: u32x2 = transmute(vrsra_n_u32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_u32() { + let a: u32x4 = u32x4::new(1, 1, 1, 1); + let b: u32x4 = u32x4::new(4, 8, 12, 16); + let e: u32x4 = u32x4::new(2, 3, 4, 5); + let r: u32x4 = transmute(vrsraq_n_u32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsra_n_u64() { + let a: u64x1 = u64x1::new(1); + let b: u64x1 = u64x1::new(4); + let e: u64x1 = u64x1::new(2); + let r: u64x1 = transmute(vrsra_n_u64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_u64() { + let a: u64x2 = u64x2::new(1, 1); + let b: u64x2 = u64x2::new(4, 8); + let e: u64x2 = u64x2::new(2, 3); + let r: u64x2 = transmute(vrsraq_n_u64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vshl_s8() { let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); @@ -17644,6 +19072,150 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vsra_n_s8() { + let a: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let b: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i8x8 = i8x8::new(2, 3, 4, 5, 6, 7, 8, 9); + let r: i8x8 = transmute(vsra_n_s8::<2>(transmute(a), transmute(b))); + 
assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsraq_n_s8() { + let a: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let e: i8x16 = i8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); + let r: i8x16 = transmute(vsraq_n_s8::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsra_n_s16() { + let a: i16x4 = i16x4::new(1, 1, 1, 1); + let b: i16x4 = i16x4::new(4, 8, 12, 16); + let e: i16x4 = i16x4::new(2, 3, 4, 5); + let r: i16x4 = transmute(vsra_n_s16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsraq_n_s16() { + let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let b: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i16x8 = i16x8::new(2, 3, 4, 5, 6, 7, 8, 9); + let r: i16x8 = transmute(vsraq_n_s16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsra_n_s32() { + let a: i32x2 = i32x2::new(1, 1); + let b: i32x2 = i32x2::new(4, 8); + let e: i32x2 = i32x2::new(2, 3); + let r: i32x2 = transmute(vsra_n_s32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsraq_n_s32() { + let a: i32x4 = i32x4::new(1, 1, 1, 1); + let b: i32x4 = i32x4::new(4, 8, 12, 16); + let e: i32x4 = i32x4::new(2, 3, 4, 5); + let r: i32x4 = transmute(vsraq_n_s32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsra_n_s64() { + let a: i64x1 = i64x1::new(1); + let b: i64x1 = i64x1::new(4); + let e: i64x1 = i64x1::new(2); + let r: i64x1 = transmute(vsra_n_s64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsraq_n_s64() { + let a: i64x2 = i64x2::new(1, 1); + let b: i64x2 = i64x2::new(4, 8); + let e: i64x2 = i64x2::new(2, 3); + let r: i64x2 = transmute(vsraq_n_s64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsra_n_u8() { + let a: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let b: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u8x8 = u8x8::new(2, 3, 4, 5, 6, 7, 8, 9); + let r: u8x8 = transmute(vsra_n_u8::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsraq_n_u8() { + let a: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let e: u8x16 = u8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); + let r: u8x16 = transmute(vsraq_n_u8::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsra_n_u16() { + let a: u16x4 = u16x4::new(1, 1, 1, 1); + let b: u16x4 = u16x4::new(4, 8, 12, 16); + let e: u16x4 = u16x4::new(2, 3, 4, 5); + let r: u16x4 = transmute(vsra_n_u16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsraq_n_u16() { + let a: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let b: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u16x8 = u16x8::new(2, 3, 4, 5, 6, 7, 8, 9); + let r: u16x8 = transmute(vsraq_n_u16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vsra_n_u32() { + let a: u32x2 = u32x2::new(1, 1); + let b: u32x2 = u32x2::new(4, 8); + let e: u32x2 = u32x2::new(2, 3); + let r: u32x2 = transmute(vsra_n_u32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsraq_n_u32() { + let a: u32x4 = u32x4::new(1, 1, 1, 1); + let b: u32x4 = u32x4::new(4, 8, 12, 16); + let e: u32x4 = u32x4::new(2, 3, 4, 5); + let r: u32x4 = transmute(vsraq_n_u32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsra_n_u64() { + let a: u64x1 = u64x1::new(1); + let b: u64x1 = u64x1::new(4); + let e: u64x1 = u64x1::new(2); + let r: u64x1 = transmute(vsra_n_u64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsraq_n_u64() { + let a: u64x2 = u64x2::new(1, 1); + let b: u64x2 = u64x2::new(4, 8); + let e: u64x2 = u64x2::new(2, 3); + let r: u64x2 = transmute(vsraq_n_u64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vabal_u8() { let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); diff --git a/library/stdarch/crates/stdarch-gen/neon.spec b/library/stdarch/crates/stdarch-gen/neon.spec index 74ca739cc4bc..1f0d70362c63 100644 --- a/library/stdarch/crates/stdarch-gen/neon.spec +++ b/library/stdarch/crates/stdarch-gen/neon.spec @@ -2709,6 +2709,218 @@ aarch64 = str generate float32x2_t:float64x1_t, float64x1_t:float32x2_t generate float32x4_t:float64x2_t, float64x2_t:float32x4_t +/// Signed rounding shift left +name = vrshl +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 + +aarch64 = srshl +link-aarch64 = srshl._EXT_ + +arm = vrshl +link-arm = vrshifts._EXT_ +generate int*_t, int64x*_t + +/// Signed rounding shift left +name = vrshl +multi_fn = transmute, {vrshl-in_ntt-noext, transmute(a), transmute(b)} +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 + +aarch64 = srshl +generate i64 + +/// Unsigned rounding shift left +name = vrshl +out-suffix +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 + +aarch64 = urshl +link-aarch64 = urshl._EXT_ + +arm = vrshl +link-arm = vrshiftu._EXT_ +generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t +generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t + +/// Unsigned rounding shift left +name = vrshl +out-suffix +multi_fn = transmute, {vrshl-out_ntt-noext, transmute(a), transmute(b)} +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 +validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 + +aarch64 = urshl +generate u64:i64:u64 + +/// Signed rounding shift right +name = vrshr +n-suffix +constn = N +multi_fn = static_assert-N-1-bits +multi_fn = vrshl-self-noext, a, {vdup-nself-noext, (-N).try_into().unwrap()} +a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 +n = 2 +validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + +aarch64 = srshr +arm = 
vrshr
+generate int*_t, int64x*_t
+
+/// Signed rounding shift right
+name = vrshr
+n-suffix
+constn = N
+multi_fn = static_assert-N-1-bits
+multi_fn = vrshl-self-noext, a, -N as i64
+a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
+n = 2
+validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+
+aarch64 = srshr
+generate i64
+
+/// Unsigned rounding shift right
+name = vrshr
+n-suffix
+constn = N
+multi_fn = static_assert-N-1-bits
+multi_fn = vrshl-self-noext, a, {vdup-nsigned-noext, (-N).try_into().unwrap()}
+a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
+n = 2
+validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+
+aarch64 = urshr
+arm = vrshr
+generate uint*_t, uint64x*_t
+
+/// Unsigned rounding shift right
+name = vrshr
+n-suffix
+constn = N
+multi_fn = static_assert-N-1-bits
+multi_fn = vrshl-self-noext, a, -N as i64
+a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
+n = 2
+validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+
+aarch64 = urshr
+generate u64
+
+/// Rounding shift right narrow
+name = vrshrn
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
+n = 2
+validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+
+aarch64 = rshrn
+link-aarch64 = rshrn._EXT2_
+const-aarch64 = N
+
+arm = vrshrn
+link-arm = vrshiftn._EXT2_
+const-arm = -N as ttn
+generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
+
+/// Rounding shift right narrow
+name = vrshrn
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+multi_fn = transmute, {vrshrn_n-noqsigned-::<N>, transmute(a)}
+a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
+n = 2
+validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+
+aarch64 = rshrn
+arm = vrshrn
+generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
+
+/// Rounding shift right narrow
+name = vrshrn_high
+noq-n-suffix
+constn = N
+multi_fn = static_assert-N-1-halfbits
+multi_fn = simd_shuffle-out_len-noext, a, {vrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
+a = 0, 1, 8, 9, 8, 9, 10, 11
+b = 32, 36, 40, 44, 48, 52, 56, 60
+n = 2
+validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15
+
+aarch64 = rshrn2
+generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
+generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
+
+/// Signed rounding shift right and accumulate
+name = vrsra
+n-suffix
+constn = N
+multi_fn = static_assert-N-1-bits
+multi_fn = simd_add, a, {vrshr-nself-::<N>, b}
+a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
+n = 2
+validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
+
+aarch64 = srsra
+arm = vrsra
+generate int*_t, int64x*_t
+
+/// Unsigned rounding shift right and accumulate
+name = vrsra
+n-suffix
+constn = N
+multi_fn = static_assert-N-1-bits
+multi_fn = simd_add, a, {vrshr-nself-::<N>, b}
+a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
+n = 2
+validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
+
+aarch64 = ursra
+arm = vrsra
+generate uint*_t, uint64x*_t
+
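For orientation, each spec entry above expands to one intrinsic per type listed in its `generate` lines. Assuming the same shape as the generated code earlier in this diff, the signed `vrsra` entry should come out roughly as follows for `int8x8_t` — a sketch, not the generator's verbatim output; the ARM-side attributes implied by `arm = vrsra` are omitted:

/// Signed rounding shift right and accumulate
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(srsra, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vrsra_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t {
    // static_assert-N-1-bits bounds N to the element width, as in the other generated shifts
    static_assert!(N : i32 where N >= 1 && N <= 8);
    // simd_add, a, {vrshr-nself-::<N>, b}
    simd_add(a, vrshr_n_s8::<N>(b))
}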
+/// Signed rounding shift right and accumulate
+name = vrsra
+n-suffix
+constn = N
+multi_fn = static_assert-N-1-bits
+multi_fn = vrshr_n-in_ntt-::<N>, b:in_ntt, transmute(b)
+multi_fn = transmute, {simd_add, transmute(a), b}
+a = 1
+b = 4
+n = 2
+validate 2
+
+// This sequence is not optimized into a single srsra instruction, so "nop" is used to skip the instruction test.
+aarch64 = nop
+generate i64
+
+/// Unsigned rounding shift right and accumulate
+name = vrsra
+n-suffix
+constn = N
+multi_fn = static_assert-N-1-bits
+multi_fn = vrshr_n-in_ntt-::<N>, b:in_ntt, transmute(b)
+multi_fn = transmute, {simd_add, transmute(a), b}
+a = 1
+b = 4
+n = 2
+validate 2
+
+// This sequence is not optimized into a single ursra instruction, so "nop" is used to skip the instruction test.
+aarch64 = nop
+generate u64
+
/// Signed Shift left
name = vshl
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
@@ -2721,6 +2933,16 @@
aarch64 = sshl
link-aarch64 = sshl._EXT_
generate int*_t, int64x*_t
+
+/// Signed Shift left
+name = vshl
+multi_fn = transmute, {vshl-in_ntt-noext, transmute(a), transmute(b)}
+a = 1
+b = 2
+validate 4
+
+aarch64 = sshl
+generate i64
+
/// Unsigned Shift left
name = vshl
out-suffix
@@ -2735,6 +2957,17 @@ link-aarch64 = ushl._EXT_
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t
+
+/// Unsigned Shift left
+out-suffix
+name = vshl
+multi_fn = transmute, {vshl-out_ntt-noext, transmute(a), transmute(b)}
+a = 1
+b = 2
+validate 4
+
+aarch64 = ushl
+generate u64:i64:u64
+
/// Shift left
name = vshl
n-suffix
@@ -2827,6 +3060,36 @@ aarch64 = shrn2
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
+
+/// Signed shift right and accumulate
+name = vsra
+n-suffix
+constn = N
+multi_fn = static_assert-N-1-bits
+multi_fn = simd_add, a, {vshr-nself-::<N>, b}
+a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
+n = 2
+validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
+
+aarch64 = ssra
+arm = vsra
+generate int*_t, int64x*_t
+
+/// Unsigned shift right and accumulate
+name = vsra
+n-suffix
+constn = N
+multi_fn = static_assert-N-1-bits
+multi_fn = simd_add, a, {vshr-nself-::<N>, b}
+a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
+b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
+n = 2
+validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
+
+aarch64 = usra
+arm = vsra
+generate uint*_t, uint64x*_t
+
/// Transpose vectors
name = vtrn1
multi_fn = simd_shuffle-in_len-noext, a, b, {transpose-1-in_len}
diff --git a/library/stdarch/crates/stdarch-gen/src/main.rs b/library/stdarch/crates/stdarch-gen/src/main.rs
index 1babd33744a6..ab5a396c3606 100644
--- a/library/stdarch/crates/stdarch-gen/src/main.rs
+++ b/library/stdarch/crates/stdarch-gen/src/main.rs
@@ -1932,6 +1932,8 @@ fn get_call(
    fn_name.push_str(&type_to_noq_double_suffixes(out_t, in_t[1]));
} else if fn_format[1] == "noqself" {
    fn_name.push_str(type_to_noq_suffix(in_t[1]));
+} else if fn_format[1] == "noqsigned" {
+    fn_name.push_str(type_to_noq_suffix(type_to_signed(in_t[1])));
} else if fn_format[1] == "nosuffix" {
} else if fn_format[1] == "in_len" {
    fn_name.push_str(&type_len(in_t[1]).to_string());
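One property of the test vectors worth noting: the shifted inputs throughout are exact multiples of `1 << N`, so the rounding never shows up in the expected values. What the rounding variants add over plain shifts is the constant `1 << (N - 1)` added before the shift, rounding halfway cases up. A minimal scalar model of this behaviour, assuming the usual SRSHR/URSHR semantics and ignoring the `N = 64` edge case that the real instructions handle (illustration only, not part of this patch):

// Scalar model of a rounding shift right by n: add the rounding
// constant 1 << (n - 1) before shifting so halfway cases round up.
fn rounding_shift_right(a: i64, n: u32) -> i64 {
    assert!((1..=63).contains(&n)); // the hardware also accepts n = 64
    (a + (1 << (n - 1))) >> n
}

fn main() {
    assert_eq!(rounding_shift_right(4, 2), 1);   // exact: 4 / 4
    assert_eq!(rounding_shift_right(5, 2), 1);   // 1.25 rounds down
    assert_eq!(rounding_shift_right(6, 2), 2);   // 1.5 rounds up
    assert_eq!(rounding_shift_right(-6, 2), -1); // -1.5 rounds up, towards zero
}

This is also why, for example, `test_vrsra_n_s8` expects `1 + (4 >> 2) = 2`: with exact multiples the rounding constant is shifted out and the result matches a plain arithmetic shift.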