diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 791e7707bc70..88fe4cb085cc 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -3934,26 +3934,6 @@ pub unsafe fn vqsubh_s16(a: i16, b: i16) -> i16 { simd_extract(vqsub_s16(a, b), 0) } -/// Saturating subtract -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqsub))] -pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 { - let a: int32x2_t = vdup_n_s32(a); - let b: int32x2_t = vdup_n_s32(b); - simd_extract(vqsub_s32(a, b), 0) -} - -/// Saturating subtract -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqsub))] -pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 { - let a: int64x1_t = vdup_n_s64(a); - let b: int64x1_t = vdup_n_s64(b); - simd_extract(vqsub_s64(a, b), 0) -} - /// Saturating subtract #[inline] #[target_feature(enable = "neon")] @@ -3979,9 +3959,12 @@ pub unsafe fn vqsubh_u16(a: u16, b: u16) -> u16 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqsub))] pub unsafe fn vqsubs_u32(a: u32, b: u32) -> u32 { - let a: uint32x2_t = vdup_n_u32(a); - let b: uint32x2_t = vdup_n_u32(b); - simd_extract(vqsub_u32(a, b), 0) + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.i32")] + fn vqsubs_u32_(a: u32, b: u32) -> u32; + } + vqsubs_u32_(a, b) } /// Saturating subtract @@ -3989,9 +3972,38 @@ pub unsafe fn vqsubs_u32(a: u32, b: u32) -> u32 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqsub))] pub unsafe fn vqsubd_u64(a: u64, b: u64) -> u64 { - let a: uint64x1_t = vdup_n_u64(a); - let b: uint64x1_t = vdup_n_u64(b); - simd_extract(vqsub_u64(a, b), 0) + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.i64")] 
+ fn vqsubd_u64_(a: u64, b: u64) -> u64; + } + vqsubd_u64_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqsub))] +pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.i32")] + fn vqsubs_s32_(a: i32, b: i32) -> i32; + } + vqsubs_s32_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqsub))] +pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.i64")] + fn vqsubd_s64_(a: i64, b: i64) -> i64; + } + vqsubd_s64_(a, b) } /// Reverse bit order @@ -4410,26 +4422,6 @@ pub unsafe fn vqaddh_s16(a: i16, b: i16) -> i16 { simd_extract(vqadd_s16(a, b), 0) } -/// Saturating add -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqadd))] -pub unsafe fn vqadds_s32(a: i32, b: i32) -> i32 { - let a: int32x2_t = vdup_n_s32(a); - let b: int32x2_t = vdup_n_s32(b); - simd_extract(vqadd_s32(a, b), 0) -} - -/// Saturating add -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqadd))] -pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 { - let a: int64x1_t = vdup_n_s64(a); - let b: int64x1_t = vdup_n_s64(b); - simd_extract(vqadd_s64(a, b), 0) -} - /// Saturating add #[inline] #[target_feature(enable = "neon")] @@ -4455,9 +4447,12 @@ pub unsafe fn vqaddh_u16(a: u16, b: u16) -> u16 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqadd))] pub unsafe fn vqadds_u32(a: u32, b: u32) -> u32 { - let a: uint32x2_t = vdup_n_u32(a); - let b: uint32x2_t = vdup_n_u32(b); - simd_extract(vqadd_u32(a, b), 0) + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.i32")] + fn vqadds_u32_(a: u32, b: u32) -> u32; + } + 
vqadds_u32_(a, b) } /// Saturating add @@ -4465,9 +4460,38 @@ pub unsafe fn vqadds_u32(a: u32, b: u32) -> u32 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqadd))] pub unsafe fn vqaddd_u64(a: u64, b: u64) -> u64 { - let a: uint64x1_t = vdup_n_u64(a); - let b: uint64x1_t = vdup_n_u64(b); - simd_extract(vqadd_u64(a, b), 0) + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.i64")] + fn vqaddd_u64_(a: u64, b: u64) -> u64; + } + vqaddd_u64_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqadd))] +pub unsafe fn vqadds_s32(a: i32, b: i32) -> i32 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.i32")] + fn vqadds_s32_(a: i32, b: i32) -> i32; + } + vqadds_s32_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqadd))] +pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.i64")] + fn vqaddd_s64_(a: i64, b: i64) -> i64; + } + vqaddd_s64_(a, b) } /// Multiply @@ -5932,14 +5956,6 @@ pub unsafe fn vqmovns_s32(a: i32) -> i16 { simd_extract(vqmovn_s32(vdupq_n_s32(a)), 0) } -/// Saturating extract narrow -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqxtn))] -pub unsafe fn vqmovnd_s64(a: i64) -> i32 { - simd_extract(vqmovn_s64(vdupq_n_s64(a)), 0) -} - /// Saturating extract narrow #[inline] #[target_feature(enable = "neon")] @@ -5956,12 +5972,30 @@ pub unsafe fn vqmovns_u32(a: u32) -> u16 { simd_extract(vqmovn_u32(vdupq_n_u32(a)), 0) } +/// Saturating extract narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqxtn))] +pub unsafe fn vqmovnd_s64(a: i64) -> i32 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch 
= "aarch64", link_name = "llvm.aarch64.neon.scalar.sqxtn.i32.i64")] + fn vqmovnd_s64_(a: i64) -> i32; + } + vqmovnd_s64_(a) +} + /// Saturating extract narrow #[inline] #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(uqxtn))] pub unsafe fn vqmovnd_u64(a: u64) -> u32 { - simd_extract(vqmovn_u64(vdupq_n_u64(a)), 0) + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.scalar.uqxtn.i32.i64")] + fn vqmovnd_u64_(a: u64) -> u32; + } + vqmovnd_u64_(a) } /// Signed saturating extract narrow @@ -6228,6 +6262,32 @@ pub unsafe fn vqrdmlshs_laneq_s32(a: i32, b: i32, c: int32x4_t) vqsubs_s32(a, vqrdmulhs_laneq_s32::(b, c)) } +/// Signed saturating rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqrshl))] +pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.i32")] + fn vqrshls_s32_(a: i32, b: i32) -> i32; + } + vqrshls_s32_(a, b) +} + +/// Signed saturating rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqrshl))] +pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.i64")] + fn vqrshld_s64_(a: i64, b: i64) -> i64; + } + vqrshld_s64_(a, b) +} + /// Signed saturating rounding shift left #[inline] #[target_feature(enable = "neon")] @@ -6248,24 +6308,30 @@ pub unsafe fn vqrshlh_s16(a: i16, b: i16) -> i16 { simd_extract(vqrshl_s16(a, b), 0) } -/// Signed saturating rounding shift left +/// Unsigned signed saturating rounding shift left #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshl))] -pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 { - let a: int32x2_t = vdup_n_s32(a); - let b: int32x2_t = vdup_n_s32(b); - simd_extract(vqrshl_s32(a, b), 0) 
+#[cfg_attr(test, assert_instr(uqrshl))] +pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.i32")] + fn vqrshls_u32_(a: u32, b: i32) -> u32; + } + vqrshls_u32_(a, b) } -/// Signed saturating rounding shift left +/// Unsigned signed saturating rounding shift left #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqrshl))] -pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 { - let a: int64x1_t = vdup_n_s64(a); - let b: int64x1_t = vdup_n_s64(b); - simd_extract(vqrshl_s64(a, b), 0) +#[cfg_attr(test, assert_instr(uqrshl))] +pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.i64")] + fn vqrshld_u64_(a: u64, b: i64) -> u64; + } + vqrshld_u64_(a, b) } /// Unsigned signed saturating rounding shift left @@ -6288,26 +6354,6 @@ pub unsafe fn vqrshlh_u16(a: u16, b: i16) -> u16 { simd_extract(vqrshl_u16(a, b), 0) } -/// Unsigned signed saturating rounding shift left -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshl))] -pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 { - let a: uint32x2_t = vdup_n_u32(a); - let b: int32x2_t = vdup_n_s32(b); - simd_extract(vqrshl_u32(a, b), 0) -} - -/// Unsigned signed saturating rounding shift left -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqrshl))] -pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 { - let a: uint64x1_t = vdup_n_u64(a); - let b: int64x1_t = vdup_n_s64(b); - simd_extract(vqrshl_u64(a, b), 0) -} - /// Signed saturating rounded shift right narrow #[inline] #[target_feature(enable = "neon")] @@ -6497,6 +6543,19 @@ pub unsafe fn vqrshrun_high_n_s64(a: uint32x2_t, b: int64x2_t) -> simd_shuffle4!(a, vqrshrun_n_s64::(b), [0, 1, 2, 3]) } +/// Signed saturating shift left +#[inline] +#[target_feature(enable = 
"neon")] +#[cfg_attr(test, assert_instr(sqshl))] +pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.i64")] + fn vqshld_s64_(a: i64, b: i64) -> i64; + } + vqshld_s64_(a, b) +} + /// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] @@ -6524,13 +6583,17 @@ pub unsafe fn vqshls_s32(a: i32, b: i32) -> i32 { simd_extract(c, 0) } -/// Signed saturating shift left +/// Unsigned saturating shift left #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshl))] -pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 { - let c: int64x1_t = vqshl_s64(vdup_n_s64(a), vdup_n_s64(b)); - simd_extract(c, 0) +#[cfg_attr(test, assert_instr(uqshl))] +pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.i64")] + fn vqshld_u64_(a: u64, b: i64) -> u64; + } + vqshld_u64_(a, b) } /// Unsigned saturating shift left @@ -6560,15 +6623,6 @@ pub unsafe fn vqshls_u32(a: u32, b: i32) -> u32 { simd_extract(c, 0) } -/// Unsigned saturating shift left -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshl))] -pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 { - let c: uint64x1_t = vqshl_u64(vdup_n_u64(a), vdup_n_s64(b)); - simd_extract(c, 0) -} - /// Signed saturating shift left #[inline] #[target_feature(enable = "neon")] @@ -6649,6 +6703,21 @@ pub unsafe fn vqshld_n_u64(a: u64) -> u64 { simd_extract(vqshl_n_u64::(vdup_n_u64(a)), 0) } +/// Signed saturating shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrnd_n_s64(a: i64) -> i32 { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = 
"llvm.aarch64.neon.sqshrn.i32")] + fn vqshrnd_n_s64_(a: i64, n: i32) -> i32; + } + vqshrnd_n_s64_(a, N) +} + /// Signed saturating shift right narrow #[inline] #[target_feature(enable = "neon")] @@ -6669,16 +6738,6 @@ pub unsafe fn vqshrns_n_s32(a: i32) -> i16 { simd_extract(vqshrn_n_s32::(vdupq_n_s32(a)), 0) } -/// Signed saturating shift right narrow -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(sqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrnd_n_s64(a: i64) -> i32 { - static_assert!(N : i32 where N >= 1 && N <= 32); - simd_extract(vqshrn_n_s64::(vdupq_n_s64(a)), 0) -} - /// Signed saturating shift right narrow #[inline] #[target_feature(enable = "neon")] @@ -6709,6 +6768,21 @@ pub unsafe fn vqshrn_high_n_s64(a: int32x2_t, b: int64x2_t) -> int simd_shuffle4!(a, vqshrn_n_s64::(b), [0, 1, 2, 3]) } +/// Unsigned saturating shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(uqshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrnd_n_u64(a: u64) -> u32 { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.i32")] + fn vqshrnd_n_u64_(a: u64, n: i32) -> u32; + } + vqshrnd_n_u64_(a, N) +} + /// Unsigned saturating shift right narrow #[inline] #[target_feature(enable = "neon")] @@ -6729,16 +6803,6 @@ pub unsafe fn vqshrns_n_u32(a: u32) -> u16 { simd_extract(vqshrn_n_u32::(vdupq_n_u32(a)), 0) } -/// Unsigned saturating shift right narrow -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(uqshrn, N = 2))] -#[rustc_legacy_const_generics(1)] -pub unsafe fn vqshrnd_n_u64(a: u64) -> u32 { - static_assert!(N : i32 where N >= 1 && N <= 32); - simd_extract(vqshrn_n_u64::(vdupq_n_u64(a)), 0) -} - /// Unsigned saturating shift right narrow #[inline] #[target_feature(enable = "neon")] @@ -7654,7 +7718,12 @@ pub unsafe fn 
vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(srshl))] pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 { - transmute(vrshl_s64(transmute(a), transmute(b))) + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.i64")] + fn vrshld_s64_(a: i64, b: i64) -> i64; + } + vrshld_s64_(a, b) } /// Unsigned rounding shift left @@ -7662,7 +7731,12 @@ pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(urshl))] pub unsafe fn vrshld_u64(a: u64, b: i64) -> u64 { - transmute(vrshl_u64(transmute(a), transmute(b))) + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.i64")] + fn vrshld_u64_(a: u64, b: i64) -> u64; + } + vrshld_u64_(a, b) } /// Signed rounding shift right @@ -7748,23 +7822,23 @@ pub unsafe fn vrshrn_high_n_u64(a: uint32x2_t, b: uint64x2_t) -> u /// Signed rounding shift right and accumulate. #[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 2))] +#[cfg_attr(test, assert_instr(srsra, N = 2))] #[rustc_legacy_const_generics(2)] pub unsafe fn vrsrad_n_s64(a: i64, b: i64) -> i64 { static_assert!(N : i32 where N >= 1 && N <= 64); - let b: int64x1_t = vrshr_n_s64::(transmute(b)); - transmute(simd_add(transmute(a), b)) + let b: i64 = vrshrd_n_s64::(b); + a.wrapping_add(b) } /// Ungisned rounding shift right and accumulate. 
#[inline] #[target_feature(enable = "neon")] -#[cfg_attr(test, assert_instr(nop, N = 2))] +#[cfg_attr(test, assert_instr(ursra, N = 2))] #[rustc_legacy_const_generics(2)] pub unsafe fn vrsrad_n_u64(a: u64, b: u64) -> u64 { static_assert!(N : i32 where N >= 1 && N <= 64); - let b: uint64x1_t = vrshr_n_u64::(transmute(b)); - transmute(simd_add(transmute(a), b)) + let b: u64 = vrshrd_n_u64::(b); + a.wrapping_add(b) } /// Insert vector element from another vector element @@ -12089,24 +12163,6 @@ mod test { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn test_vqsubs_s32() { - let a: i32 = 42; - let b: i32 = 1; - let e: i32 = 41; - let r: i32 = transmute(vqsubs_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqsubd_s64() { - let a: i64 = 42; - let b: i64 = 1; - let e: i64 = 41; - let r: i64 = transmute(vqsubd_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vqsubb_u8() { let a: u8 = 42; @@ -12143,6 +12199,24 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubs_s32() { + let a: i32 = 42; + let b: i32 = 1; + let e: i32 = 41; + let r: i32 = transmute(vqsubs_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsubd_s64() { + let a: i64 = 42; + let b: i64 = 1; + let e: i64 = 41; + let r: i64 = transmute(vqsubd_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vrbit_s8() { let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14); @@ -12417,24 +12491,6 @@ mod test { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn test_vqadds_s32() { - let a: i32 = 42; - let b: i32 = 1; - let e: i32 = 43; - let r: i32 = transmute(vqadds_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqaddd_s64() { - let a: i64 = 42; - let b: i64 = 1; - let e: i64 = 43; - let 
r: i64 = transmute(vqaddd_s64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vqaddb_u8() { let a: u8 = 42; @@ -12471,6 +12527,24 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vqadds_s32() { + let a: i32 = 42; + let b: i32 = 1; + let e: i32 = 43; + let r: i32 = transmute(vqadds_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqaddd_s64() { + let a: i64 = 42; + let b: i64 = 1; + let e: i64 = 43; + let r: i64 = transmute(vqaddd_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vmul_f64() { let a: f64 = 1.0; @@ -13736,14 +13810,6 @@ mod test { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn test_vqmovnd_s64() { - let a: i64 = 1; - let e: i32 = 1; - let r: i32 = transmute(vqmovnd_s64(transmute(a))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vqmovnh_u16() { let a: u16 = 1; @@ -13760,6 +13826,14 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vqmovnd_s64() { + let a: i64 = 1; + let e: i32 = 1; + let r: i32 = transmute(vqmovnd_s64(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vqmovnd_u64() { let a: u64 = 1; @@ -14047,6 +14121,24 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vqrshls_s32() { + let a: i32 = 2; + let b: i32 = 2; + let e: i32 = 8; + let r: i32 = transmute(vqrshls_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqrshld_s64() { + let a: i64 = 2; + let b: i64 = 2; + let e: i64 = 8; + let r: i64 = transmute(vqrshld_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vqrshlb_s8() { let a: i8 = 1; @@ -14066,20 +14158,20 @@ mod test { } #[simd_test(enable = "neon")] - unsafe fn test_vqrshls_s32() { 
- let a: i32 = 1; + unsafe fn test_vqrshls_u32() { + let a: u32 = 2; let b: i32 = 2; - let e: i32 = 4; - let r: i32 = transmute(vqrshls_s32(transmute(a), transmute(b))); + let e: u32 = 8; + let r: u32 = transmute(vqrshls_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] - unsafe fn test_vqrshld_s64() { - let a: i64 = 1; + unsafe fn test_vqrshld_u64() { + let a: u64 = 2; let b: i64 = 2; - let e: i64 = 4; - let r: i64 = transmute(vqrshld_s64(transmute(a), transmute(b))); + let e: u64 = 8; + let r: u64 = transmute(vqrshld_u64(transmute(a), transmute(b))); assert_eq!(r, e); } @@ -14101,24 +14193,6 @@ mod test { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn test_vqrshls_u32() { - let a: u32 = 1; - let b: i32 = 2; - let e: u32 = 4; - let r: u32 = transmute(vqrshls_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vqrshld_u64() { - let a: u64 = 1; - let b: i64 = 2; - let e: u64 = 4; - let r: u64 = transmute(vqrshld_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vqrshrnh_n_s16() { let a: i16 = 4; @@ -14272,6 +14346,15 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vqshld_s64() { + let a: i64 = 0; + let b: i64 = 2; + let e: i64 = 0; + let r: i64 = transmute(vqshld_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vqshlb_s8() { let a: i8 = 1; @@ -14300,11 +14383,11 @@ mod test { } #[simd_test(enable = "neon")] - unsafe fn test_vqshld_s64() { - let a: i64 = 1; + unsafe fn test_vqshld_u64() { + let a: u64 = 0; let b: i64 = 2; - let e: i64 = 4; - let r: i64 = transmute(vqshld_s64(transmute(a), transmute(b))); + let e: u64 = 0; + let r: u64 = transmute(vqshld_u64(transmute(a), transmute(b))); assert_eq!(r, e); } @@ -14335,15 +14418,6 @@ mod test { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn 
test_vqshld_u64() { - let a: u64 = 1; - let b: i64 = 2; - let e: u64 = 4; - let r: u64 = transmute(vqshld_u64(transmute(a), transmute(b))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vqshlb_n_s8() { let a: i8 = 1; @@ -14408,6 +14482,14 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vqshrnd_n_s64() { + let a: i64 = 0; + let e: i32 = 0; + let r: i32 = transmute(vqshrnd_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vqshrnh_n_s16() { let a: i16 = 4; @@ -14424,14 +14506,6 @@ mod test { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn test_vqshrnd_n_s64() { - let a: i64 = 4; - let e: i32 = 1; - let r: i32 = transmute(vqshrnd_n_s64::<2>(transmute(a))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vqshrn_high_n_s16() { let a: i8x8 = i8x8::new(0, 1, 8, 9, 8, 9, 10, 11); @@ -14459,6 +14533,14 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vqshrnd_n_u64() { + let a: u64 = 0; + let e: u32 = 0; + let r: u32 = transmute(vqshrnd_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vqshrnh_n_u16() { let a: u16 = 4; @@ -14475,14 +14557,6 @@ mod test { assert_eq!(r, e); } - #[simd_test(enable = "neon")] - unsafe fn test_vqshrnd_n_u64() { - let a: u64 = 4; - let e: u32 = 1; - let r: u32 = transmute(vqshrnd_n_u64::<2>(transmute(a))); - assert_eq!(r, e); - } - #[simd_test(enable = "neon")] unsafe fn test_vqshrn_high_n_u16() { let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11); diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs index e29c1b36d25c..9097d269893e 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs @@ -1184,9 +1184,7 @@ pub unsafe fn vadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t 
{ #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(add))] pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 { - let a: int64x1_t = transmute(a); - let b: int64x1_t = transmute(b); - simd_extract(simd_add(a, b), 0) + a.wrapping_add(b) } /// Vector add. @@ -1194,9 +1192,7 @@ pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 { #[target_feature(enable = "neon")] #[cfg_attr(test, assert_instr(add))] pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 { - let a: uint64x1_t = transmute(a); - let b: uint64x1_t = transmute(b); - simd_extract(simd_add(a, b), 0) + a.wrapping_add(b) } /// Horizontal vector max. diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs index 835a3aba7498..0387799f6f42 100644 --- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs @@ -13070,7 +13070,7 @@ pub unsafe fn vset_lane_s8(a: i8, b: int8x8_t) -> int8x8_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vset_lane_s16(a: i16, b: int16x4_t) -> int16x4_t { - static_assert_imm4!(LANE); + static_assert_imm2!(LANE); simd_insert(b, LANE as u32, a) } @@ -13082,7 +13082,7 @@ pub unsafe fn vset_lane_s16(a: i16, b: int16x4_t) -> int16x4_t #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vset_lane_s32(a: i32, b: int32x2_t) -> int32x2_t { - static_assert_imm5!(LANE); + static_assert_imm1!(LANE); simd_insert(b, LANE as u32, a) } @@ -13094,7 +13094,7 @@ pub unsafe fn vset_lane_s32(a: i32, b: int32x2_t) -> int32x2_t #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vset_lane_s64(a: i64, b: int64x1_t) -> int64x1_t { - static_assert_imm6!(LANE); + static_assert!(LANE : i32 where LANE == 0); 
simd_insert(b, LANE as u32, a) } @@ -13118,7 +13118,7 @@ pub unsafe fn vset_lane_u8(a: u8, b: uint8x8_t) -> uint8x8_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vset_lane_u16(a: u16, b: uint16x4_t) -> uint16x4_t { - static_assert_imm4!(LANE); + static_assert_imm2!(LANE); simd_insert(b, LANE as u32, a) } @@ -13130,7 +13130,7 @@ pub unsafe fn vset_lane_u16(a: u16, b: uint16x4_t) -> uint16x4_ #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vset_lane_u32(a: u32, b: uint32x2_t) -> uint32x2_t { - static_assert_imm5!(LANE); + static_assert_imm1!(LANE); simd_insert(b, LANE as u32, a) } @@ -13142,7 +13142,7 @@ pub unsafe fn vset_lane_u32(a: u32, b: uint32x2_t) -> uint32x2_ #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vset_lane_u64(a: u64, b: uint64x1_t) -> uint64x1_t { - static_assert_imm6!(LANE); + static_assert!(LANE : i32 where LANE == 0); simd_insert(b, LANE as u32, a) } @@ -13166,7 +13166,7 @@ pub unsafe fn vset_lane_p8(a: p8, b: poly8x8_t) -> poly8x8_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vset_lane_p16(a: p16, b: poly16x4_t) -> poly16x4_t { - static_assert_imm4!(LANE); + static_assert_imm2!(LANE); simd_insert(b, LANE as u32, a) } @@ -13178,7 +13178,7 @@ pub unsafe fn vset_lane_p16(a: p16, b: poly16x4_t) -> poly16x4_ #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vset_lane_p64(a: p64, b: poly64x1_t) -> poly64x1_t { - static_assert_imm6!(LANE); + static_assert!(LANE : i32 where LANE == 0); simd_insert(b, LANE as u32, a) } @@ -13190,7 +13190,7 @@ pub unsafe fn vset_lane_p64(a: p64, b: poly64x1_t) -> poly64x1_ #[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t { - static_assert_imm3!(LANE); + static_assert_imm4!(LANE); simd_insert(b, LANE as u32, a) } @@ -13202,7 +13202,7 @@ pub unsafe fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t { #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t { - static_assert_imm4!(LANE); + static_assert_imm3!(LANE); simd_insert(b, LANE as u32, a) } @@ -13214,7 +13214,7 @@ pub unsafe fn vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t { - static_assert_imm5!(LANE); + static_assert_imm2!(LANE); simd_insert(b, LANE as u32, a) } @@ -13226,7 +13226,7 @@ pub unsafe fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t { - static_assert_imm6!(LANE); + static_assert_imm1!(LANE); simd_insert(b, LANE as u32, a) } @@ -13238,7 +13238,7 @@ pub unsafe fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_u8(a: u8, b: uint8x16_t) -> uint8x16_t { - static_assert_imm3!(LANE); + static_assert_imm4!(LANE); simd_insert(b, LANE as u32, a) } @@ -13250,42 +13250,6 @@ pub unsafe fn vsetq_lane_u8(a: u8, b: uint8x16_t) -> uint8x16_t #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_u16(a: u16, b: uint16x8_t) -> uint16x8_t { - static_assert_imm4!(LANE); - simd_insert(b, LANE as u32, a) -} - -/// Insert vector 
element from another vector element -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4_t { - static_assert_imm5!(LANE); - simd_insert(b, LANE as u32, a) -} - -/// Insert vector element from another vector element -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2_t { - static_assert_imm6!(LANE); - simd_insert(b, LANE as u32, a) -} - -/// Insert vector element from another vector element -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] -#[rustc_legacy_const_generics(2)] -pub unsafe fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t { static_assert_imm3!(LANE); simd_insert(b, LANE as u32, a) } @@ -13297,11 +13261,47 @@ pub unsafe fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t #[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] -pub unsafe fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8_t { +pub unsafe fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4_t { + static_assert_imm2!(LANE); + simd_insert(b, LANE as u32, a) +} + +/// Insert vector element from another vector element +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2_t { + static_assert_imm1!(LANE); + simd_insert(b, LANE as u32, a) +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t { static_assert_imm4!(LANE); simd_insert(b, LANE as u32, a) } +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8_t { + static_assert_imm3!(LANE); + simd_insert(b, LANE as u32, a) +} + /// Insert vector element from another vector element #[inline] #[target_feature(enable = "neon,crypto")] @@ -13310,7 +13310,7 @@ pub unsafe fn vsetq_lane_p16(a: p16, b: poly16x8_t) -> poly16x8 #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn vsetq_lane_p64(a: p64, b: poly64x2_t) -> poly64x2_t { - static_assert_imm6!(LANE); + static_assert_imm1!(LANE); simd_insert(b, LANE as u32, a) } @@ -21006,144 +21006,144 @@ mod test { #[simd_test(enable = "neon")] unsafe fn test_vqrshl_s8() { - let a: i8x8 = i8x8::new(-128, 0x7F, 2, 
3, 4, 5, 6, 7); + let a: i8x8 = i8x8::new(2, -128, 0x7F, 3, 4, 5, 6, 7); let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i8x8 = i8x8::new(-128, 0x7F, 8, 12, 16, 20, 24, 28); + let e: i8x8 = i8x8::new(8, -128, 0x7F, 12, 16, 20, 24, 28); let r: i8x8 = transmute(vqrshl_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshlq_s8() { - let a: i8x16 = i8x16::new(-128, 0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let a: i8x16 = i8x16::new(2, -128, 0x7F, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let e: i8x16 = i8x16::new(-128, 0x7F, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); + let e: i8x16 = i8x16::new(8, -128, 0x7F, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); let r: i8x16 = transmute(vqrshlq_s8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshl_s16() { - let a: i16x4 = i16x4::new(-32768, 0x7F_FF, 2, 3); + let a: i16x4 = i16x4::new(2, -32768, 0x7F_FF, 3); let b: i16x4 = i16x4::new(2, 2, 2, 2); - let e: i16x4 = i16x4::new(-32768, 0x7F_FF, 8, 12); + let e: i16x4 = i16x4::new(8, -32768, 0x7F_FF, 12); let r: i16x4 = transmute(vqrshl_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshlq_s16() { - let a: i16x8 = i16x8::new(-32768, 0x7F_FF, 2, 3, 4, 5, 6, 7); + let a: i16x8 = i16x8::new(2, -32768, 0x7F_FF, 3, 4, 5, 6, 7); let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: i16x8 = i16x8::new(-32768, 0x7F_FF, 8, 12, 16, 20, 24, 28); + let e: i16x8 = i16x8::new(8, -32768, 0x7F_FF, 12, 16, 20, 24, 28); let r: i16x8 = transmute(vqrshlq_s16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshl_s32() { - let a: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF); + let a: i32x2 = i32x2::new(2, -2147483648); let b: i32x2 = i32x2::new(2, 2); - let e: i32x2 = 
i32x2::new(-2147483648, 0x7F_FF_FF_FF); + let e: i32x2 = i32x2::new(8, -2147483648); let r: i32x2 = transmute(vqrshl_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshlq_s32() { - let a: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 2, 3); + let a: i32x4 = i32x4::new(2, -2147483648, 0x7F_FF_FF_FF, 3); let b: i32x4 = i32x4::new(2, 2, 2, 2); - let e: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 8, 12); + let e: i32x4 = i32x4::new(8, -2147483648, 0x7F_FF_FF_FF, 12); let r: i32x4 = transmute(vqrshlq_s32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshl_s64() { - let a: i64x1 = i64x1::new(-9223372036854775808); + let a: i64x1 = i64x1::new(2); let b: i64x1 = i64x1::new(2); - let e: i64x1 = i64x1::new(-9223372036854775808); + let e: i64x1 = i64x1::new(8); let r: i64x1 = transmute(vqrshl_s64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshlq_s64() { - let a: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF); + let a: i64x2 = i64x2::new(2, -9223372036854775808); let b: i64x2 = i64x2::new(2, 2); - let e: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF); + let e: i64x2 = i64x2::new(8, -9223372036854775808); let r: i64x2 = transmute(vqrshlq_s64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshl_u8() { - let a: u8x8 = u8x8::new(0, 0xFF, 2, 3, 4, 5, 6, 7); + let a: u8x8 = u8x8::new(2, 0, 0xFF, 3, 4, 5, 6, 7); let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: u8x8 = u8x8::new(0, 0xFF, 8, 12, 16, 20, 24, 28); + let e: u8x8 = u8x8::new(8, 0, 0xFF, 12, 16, 20, 24, 28); let r: u8x8 = transmute(vqrshl_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshlq_u8() { - let a: u8x16 = u8x16::new(0, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let a: u8x16 = u8x16::new(2, 0, 
0xFF, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); - let e: u8x16 = u8x16::new(0, 0xFF, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); + let e: u8x16 = u8x16::new(8, 0, 0xFF, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60); let r: u8x16 = transmute(vqrshlq_u8(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshl_u16() { - let a: u16x4 = u16x4::new(0, 0xFF_FF, 2, 3); + let a: u16x4 = u16x4::new(2, 0, 0xFF_FF, 3); let b: i16x4 = i16x4::new(2, 2, 2, 2); - let e: u16x4 = u16x4::new(0, 0xFF_FF, 8, 12); + let e: u16x4 = u16x4::new(8, 0, 0xFF_FF, 12); let r: u16x4 = transmute(vqrshl_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshlq_u16() { - let a: u16x8 = u16x8::new(0, 0xFF_FF, 2, 3, 4, 5, 6, 7); + let a: u16x8 = u16x8::new(2, 0, 0xFF_FF, 3, 4, 5, 6, 7); let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); - let e: u16x8 = u16x8::new(0, 0xFF_FF, 8, 12, 16, 20, 24, 28); + let e: u16x8 = u16x8::new(8, 0, 0xFF_FF, 12, 16, 20, 24, 28); let r: u16x8 = transmute(vqrshlq_u16(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshl_u32() { - let a: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); + let a: u32x2 = u32x2::new(2, 0); let b: i32x2 = i32x2::new(2, 2); - let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); + let e: u32x2 = u32x2::new(8, 0); let r: u32x2 = transmute(vqrshl_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshlq_u32() { - let a: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 2, 3); + let a: u32x4 = u32x4::new(2, 0, 0xFF_FF_FF_FF, 3); let b: i32x4 = i32x4::new(2, 2, 2, 2); - let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 8, 12); + let e: u32x4 = u32x4::new(8, 0, 0xFF_FF_FF_FF, 12); let r: u32x4 = transmute(vqrshlq_u32(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe 
fn test_vqrshl_u64() { - let a: u64x1 = u64x1::new(0); + let a: u64x1 = u64x1::new(2); let b: i64x1 = i64x1::new(2); - let e: u64x1 = u64x1::new(0); + let e: u64x1 = u64x1::new(8); let r: u64x1 = transmute(vqrshl_u64(transmute(a), transmute(b))); assert_eq!(r, e); } #[simd_test(enable = "neon")] unsafe fn test_vqrshlq_u64() { - let a: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let a: u64x2 = u64x2::new(2, 0); let b: i64x2 = i64x2::new(2, 2); - let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let e: u64x2 = u64x2::new(8, 0); let r: u64x2 = transmute(vqrshlq_u64(transmute(a), transmute(b))); assert_eq!(r, e); } diff --git a/library/stdarch/crates/core_arch/src/x86/macros.rs b/library/stdarch/crates/core_arch/src/x86/macros.rs index b9550ce79c4a..e686e65b3038 100644 --- a/library/stdarch/crates/core_arch/src/x86/macros.rs +++ b/library/stdarch/crates/core_arch/src/x86/macros.rs @@ -5,7 +5,10 @@ pub(crate) struct ValidateConstRound; impl ValidateConstRound { pub(crate) const VALID: () = { - assert!(IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11, "Invalid IMM value"); + assert!( + IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11, + "Invalid IMM value" + ); }; } @@ -70,7 +73,10 @@ macro_rules! 
static_assert_imm_u8 { pub(crate) struct ValidateConstGatherScale; impl ValidateConstGatherScale { pub(crate) const VALID: () = { - assert!(SCALE == 1 || SCALE == 2 || SCALE == 4 || SCALE == 8, "Invalid SCALE value"); + assert!( + SCALE == 1 || SCALE == 2 || SCALE == 4 || SCALE == 8, + "Invalid SCALE value" + ); }; } diff --git a/library/stdarch/crates/core_arch/src/x86_64/macros.rs b/library/stdarch/crates/core_arch/src/x86_64/macros.rs index 9e3faf444d32..a3ea0e821631 100644 --- a/library/stdarch/crates/core_arch/src/x86_64/macros.rs +++ b/library/stdarch/crates/core_arch/src/x86_64/macros.rs @@ -5,7 +5,10 @@ pub(crate) struct ValidateConstRound; impl ValidateConstRound { pub(crate) const VALID: () = { - assert!(IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11, "Invalid IMM value"); + assert!( + IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11, + "Invalid IMM value" + ); }; } diff --git a/library/stdarch/crates/stdarch-gen/neon.spec b/library/stdarch/crates/stdarch-gen/neon.spec index 4b192069b6ff..825ecf511551 100644 --- a/library/stdarch/crates/stdarch-gen/neon.spec +++ b/library/stdarch/crates/stdarch-gen/neon.spec @@ -1843,9 +1843,23 @@ b = 1 validate 41 aarch64 = sqsub -generate i8, i16, i32, i64 +generate i8, i16 aarch64 = uqsub -generate u8, u16, u32, u64 +generate u8, u16 + +/// Saturating subtract +name = vqsub +a = 42 +b = 1 +validate 41 + +aarch64 = uqsub +link-aarch64 = uqsub._EXT_ +generate u32, u64 + +aarch64 = sqsub +link-aarch64 = sqsub._EXT_ +generate i32, i64 /// Halving add name = vhadd @@ -1999,9 +2013,23 @@ b = 1 validate 43 aarch64 = sqadd -generate i8, i16, i32, i64 +generate i8, i16 aarch64 = uqadd -generate u8, u16, u32, u64 +generate u8, u16 + +/// Saturating add +name = vqadd +a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58 + +aarch64 = uqadd +link-aarch64 = 
uqadd._EXT_ +generate u32, u64 + +aarch64 = sqadd +link-aarch64 = sqadd._EXT_ +generate i32, i64 /// Multiply name = vmul @@ -3383,9 +3411,22 @@ a = 1 validate 1 aarch64 = sqxtn -generate i16:i8, i32:i16, i64:i32 +generate i16:i8, i32:i16 aarch64 = uqxtn -generate u16:u8, u32:u16, u64:u32 +generate u16:u8, u32:u16 + +/// Saturating extract narrow +name = vqmovn +a = 1 +validate 1 + +aarch64 = sqxtn +link-aarch64 = scalar.sqxtn._EXT2_._EXT_ +generate i64:i32 + +aarch64 = uqxtn +link-aarch64 = scalar.uqxtn._EXT2_._EXT_ +generate u64:u32 /// Signed saturating extract narrow name = vqmovn_high @@ -3609,12 +3650,13 @@ generate i16:i16:int16x4_t:i16, i16:i16:int16x8_t:i16, i32:i32:int32x2_t:i32, i3 /// Signed saturating rounding shift left name = vqrshl -a = MIN, MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 -validate MIN, MAX, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 +validate 8, MIN, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 aarch64 = sqrshl link-aarch64 = sqrshl._EXT_ +generate i32, i64 arm = vqrshl link-arm = vqrshifts._EXT_ @@ -3630,17 +3672,18 @@ b = 2 validate 4 aarch64 = sqrshl -generate i8, i16, i32, i64 +generate i8, i16 /// Unsigned signed saturating rounding shift left name = vqrshl out-suffix -a = MIN, MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 -validate 0, MAX, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 +validate 8, 0, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 aarch64 = uqrshl link-aarch64 = uqrshl._EXT_ +generate u32:i32:u32, u64:i64:u64 arm = vqrshl link-arm = vqrshiftu._EXT_ @@ -3658,7 +3701,7 @@ b = 2 validate 4 aarch64 = uqrshl -generate u8:i8:u8, u16:i16:u16, u32:i32:u32, u64:i64:u64 +generate u8:i8:u8, u16:i16:u16 /// Signed saturating rounded shift right narrow 
name = vqrshrn @@ -3806,6 +3849,7 @@ validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 aarch64 = sqshl link-aarch64 = sqshl._EXT_ +generate i64 arm = vqshl link-arm = vqshifts._EXT_ @@ -3820,7 +3864,7 @@ b = 2 validate 4 aarch64 = sqshl -generate i8, i16, i32, i64 +generate i8, i16, i32 /// Unsigned saturating shift left name = vqshl @@ -3831,6 +3875,7 @@ validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60 aarch64 = uqshl link-aarch64 = uqshl._EXT_ +generate u64:i64:u64 arm = vqshl link-arm = vqshiftu._EXT_ @@ -3847,7 +3892,7 @@ b = 2 validate 4 aarch64 = uqshl -generate u8:i8:u8, u16:i16:u16, u32:i32:u32, u64:i64:u64 +generate u8:i8:u8, u16:i16:u16, u32:i32:u32 /// Signed saturating shift left name = vqshl @@ -3915,6 +3960,7 @@ validate 0, 1, 2, 3, 4, 5, 6, 7 aarch64 = sqshrn link-aarch64 = sqshrn._EXT2_ const-aarch64 = N +generate i64:i32 arm = vqshrn link-arm = vqshiftns._EXT2_ @@ -3932,7 +3978,7 @@ n = 2 validate 1 aarch64 = sqshrn -generate i16:i8, i32:i16, i64:i32 +generate i16:i8, i32:i16 /// Signed saturating shift right narrow name = vqshrn_high @@ -3960,6 +4006,7 @@ validate 0, 1, 2, 3, 4, 5, 6, 7 aarch64 = uqshrn link-aarch64 = uqshrn._EXT2_ const-aarch64 = N +generate u64:u32 arm = vqshrn link-arm = vqshiftnu._EXT2_ @@ -3977,7 +4024,7 @@ n = 2 validate 1 aarch64 = uqshrn -generate u16:u8, u32:u16, u64:u32 +generate u16:u8, u32:u16 /// Unsigned saturating shift right narrow name = vqshrn_high @@ -4261,21 +4308,12 @@ validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 aarch64 = srshl link-aarch64 = srshl._EXT_ +generate i64 arm = vrshl link-arm = vrshifts._EXT_ generate int*_t, int64x*_t -/// Signed rounding shift left -name = vrshl -multi_fn = transmute, {vrshl-in_ntt-noext, transmute(a), transmute(b)} -a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 -b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 -validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 - -aarch64 = srshl 
-generate i64 - /// Unsigned rounding shift left name = vrshl out-suffix @@ -4285,23 +4323,13 @@ validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 aarch64 = urshl link-aarch64 = urshl._EXT_ +generate u64:i64:u64 arm = vrshl link-arm = vrshiftu._EXT_ generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t -/// Unsigned rounding shift left -name = vrshl -out-suffix -multi_fn = transmute, {vrshl-out_ntt-noext, transmute(a), transmute(b)} -a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 -b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 -validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 - -aarch64 = urshl -generate u64:i64:u64 - /// Signed rounding shift right name = vrshr n-suffix @@ -4438,15 +4466,14 @@ name = vrsra n-suffix constn = N multi_fn = static_assert-N-1-bits -multi_fn = vrshr_n-in_ntt-::, b:in_ntt, transmute(b) -multi_fn = transmute, {simd_add, transmute(a), b} +multi_fn = vrshr-nself-::, b:in_t, b +multi_fn = a + b a = 1 b = 4 n = 2 validate 2 -// We use "nop" here to skip the instruction test, since it cannot be optimized correctly. -aarch64 = nop +aarch64 = srsra generate i64 /// Ungisned rounding shift right and accumulate. @@ -4454,21 +4481,20 @@ name = vrsra n-suffix constn = N multi_fn = static_assert-N-1-bits -multi_fn = vrshr_n-in_ntt-::, b:in_ntt, transmute(b) -multi_fn = transmute, {simd_add, transmute(a), b} +multi_fn = vrshr-nself-::, b:in_t, b +multi_fn = a + b a = 1 b = 4 n = 2 validate 2 -// We use "nop" here to skip the instruction test, since it cannot be optimized correctly. 
-aarch64 = nop +aarch64 = ursra generate u64 /// Insert vector element from another vector element name = vset_lane constn = LANE -multi_fn = static_assert_imm-in_bits_exp_len-LANE +multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = simd_insert, b, LANE as u32, a a = 1 b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 @@ -4490,7 +4516,7 @@ generate p64:poly64x1_t:poly64x1_t name = vsetq_lane no-q constn = LANE -multi_fn = static_assert_imm-in_bits_exp_len-LANE +multi_fn = static_assert_imm-in_exp_len-LANE multi_fn = simd_insert, b, LANE as u32, a a = 1 b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 @@ -4547,10 +4573,10 @@ a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 -arm = vshl -link-arm = vshifts._EXT_ aarch64 = sshl link-aarch64 = sshl._EXT_ +arm = vshl +link-arm = vshifts._EXT_ generate int*_t, int64x*_t /// Signed Shift left @@ -4570,10 +4596,10 @@ a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 -arm = vshl -link-arm = vshiftu._EXT_ aarch64 = ushl link-aarch64 = ushl._EXT_ +arm = vshl +link-arm = vshiftu._EXT_ generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t