Modify the implementation of d_s64 suffix instructions (#1167)

Sparrow Li 2021-05-19 10:43:53 +08:00 committed by GitHub
parent a98b05c635
commit 15749b0ed3
6 changed files with 482 additions and 377 deletions
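Across all six files the commit applies one pattern: scalar d/s-suffixed intrinsics that used to widen their operands with vdup, run the vector intrinsic, and pull lane 0 back out are rebound directly to the scalar LLVM intrinsics. A minimal before/after sketch of that shape, lifted from the vqsubd_s64 hunks below (vdup_n_s64, vqsub_s64, simd_extract, and int64x1_t are core_arch internals; the _old name is only for contrast here):

// Before: widen to a one-element vector, operate, extract lane 0.
pub unsafe fn vqsubd_s64_old(a: i64, b: i64) -> i64 {
    let a: int64x1_t = vdup_n_s64(a);
    let b: int64x1_t = vdup_n_s64(b);
    simd_extract(vqsub_s64(a, b), 0)
}

// After: bind the scalar LLVM intrinsic and call it directly,
// so codegen no longer has to see through the vector round trip.
pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.i64")]
        fn vqsubd_s64_(a: i64, b: i64) -> i64;
    }
    vqsubd_s64_(a, b)
}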

View file

@@ -3934,26 +3934,6 @@ pub unsafe fn vqsubh_s16(a: i16, b: i16) -> i16 {
simd_extract(vqsub_s16(a, b), 0)
}
/// Saturating subtract
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqsub))]
pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 {
let a: int32x2_t = vdup_n_s32(a);
let b: int32x2_t = vdup_n_s32(b);
simd_extract(vqsub_s32(a, b), 0)
}
/// Saturating subtract
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqsub))]
pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 {
let a: int64x1_t = vdup_n_s64(a);
let b: int64x1_t = vdup_n_s64(b);
simd_extract(vqsub_s64(a, b), 0)
}
/// Saturating subtract
#[inline]
#[target_feature(enable = "neon")]
@@ -3979,9 +3959,12 @@ pub unsafe fn vqsubh_u16(a: u16, b: u16) -> u16 {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqsub))]
pub unsafe fn vqsubs_u32(a: u32, b: u32) -> u32 {
let a: uint32x2_t = vdup_n_u32(a);
let b: uint32x2_t = vdup_n_u32(b);
simd_extract(vqsub_u32(a, b), 0)
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.i32")]
fn vqsubs_u32_(a: u32, b: u32) -> u32;
}
vqsubs_u32_(a, b)
}
/// Saturating subtract
@@ -3989,9 +3972,38 @@ pub unsafe fn vqsubs_u32(a: u32, b: u32) -> u32 {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqsub))]
pub unsafe fn vqsubd_u64(a: u64, b: u64) -> u64 {
let a: uint64x1_t = vdup_n_u64(a);
let b: uint64x1_t = vdup_n_u64(b);
simd_extract(vqsub_u64(a, b), 0)
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.i64")]
fn vqsubd_u64_(a: u64, b: u64) -> u64;
}
vqsubd_u64_(a, b)
}
/// Saturating subtract
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqsub))]
pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.i32")]
fn vqsubs_s32_(a: i32, b: i32) -> i32;
}
vqsubs_s32_(a, b)
}
/// Saturating subtract
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqsub))]
pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.i64")]
fn vqsubd_s64_(a: i64, b: i64) -> i64;
}
vqsubd_s64_(a, b)
}
/// Reverse bit order
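The rewrite does not change behavior: sqsub/uqsub still clamp at the type bounds instead of wrapping. An illustrative check, not part of the commit's tests, assuming a nightly toolchain on an AArch64 target:

#[cfg(target_arch = "aarch64")]
unsafe fn qsub_saturates() {
    use core::arch::aarch64::{vqsubd_s64, vqsubd_u64};
    assert_eq!(vqsubd_s64(i64::MIN, 1), i64::MIN); // clamps at the signed minimum
    assert_eq!(vqsubd_u64(0, 1), 0);               // unsigned result floors at 0
}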
@@ -4410,26 +4422,6 @@ pub unsafe fn vqaddh_s16(a: i16, b: i16) -> i16 {
simd_extract(vqadd_s16(a, b), 0)
}
/// Saturating add
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqadd))]
pub unsafe fn vqadds_s32(a: i32, b: i32) -> i32 {
let a: int32x2_t = vdup_n_s32(a);
let b: int32x2_t = vdup_n_s32(b);
simd_extract(vqadd_s32(a, b), 0)
}
/// Saturating add
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqadd))]
pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 {
let a: int64x1_t = vdup_n_s64(a);
let b: int64x1_t = vdup_n_s64(b);
simd_extract(vqadd_s64(a, b), 0)
}
/// Saturating add
#[inline]
#[target_feature(enable = "neon")]
@@ -4455,9 +4447,12 @@ pub unsafe fn vqaddh_u16(a: u16, b: u16) -> u16 {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqadd))]
pub unsafe fn vqadds_u32(a: u32, b: u32) -> u32 {
let a: uint32x2_t = vdup_n_u32(a);
let b: uint32x2_t = vdup_n_u32(b);
simd_extract(vqadd_u32(a, b), 0)
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.i32")]
fn vqadds_u32_(a: u32, b: u32) -> u32;
}
vqadds_u32_(a, b)
}
/// Saturating add
@@ -4465,9 +4460,38 @@ pub unsafe fn vqadds_u32(a: u32, b: u32) -> u32 {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqadd))]
pub unsafe fn vqaddd_u64(a: u64, b: u64) -> u64 {
let a: uint64x1_t = vdup_n_u64(a);
let b: uint64x1_t = vdup_n_u64(b);
simd_extract(vqadd_u64(a, b), 0)
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.i64")]
fn vqaddd_u64_(a: u64, b: u64) -> u64;
}
vqaddd_u64_(a, b)
}
/// Saturating add
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqadd))]
pub unsafe fn vqadds_s32(a: i32, b: i32) -> i32 {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.i32")]
fn vqadds_s32_(a: i32, b: i32) -> i32;
}
vqadds_s32_(a, b)
}
/// Saturating add
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqadd))]
pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.i64")]
fn vqaddd_s64_(a: i64, b: i64) -> i64;
}
vqaddd_s64_(a, b)
}
/// Multiply
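The vqadd hunks mirror the vqsub ones exactly; at the upper bound the scalar adds saturate rather than wrap (illustrative, same assumptions as above):

#[cfg(target_arch = "aarch64")]
unsafe fn qadd_saturates() {
    use core::arch::aarch64::{vqaddd_s64, vqadds_u32};
    assert_eq!(vqaddd_s64(i64::MAX, 1), i64::MAX); // sqadd clamps at the signed maximum
    assert_eq!(vqadds_u32(u32::MAX, 1), u32::MAX); // uqadd clamps at the unsigned maximum
}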
@@ -5932,14 +5956,6 @@ pub unsafe fn vqmovns_s32(a: i32) -> i16 {
simd_extract(vqmovn_s32(vdupq_n_s32(a)), 0)
}
/// Saturating extract narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqxtn))]
pub unsafe fn vqmovnd_s64(a: i64) -> i32 {
simd_extract(vqmovn_s64(vdupq_n_s64(a)), 0)
}
/// Saturating extract narrow
#[inline]
#[target_feature(enable = "neon")]
@@ -5956,12 +5972,30 @@ pub unsafe fn vqmovns_u32(a: u32) -> u16 {
simd_extract(vqmovn_u32(vdupq_n_u32(a)), 0)
}
/// Saturating extract narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqxtn))]
pub unsafe fn vqmovnd_s64(a: i64) -> i32 {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.scalar.sqxtn.i32.i64")]
fn vqmovnd_s64_(a: i64) -> i32;
}
vqmovnd_s64_(a)
}
/// Saturating extract narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqxtn))]
pub unsafe fn vqmovnd_u64(a: u64) -> u32 {
simd_extract(vqmovn_u64(vdupq_n_u64(a)), 0)
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.scalar.uqxtn.i32.i64")]
fn vqmovnd_u64_(a: u64) -> u32;
}
vqmovnd_u64_(a)
}
/// Signed saturating extract narrow
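For the scalar narrowing intrinsics the commit links the dedicated llvm.aarch64.neon.scalar.*qxtn bindings; values outside the narrower type's range are clamped to it (illustrative):

#[cfg(target_arch = "aarch64")]
unsafe fn qmovn_saturates() {
    use core::arch::aarch64::{vqmovnd_s64, vqmovnd_u64};
    assert_eq!(vqmovnd_s64(1), 1);               // in-range values pass through
    assert_eq!(vqmovnd_s64(i64::MAX), i32::MAX); // clamps to the i32 range
    assert_eq!(vqmovnd_u64(u64::MAX), u32::MAX); // clamps to the u32 range
}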
@@ -6228,6 +6262,32 @@ pub unsafe fn vqrdmlshs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t)
vqsubs_s32(a, vqrdmulhs_laneq_s32::<LANE>(b, c))
}
/// Signed saturating rounding shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshl))]
pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.i32")]
fn vqrshls_s32_(a: i32, b: i32) -> i32;
}
vqrshls_s32_(a, b)
}
/// Signed saturating rounding shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshl))]
pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.i64")]
fn vqrshld_s64_(a: i64, b: i64) -> i64;
}
vqrshld_s64_(a, b)
}
/// Signed saturating rounding shift left
#[inline]
#[target_feature(enable = "neon")]
@@ -6248,24 +6308,30 @@ pub unsafe fn vqrshlh_s16(a: i16, b: i16) -> i16 {
simd_extract(vqrshl_s16(a, b), 0)
}
/// Signed saturating rounding shift left
/// Unsigned signed saturating rounding shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshl))]
pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 {
let a: int32x2_t = vdup_n_s32(a);
let b: int32x2_t = vdup_n_s32(b);
simd_extract(vqrshl_s32(a, b), 0)
#[cfg_attr(test, assert_instr(uqrshl))]
pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.i32")]
fn vqrshls_u32_(a: u32, b: i32) -> u32;
}
vqrshls_u32_(a, b)
}
/// Signed saturating rounding shift left
/// Unsigned signed saturating rounding shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshl))]
pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 {
let a: int64x1_t = vdup_n_s64(a);
let b: int64x1_t = vdup_n_s64(b);
simd_extract(vqrshl_s64(a, b), 0)
#[cfg_attr(test, assert_instr(uqrshl))]
pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.i64")]
fn vqrshld_u64_(a: u64, b: i64) -> u64;
}
vqrshld_u64_(a, b)
}
/// Unsigned signed saturating rounding shift left
@ -6288,26 +6354,6 @@ pub unsafe fn vqrshlh_u16(a: u16, b: i16) -> u16 {
simd_extract(vqrshl_u16(a, b), 0)
}
/// Unsigned signed saturating rounding shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqrshl))]
pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 {
let a: uint32x2_t = vdup_n_u32(a);
let b: int32x2_t = vdup_n_s32(b);
simd_extract(vqrshl_u32(a, b), 0)
}
/// Unsigned signed saturating rounding shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqrshl))]
pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 {
let a: uint64x1_t = vdup_n_u64(a);
let b: int64x1_t = vdup_n_s64(b);
simd_extract(vqrshl_u64(a, b), 0)
}
/// Signed saturating rounded shift right narrow
#[inline]
#[target_feature(enable = "neon")]
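For the rounding saturating shifts a positive b is a saturating shift left, while a negative b becomes a rounding shift right (illustrative):

#[cfg(target_arch = "aarch64")]
unsafe fn qrshl_semantics() {
    use core::arch::aarch64::vqrshld_s64;
    assert_eq!(vqrshld_s64(2, 2), 8);               // 2 << 2
    assert_eq!(vqrshld_s64(i64::MAX, 1), i64::MAX); // left shift saturates
    assert_eq!(vqrshld_s64(3, -1), 2);              // rounds: (3 + 1) >> 1
}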
@@ -6497,6 +6543,19 @@ pub unsafe fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) ->
simd_shuffle4!(a, vqrshrun_n_s64::<N>(b), [0, 1, 2, 3])
}
/// Signed saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshl))]
pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.i64")]
fn vqshld_s64_(a: i64, b: i64) -> i64;
}
vqshld_s64_(a, b)
}
/// Signed saturating shift left
#[inline]
#[target_feature(enable = "neon")]
@@ -6524,13 +6583,17 @@ pub unsafe fn vqshls_s32(a: i32, b: i32) -> i32 {
simd_extract(c, 0)
}
/// Signed saturating shift left
/// Unsigned saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshl))]
pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 {
let c: int64x1_t = vqshl_s64(vdup_n_s64(a), vdup_n_s64(b));
simd_extract(c, 0)
#[cfg_attr(test, assert_instr(uqshl))]
pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.i64")]
fn vqshld_u64_(a: u64, b: i64) -> u64;
}
vqshld_u64_(a, b)
}
/// Unsigned saturating shift left
@@ -6560,15 +6623,6 @@ pub unsafe fn vqshls_u32(a: u32, b: i32) -> u32 {
simd_extract(c, 0)
}
/// Unsigned saturating shift left
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshl))]
pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 {
let c: uint64x1_t = vqshl_u64(vdup_n_u64(a), vdup_n_s64(b));
simd_extract(c, 0)
}
/// Signed saturating shift left
#[inline]
#[target_feature(enable = "neon")]
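vqshld_s64 and vqshld_u64 are the non-rounding counterparts; a shift that overflows the type saturates (illustrative):

#[cfg(target_arch = "aarch64")]
unsafe fn qshl_saturates() {
    use core::arch::aarch64::{vqshld_s64, vqshld_u64};
    assert_eq!(vqshld_s64(1, 3), 8);               // 1 << 3
    assert_eq!(vqshld_u64(u64::MAX, 1), u64::MAX); // uqshl clamps instead of wrapping
}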
@@ -6649,6 +6703,21 @@ pub unsafe fn vqshld_n_u64<const N: i32>(a: u64) -> u64 {
simd_extract(vqshl_n_u64::<N>(vdup_n_u64(a)), 0)
}
/// Signed saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshrnd_n_s64<const N: i32>(a: i64) -> i32 {
static_assert!(N : i32 where N >= 1 && N <= 32);
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.i32")]
fn vqshrnd_n_s64_(a: i64, n: i32) -> i32;
}
vqshrnd_n_s64_(a, N)
}
/// Signed saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
@@ -6669,16 +6738,6 @@ pub unsafe fn vqshrns_n_s32<const N: i32>(a: i32) -> i16 {
simd_extract(vqshrn_n_s32::<N>(vdupq_n_s32(a)), 0)
}
/// Signed saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshrnd_n_s64<const N: i32>(a: i64) -> i32 {
static_assert!(N : i32 where N >= 1 && N <= 32);
simd_extract(vqshrn_n_s64::<N>(vdupq_n_s64(a)), 0)
}
/// Signed saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
@@ -6709,6 +6768,21 @@ pub unsafe fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int
simd_shuffle4!(a, vqshrn_n_s64::<N>(b), [0, 1, 2, 3])
}
/// Unsigned saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshrnd_n_u64<const N: i32>(a: u64) -> u32 {
static_assert!(N : i32 where N >= 1 && N <= 32);
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.i32")]
fn vqshrnd_n_u64_(a: u64, n: i32) -> u32;
}
vqshrnd_n_u64_(a, N)
}
/// Unsigned saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
@@ -6729,16 +6803,6 @@ pub unsafe fn vqshrns_n_u32<const N: i32>(a: u32) -> u16 {
simd_extract(vqshrn_n_u32::<N>(vdupq_n_u32(a)), 0)
}
/// Unsigned saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqshrnd_n_u64<const N: i32>(a: u64) -> u32 {
static_assert!(N : i32 where N >= 1 && N <= 32);
simd_extract(vqshrn_n_u64::<N>(vdupq_n_u64(a)), 0)
}
/// Unsigned saturating shift right narrow
#[inline]
#[target_feature(enable = "neon")]
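The narrowing right shifts take the shift amount as a const generic, and the static_assert! keeps N in 1..=32 for the 64-to-32-bit forms (illustrative):

#[cfg(target_arch = "aarch64")]
unsafe fn qshrn_semantics() {
    use core::arch::aarch64::{vqshrnd_n_s64, vqshrnd_n_u64};
    assert_eq!(vqshrnd_n_s64::<2>(16), 4);              // 16 >> 2 fits in i32
    assert_eq!(vqshrnd_n_u64::<2>(u64::MAX), u32::MAX); // out-of-range result saturates
}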
@@ -7654,7 +7718,12 @@ pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(srshl))]
pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 {
transmute(vrshl_s64(transmute(a), transmute(b)))
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.i64")]
fn vrshld_s64_(a: i64, b: i64) -> i64;
}
vrshld_s64_(a, b)
}
/// Unsigned rounding shift left
@@ -7662,7 +7731,12 @@ pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(urshl))]
pub unsafe fn vrshld_u64(a: u64, b: i64) -> u64 {
transmute(vrshl_u64(transmute(a), transmute(b)))
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.i64")]
fn vrshld_u64_(a: u64, b: i64) -> u64;
}
vrshld_u64_(a, b)
}
/// Signed rounding shift right
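vrshld_s64/vrshld_u64 round but never saturate; as with the q variants, a negative shift count turns into a rounding shift right (illustrative):

#[cfg(target_arch = "aarch64")]
unsafe fn rshl_semantics() {
    use core::arch::aarch64::vrshld_s64;
    assert_eq!(vrshld_s64(1, 2), 4);  // 1 << 2
    assert_eq!(vrshld_s64(3, -1), 2); // rounding shift right: (3 + 1) >> 1
}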
@@ -7748,23 +7822,23 @@ pub unsafe fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> u
/// Signed rounding shift right and accumulate.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[cfg_attr(test, assert_instr(srsra, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
static_assert!(N : i32 where N >= 1 && N <= 64);
let b: int64x1_t = vrshr_n_s64::<N>(transmute(b));
transmute(simd_add(transmute(a), b))
let b: i64 = vrshrd_n_s64::<N>(b);
a + b
}
/// Unsigned rounding shift right and accumulate.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[cfg_attr(test, assert_instr(ursra, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
static_assert!(N : i32 where N >= 1 && N <= 64);
let b: uint64x1_t = vrshr_n_u64::<N>(transmute(b));
transmute(simd_add(transmute(a), b))
let b: u64 = vrshrd_n_u64::<N>(b);
a + b
}
/// Insert vector element from another vector element
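vrsrad_n_s64/vrsrad_n_u64 are now composed from scalar pieces: a rounded shift right of b by N, then an ordinary add, which is why the assert_instr can check for srsra/ursra instead of nop. With the spec file's own values (a = 1, b = 4, N = 2):

#[cfg(target_arch = "aarch64")]
unsafe fn rsra_example() {
    use core::arch::aarch64::vrsrad_n_s64;
    // rounded shift: (4 + (1 << 1)) >> 2 == 1, then accumulate: 1 + 1 == 2
    assert_eq!(vrsrad_n_s64::<2>(1, 4), 2);
}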
@@ -12089,24 +12163,6 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqsubs_s32() {
let a: i32 = 42;
let b: i32 = 1;
let e: i32 = 41;
let r: i32 = transmute(vqsubs_s32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqsubd_s64() {
let a: i64 = 42;
let b: i64 = 1;
let e: i64 = 41;
let r: i64 = transmute(vqsubd_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqsubb_u8() {
let a: u8 = 42;
@@ -12143,6 +12199,24 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqsubs_s32() {
let a: i32 = 42;
let b: i32 = 1;
let e: i32 = 41;
let r: i32 = transmute(vqsubs_s32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqsubd_s64() {
let a: i64 = 42;
let b: i64 = 1;
let e: i64 = 41;
let r: i64 = transmute(vqsubd_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrbit_s8() {
let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
@@ -12417,24 +12491,6 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqadds_s32() {
let a: i32 = 42;
let b: i32 = 1;
let e: i32 = 43;
let r: i32 = transmute(vqadds_s32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqaddd_s64() {
let a: i64 = 42;
let b: i64 = 1;
let e: i64 = 43;
let r: i64 = transmute(vqaddd_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqaddb_u8() {
let a: u8 = 42;
@@ -12471,6 +12527,24 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqadds_s32() {
let a: i32 = 42;
let b: i32 = 1;
let e: i32 = 43;
let r: i32 = transmute(vqadds_s32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqaddd_s64() {
let a: i64 = 42;
let b: i64 = 1;
let e: i64 = 43;
let r: i64 = transmute(vqaddd_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_f64() {
let a: f64 = 1.0;
@@ -13736,14 +13810,6 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqmovnd_s64() {
let a: i64 = 1;
let e: i32 = 1;
let r: i32 = transmute(vqmovnd_s64(transmute(a)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqmovnh_u16() {
let a: u16 = 1;
@@ -13760,6 +13826,14 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqmovnd_s64() {
let a: i64 = 1;
let e: i32 = 1;
let r: i32 = transmute(vqmovnd_s64(transmute(a)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqmovnd_u64() {
let a: u64 = 1;
@@ -14047,6 +14121,24 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshls_s32() {
let a: i32 = 2;
let b: i32 = 2;
let e: i32 = 8;
let r: i32 = transmute(vqrshls_s32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshld_s64() {
let a: i64 = 2;
let b: i64 = 2;
let e: i64 = 8;
let r: i64 = transmute(vqrshld_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlb_s8() {
let a: i8 = 1;
@@ -14066,20 +14158,20 @@ mod test {
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshls_s32() {
let a: i32 = 1;
unsafe fn test_vqrshls_u32() {
let a: u32 = 2;
let b: i32 = 2;
let e: i32 = 4;
let r: i32 = transmute(vqrshls_s32(transmute(a), transmute(b)));
let e: u32 = 8;
let r: u32 = transmute(vqrshls_u32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshld_s64() {
let a: i64 = 1;
unsafe fn test_vqrshld_u64() {
let a: u64 = 2;
let b: i64 = 2;
let e: i64 = 4;
let r: i64 = transmute(vqrshld_s64(transmute(a), transmute(b)));
let e: u64 = 8;
let r: u64 = transmute(vqrshld_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
@@ -14101,24 +14193,6 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshls_u32() {
let a: u32 = 1;
let b: i32 = 2;
let e: u32 = 4;
let r: u32 = transmute(vqrshls_u32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshld_u64() {
let a: u64 = 1;
let b: i64 = 2;
let e: u64 = 4;
let r: u64 = transmute(vqrshld_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrnh_n_s16() {
let a: i16 = 4;
@@ -14272,6 +14346,15 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqshld_s64() {
let a: i64 = 0;
let b: i64 = 2;
let e: i64 = 0;
let r: i64 = transmute(vqshld_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqshlb_s8() {
let a: i8 = 1;
@@ -14300,11 +14383,11 @@ mod test {
}
#[simd_test(enable = "neon")]
unsafe fn test_vqshld_s64() {
let a: i64 = 1;
unsafe fn test_vqshld_u64() {
let a: u64 = 0;
let b: i64 = 2;
let e: i64 = 4;
let r: i64 = transmute(vqshld_s64(transmute(a), transmute(b)));
let e: u64 = 0;
let r: u64 = transmute(vqshld_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
@@ -14335,15 +14418,6 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqshld_u64() {
let a: u64 = 1;
let b: i64 = 2;
let e: u64 = 4;
let r: u64 = transmute(vqshld_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqshlb_n_s8() {
let a: i8 = 1;
@@ -14408,6 +14482,14 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqshrnd_n_s64() {
let a: i64 = 0;
let e: i32 = 0;
let r: i32 = transmute(vqshrnd_n_s64::<2>(transmute(a)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqshrnh_n_s16() {
let a: i16 = 4;
@@ -14424,14 +14506,6 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqshrnd_n_s64() {
let a: i64 = 4;
let e: i32 = 1;
let r: i32 = transmute(vqshrnd_n_s64::<2>(transmute(a)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqshrn_high_n_s16() {
let a: i8x8 = i8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
@@ -14459,6 +14533,14 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqshrnd_n_u64() {
let a: u64 = 0;
let e: u32 = 0;
let r: u32 = transmute(vqshrnd_n_u64::<2>(transmute(a)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqshrnh_n_u16() {
let a: u16 = 4;
@@ -14475,14 +14557,6 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqshrnd_n_u64() {
let a: u64 = 4;
let e: u32 = 1;
let r: u32 = transmute(vqshrnd_n_u64::<2>(transmute(a)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqshrn_high_n_u16() {
let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11);

View file

@@ -1184,9 +1184,7 @@ pub unsafe fn vadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
let a: int64x1_t = transmute(a);
let b: int64x1_t = transmute(b);
simd_extract(simd_add(a, b), 0)
a.wrapping_add(b)
}
/// Vector add.
@@ -1194,9 +1192,7 @@ pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(add))]
pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {
let a: uint64x1_t = transmute(a);
let b: uint64x1_t = transmute(b);
simd_extract(simd_add(a, b), 0)
a.wrapping_add(b)
}
/// Horizontal vector max.
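Unlike the saturating intrinsics above, vaddd_s64/vaddd_u64 are plain adds; wrapping_add matches the add instruction's overflow behavior without tripping Rust's debug overflow checks (illustrative):

#[cfg(target_arch = "aarch64")]
unsafe fn addd_wraps() {
    use core::arch::aarch64::{vaddd_s64, vaddd_u64};
    assert_eq!(vaddd_s64(i64::MAX, 1), i64::MIN); // wraps, no saturation
    assert_eq!(vaddd_u64(u64::MAX, 1), 0);
}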

View file

@@ -13070,7 +13070,7 @@ pub unsafe fn vset_lane_s8<const LANE: i32>(a: i8, b: int8x8_t) -> int8x8_t {
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vset_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> int16x4_t {
static_assert_imm4!(LANE);
static_assert_imm2!(LANE);
simd_insert(b, LANE as u32, a)
}
@@ -13082,7 +13082,7 @@ pub unsafe fn vset_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> int16x4_t
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vset_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> int32x2_t {
static_assert_imm5!(LANE);
static_assert_imm1!(LANE);
simd_insert(b, LANE as u32, a)
}
@@ -13094,7 +13094,7 @@ pub unsafe fn vset_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> int32x2_t
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vset_lane_s64<const LANE: i32>(a: i64, b: int64x1_t) -> int64x1_t {
static_assert_imm6!(LANE);
static_assert!(LANE : i32 where LANE == 0);
simd_insert(b, LANE as u32, a)
}
@@ -13118,7 +13118,7 @@ pub unsafe fn vset_lane_u8<const LANE: i32>(a: u8, b: uint8x8_t) -> uint8x8_t {
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vset_lane_u16<const LANE: i32>(a: u16, b: uint16x4_t) -> uint16x4_t {
static_assert_imm4!(LANE);
static_assert_imm2!(LANE);
simd_insert(b, LANE as u32, a)
}
@@ -13130,7 +13130,7 @@ pub unsafe fn vset_lane_u16<const LANE: i32>(a: u16, b: uint16x4_t) -> uint16x4_
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vset_lane_u32<const LANE: i32>(a: u32, b: uint32x2_t) -> uint32x2_t {
static_assert_imm5!(LANE);
static_assert_imm1!(LANE);
simd_insert(b, LANE as u32, a)
}
@@ -13142,7 +13142,7 @@ pub unsafe fn vset_lane_u32<const LANE: i32>(a: u32, b: uint32x2_t) -> uint32x2_
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vset_lane_u64<const LANE: i32>(a: u64, b: uint64x1_t) -> uint64x1_t {
static_assert_imm6!(LANE);
static_assert!(LANE : i32 where LANE == 0);
simd_insert(b, LANE as u32, a)
}
@@ -13166,7 +13166,7 @@ pub unsafe fn vset_lane_p8<const LANE: i32>(a: p8, b: poly8x8_t) -> poly8x8_t {
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vset_lane_p16<const LANE: i32>(a: p16, b: poly16x4_t) -> poly16x4_t {
static_assert_imm4!(LANE);
static_assert_imm2!(LANE);
simd_insert(b, LANE as u32, a)
}
@@ -13178,7 +13178,7 @@ pub unsafe fn vset_lane_p16<const LANE: i32>(a: p16, b: poly16x4_t) -> poly16x4_
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vset_lane_p64<const LANE: i32>(a: p64, b: poly64x1_t) -> poly64x1_t {
static_assert_imm6!(LANE);
static_assert!(LANE : i32 where LANE == 0);
simd_insert(b, LANE as u32, a)
}
@@ -13190,7 +13190,7 @@ pub unsafe fn vset_lane_p64<const LANE: i32>(a: p64, b: poly64x1_t) -> poly64x1_
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsetq_lane_s8<const LANE: i32>(a: i8, b: int8x16_t) -> int8x16_t {
static_assert_imm3!(LANE);
static_assert_imm4!(LANE);
simd_insert(b, LANE as u32, a)
}
@@ -13202,7 +13202,7 @@ pub unsafe fn vsetq_lane_s8<const LANE: i32>(a: i8, b: int8x16_t) -> int8x16_t {
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsetq_lane_s16<const LANE: i32>(a: i16, b: int16x8_t) -> int16x8_t {
static_assert_imm4!(LANE);
static_assert_imm3!(LANE);
simd_insert(b, LANE as u32, a)
}
@@ -13214,7 +13214,7 @@ pub unsafe fn vsetq_lane_s16<const LANE: i32>(a: i16, b: int16x8_t) -> int16x8_t
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsetq_lane_s32<const LANE: i32>(a: i32, b: int32x4_t) -> int32x4_t {
static_assert_imm5!(LANE);
static_assert_imm2!(LANE);
simd_insert(b, LANE as u32, a)
}
@@ -13226,7 +13226,7 @@ pub unsafe fn vsetq_lane_s32<const LANE: i32>(a: i32, b: int32x4_t) -> int32x4_t
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsetq_lane_s64<const LANE: i32>(a: i64, b: int64x2_t) -> int64x2_t {
static_assert_imm6!(LANE);
static_assert_imm1!(LANE);
simd_insert(b, LANE as u32, a)
}
@@ -13238,7 +13238,7 @@ pub unsafe fn vsetq_lane_s64<const LANE: i32>(a: i64, b: int64x2_t) -> int64x2_t
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsetq_lane_u8<const LANE: i32>(a: u8, b: uint8x16_t) -> uint8x16_t {
static_assert_imm3!(LANE);
static_assert_imm4!(LANE);
simd_insert(b, LANE as u32, a)
}
@@ -13250,42 +13250,6 @@ pub unsafe fn vsetq_lane_u8<const LANE: i32>(a: u8, b: uint8x16_t) -> uint8x16_t
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsetq_lane_u16<const LANE: i32>(a: u16, b: uint16x8_t) -> uint16x8_t {
static_assert_imm4!(LANE);
simd_insert(b, LANE as u32, a)
}
/// Insert vector element from another vector element
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsetq_lane_u32<const LANE: i32>(a: u32, b: uint32x4_t) -> uint32x4_t {
static_assert_imm5!(LANE);
simd_insert(b, LANE as u32, a)
}
/// Insert vector element from another vector element
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsetq_lane_u64<const LANE: i32>(a: u64, b: uint64x2_t) -> uint64x2_t {
static_assert_imm6!(LANE);
simd_insert(b, LANE as u32, a)
}
/// Insert vector element from another vector element
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsetq_lane_p8<const LANE: i32>(a: p8, b: poly8x16_t) -> poly8x16_t {
static_assert_imm3!(LANE);
simd_insert(b, LANE as u32, a)
}
@@ -13297,11 +13261,47 @@ pub unsafe fn vsetq_lane_p8<const LANE: i32>(a: p8, b: poly8x16_t) -> poly8x16_t
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsetq_lane_p16<const LANE: i32>(a: p16, b: poly16x8_t) -> poly16x8_t {
pub unsafe fn vsetq_lane_u32<const LANE: i32>(a: u32, b: uint32x4_t) -> uint32x4_t {
static_assert_imm2!(LANE);
simd_insert(b, LANE as u32, a)
}
/// Insert vector element from another vector element
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsetq_lane_u64<const LANE: i32>(a: u64, b: uint64x2_t) -> uint64x2_t {
static_assert_imm1!(LANE);
simd_insert(b, LANE as u32, a)
}
/// Insert vector element from another vector element
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsetq_lane_p8<const LANE: i32>(a: p8, b: poly8x16_t) -> poly8x16_t {
static_assert_imm4!(LANE);
simd_insert(b, LANE as u32, a)
}
/// Insert vector element from another vector element
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsetq_lane_p16<const LANE: i32>(a: p16, b: poly16x8_t) -> poly16x8_t {
static_assert_imm3!(LANE);
simd_insert(b, LANE as u32, a)
}
/// Insert vector element from another vector element
#[inline]
#[target_feature(enable = "neon,crypto")]
@@ -13310,7 +13310,7 @@ pub unsafe fn vsetq_lane_p16<const LANE: i32>(a: p16, b: poly16x8_t) -> poly16x8
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vsetq_lane_p64<const LANE: i32>(a: p64, b: poly64x2_t) -> poly64x2_t {
static_assert_imm6!(LANE);
static_assert_imm1!(LANE);
simd_insert(b, LANE as u32, a)
}
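The corrected assertions bound LANE by the element count rather than an unrelated bit width: a 4-lane vector takes a 2-bit index (0..=3), a 2-lane vector a 1-bit index, and a 1-lane vector only 0. A sketch of the compile-time effect, assuming the usual dup/get lane intrinsics:

#[cfg(target_arch = "aarch64")]
unsafe fn lane_bounds() {
    use core::arch::aarch64::{vdup_n_s16, vget_lane_s16, vset_lane_s16};
    let v = vdup_n_s16(0);            // int16x4_t: 4 lanes
    let v = vset_lane_s16::<3>(7, v); // LANE in 0..=3, enforced at compile time
    assert_eq!(vget_lane_s16::<3>(v), 7);
    // vset_lane_s16::<4>(7, v) now fails the static assert instead of compiling.
}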
@@ -21006,144 +21006,144 @@ mod test {
#[simd_test(enable = "neon")]
unsafe fn test_vqrshl_s8() {
let a: i8x8 = i8x8::new(-128, 0x7F, 2, 3, 4, 5, 6, 7);
let a: i8x8 = i8x8::new(2, -128, 0x7F, 3, 4, 5, 6, 7);
let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
let e: i8x8 = i8x8::new(-128, 0x7F, 8, 12, 16, 20, 24, 28);
let e: i8x8 = i8x8::new(8, -128, 0x7F, 12, 16, 20, 24, 28);
let r: i8x8 = transmute(vqrshl_s8(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlq_s8() {
let a: i8x16 = i8x16::new(-128, 0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let a: i8x16 = i8x16::new(2, -128, 0x7F, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let e: i8x16 = i8x16::new(-128, 0x7F, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
let e: i8x16 = i8x16::new(8, -128, 0x7F, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
let r: i8x16 = transmute(vqrshlq_s8(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshl_s16() {
let a: i16x4 = i16x4::new(-32768, 0x7F_FF, 2, 3);
let a: i16x4 = i16x4::new(2, -32768, 0x7F_FF, 3);
let b: i16x4 = i16x4::new(2, 2, 2, 2);
let e: i16x4 = i16x4::new(-32768, 0x7F_FF, 8, 12);
let e: i16x4 = i16x4::new(8, -32768, 0x7F_FF, 12);
let r: i16x4 = transmute(vqrshl_s16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlq_s16() {
let a: i16x8 = i16x8::new(-32768, 0x7F_FF, 2, 3, 4, 5, 6, 7);
let a: i16x8 = i16x8::new(2, -32768, 0x7F_FF, 3, 4, 5, 6, 7);
let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
let e: i16x8 = i16x8::new(-32768, 0x7F_FF, 8, 12, 16, 20, 24, 28);
let e: i16x8 = i16x8::new(8, -32768, 0x7F_FF, 12, 16, 20, 24, 28);
let r: i16x8 = transmute(vqrshlq_s16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshl_s32() {
let a: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF);
let a: i32x2 = i32x2::new(2, -2147483648);
let b: i32x2 = i32x2::new(2, 2);
let e: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF);
let e: i32x2 = i32x2::new(8, -2147483648);
let r: i32x2 = transmute(vqrshl_s32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlq_s32() {
let a: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 2, 3);
let a: i32x4 = i32x4::new(2, -2147483648, 0x7F_FF_FF_FF, 3);
let b: i32x4 = i32x4::new(2, 2, 2, 2);
let e: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 8, 12);
let e: i32x4 = i32x4::new(8, -2147483648, 0x7F_FF_FF_FF, 12);
let r: i32x4 = transmute(vqrshlq_s32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshl_s64() {
let a: i64x1 = i64x1::new(-9223372036854775808);
let a: i64x1 = i64x1::new(2);
let b: i64x1 = i64x1::new(2);
let e: i64x1 = i64x1::new(-9223372036854775808);
let e: i64x1 = i64x1::new(8);
let r: i64x1 = transmute(vqrshl_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlq_s64() {
let a: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF);
let a: i64x2 = i64x2::new(2, -9223372036854775808);
let b: i64x2 = i64x2::new(2, 2);
let e: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF);
let e: i64x2 = i64x2::new(8, -9223372036854775808);
let r: i64x2 = transmute(vqrshlq_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshl_u8() {
let a: u8x8 = u8x8::new(0, 0xFF, 2, 3, 4, 5, 6, 7);
let a: u8x8 = u8x8::new(2, 0, 0xFF, 3, 4, 5, 6, 7);
let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
let e: u8x8 = u8x8::new(0, 0xFF, 8, 12, 16, 20, 24, 28);
let e: u8x8 = u8x8::new(8, 0, 0xFF, 12, 16, 20, 24, 28);
let r: u8x8 = transmute(vqrshl_u8(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlq_u8() {
let a: u8x16 = u8x16::new(0, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let a: u8x16 = u8x16::new(2, 0, 0xFF, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let e: u8x16 = u8x16::new(0, 0xFF, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
let e: u8x16 = u8x16::new(8, 0, 0xFF, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
let r: u8x16 = transmute(vqrshlq_u8(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshl_u16() {
let a: u16x4 = u16x4::new(0, 0xFF_FF, 2, 3);
let a: u16x4 = u16x4::new(2, 0, 0xFF_FF, 3);
let b: i16x4 = i16x4::new(2, 2, 2, 2);
let e: u16x4 = u16x4::new(0, 0xFF_FF, 8, 12);
let e: u16x4 = u16x4::new(8, 0, 0xFF_FF, 12);
let r: u16x4 = transmute(vqrshl_u16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlq_u16() {
let a: u16x8 = u16x8::new(0, 0xFF_FF, 2, 3, 4, 5, 6, 7);
let a: u16x8 = u16x8::new(2, 0, 0xFF_FF, 3, 4, 5, 6, 7);
let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
let e: u16x8 = u16x8::new(0, 0xFF_FF, 8, 12, 16, 20, 24, 28);
let e: u16x8 = u16x8::new(8, 0, 0xFF_FF, 12, 16, 20, 24, 28);
let r: u16x8 = transmute(vqrshlq_u16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshl_u32() {
let a: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
let a: u32x2 = u32x2::new(2, 0);
let b: i32x2 = i32x2::new(2, 2);
let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
let e: u32x2 = u32x2::new(8, 0);
let r: u32x2 = transmute(vqrshl_u32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlq_u32() {
let a: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 2, 3);
let a: u32x4 = u32x4::new(2, 0, 0xFF_FF_FF_FF, 3);
let b: i32x4 = i32x4::new(2, 2, 2, 2);
let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 8, 12);
let e: u32x4 = u32x4::new(8, 0, 0xFF_FF_FF_FF, 12);
let r: u32x4 = transmute(vqrshlq_u32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshl_u64() {
let a: u64x1 = u64x1::new(0);
let a: u64x1 = u64x1::new(2);
let b: i64x1 = i64x1::new(2);
let e: u64x1 = u64x1::new(0);
let e: u64x1 = u64x1::new(8);
let r: u64x1 = transmute(vqrshl_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlq_u64() {
let a: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let a: u64x2 = u64x2::new(2, 0);
let b: i64x2 = i64x2::new(2, 2);
let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let e: u64x2 = u64x2::new(8, 0);
let r: u64x2 = transmute(vqrshlq_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}

View file

@@ -5,7 +5,10 @@
pub(crate) struct ValidateConstRound<const IMM: i32>;
impl<const IMM: i32> ValidateConstRound<IMM> {
pub(crate) const VALID: () = {
assert!(IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11, "Invalid IMM value");
assert!(
IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11,
"Invalid IMM value"
);
};
}
@@ -70,7 +73,10 @@ macro_rules! static_assert_imm_u8 {
pub(crate) struct ValidateConstGatherScale<const SCALE: i32>;
impl<const SCALE: i32> ValidateConstGatherScale<SCALE> {
pub(crate) const VALID: () = {
assert!(SCALE == 1 || SCALE == 2 || SCALE == 4 || SCALE == 8, "Invalid SCALE value");
assert!(
SCALE == 1 || SCALE == 2 || SCALE == 4 || SCALE == 8,
"Invalid SCALE value"
);
};
}

View file

@@ -5,7 +5,10 @@
pub(crate) struct ValidateConstRound<const IMM: i32>;
impl<const IMM: i32> ValidateConstRound<IMM> {
pub(crate) const VALID: () = {
assert!(IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11, "Invalid IMM value");
assert!(
IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11,
"Invalid IMM value"
);
};
}

View file

@@ -1843,9 +1843,23 @@ b = 1
validate 41
aarch64 = sqsub
generate i8, i16, i32, i64
generate i8, i16
aarch64 = uqsub
generate u8, u16, u32, u64
generate u8, u16
/// Saturating subtract
name = vqsub
a = 42
b = 1
validate 41
aarch64 = uqsub
link-aarch64 = uqsub._EXT_
generate u32, u64
aarch64 = sqsub
link-aarch64 = sqsub._EXT_
generate i32, i64
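In this spec format a link-aarch64 line makes stdarch-gen emit an extern block bound to the named LLVM intrinsic instead of a Rust body; _EXT_ is filled in from the generated type (i32, i64, ...) and _EXT2_ from the output type in pairs like i64:i32, as in the scalar.sqxtn._EXT2_._EXT_ entry further down. For generate u64 above, the emitted item is the vqsubd_u64 shown in the first file:

pub unsafe fn vqsubd_u64(a: u64, b: u64) -> u64 {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.i64")]
        fn vqsubd_u64_(a: u64, b: u64) -> u64;
    }
    vqsubd_u64_(a, b)
}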
/// Halving add
name = vhadd
@@ -1999,9 +2013,23 @@ b = 1
validate 43
aarch64 = sqadd
generate i8, i16, i32, i64
generate i8, i16
aarch64 = uqadd
generate u8, u16, u32, u64
generate u8, u16
/// Saturating add
name = vqadd
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58
aarch64 = uqadd
link-aarch64 = uqadd._EXT_
generate u32, u64
aarch64 = sqadd
link-aarch64 = sqadd._EXT_
generate i32, i64
/// Multiply
name = vmul
@@ -3383,9 +3411,22 @@ a = 1
validate 1
aarch64 = sqxtn
generate i16:i8, i32:i16, i64:i32
generate i16:i8, i32:i16
aarch64 = uqxtn
generate u16:u8, u32:u16, u64:u32
generate u16:u8, u32:u16
/// Saturating extract narrow
name = vqmovn
a = 1
validate 1
aarch64 = sqxtn
link-aarch64 = scalar.sqxtn._EXT2_._EXT_
generate i64:i32
aarch64 = uqxtn
link-aarch64 = scalar.uqxtn._EXT2_._EXT_
generate u64:u32
/// Signed saturating extract narrow
name = vqmovn_high
@@ -3609,12 +3650,13 @@ generate i16:i16:int16x4_t:i16, i16:i16:int16x8_t:i16, i32:i32:int32x2_t:i32, i3
/// Signed saturating rounding shift left
name = vqrshl
a = MIN, MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate MIN, MAX, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
validate 8, MIN, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
aarch64 = sqrshl
link-aarch64 = sqrshl._EXT_
generate i32, i64
arm = vqrshl
link-arm = vqrshifts._EXT_
@@ -3630,17 +3672,18 @@ b = 2
validate 4
aarch64 = sqrshl
generate i8, i16, i32, i64
generate i8, i16
/// Unsigned signed saturating rounding shift left
name = vqrshl
out-suffix
a = MIN, MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 0, MAX, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
validate 8, 0, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
aarch64 = uqrshl
link-aarch64 = uqrshl._EXT_
generate u32:i32:u32, u64:i64:u64
arm = vqrshl
link-arm = vqrshiftu._EXT_
@@ -3658,7 +3701,7 @@ b = 2
validate 4
aarch64 = uqrshl
generate u8:i8:u8, u16:i16:u16, u32:i32:u32, u64:i64:u64
generate u8:i8:u8, u16:i16:u16
/// Signed saturating rounded shift right narrow
name = vqrshrn
@@ -3806,6 +3849,7 @@ validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
aarch64 = sqshl
link-aarch64 = sqshl._EXT_
generate i64
arm = vqshl
link-arm = vqshifts._EXT_
@@ -3820,7 +3864,7 @@ b = 2
validate 4
aarch64 = sqshl
generate i8, i16, i32, i64
generate i8, i16, i32
/// Unsigned saturating shift left
name = vqshl
@@ -3831,6 +3875,7 @@ validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
aarch64 = uqshl
link-aarch64 = uqshl._EXT_
generate u64:i64:u64
arm = vqshl
link-arm = vqshiftu._EXT_
@@ -3847,7 +3892,7 @@ b = 2
validate 4
aarch64 = uqshl
generate u8:i8:u8, u16:i16:u16, u32:i32:u32, u64:i64:u64
generate u8:i8:u8, u16:i16:u16, u32:i32:u32
/// Signed saturating shift left
name = vqshl
@@ -3915,6 +3960,7 @@ validate 0, 1, 2, 3, 4, 5, 6, 7
aarch64 = sqshrn
link-aarch64 = sqshrn._EXT2_
const-aarch64 = N
generate i64:i32
arm = vqshrn
link-arm = vqshiftns._EXT2_
@@ -3932,7 +3978,7 @@ n = 2
validate 1
aarch64 = sqshrn
generate i16:i8, i32:i16, i64:i32
generate i16:i8, i32:i16
/// Signed saturating shift right narrow
name = vqshrn_high
@@ -3960,6 +4006,7 @@ validate 0, 1, 2, 3, 4, 5, 6, 7
aarch64 = uqshrn
link-aarch64 = uqshrn._EXT2_
const-aarch64 = N
generate u64:u32
arm = vqshrn
link-arm = vqshiftnu._EXT2_
@@ -3977,7 +4024,7 @@ n = 2
validate 1
aarch64 = uqshrn
generate u16:u8, u32:u16, u64:u32
generate u16:u8, u32:u16
/// Unsigned saturating shift right narrow
name = vqshrn_high
@@ -4261,21 +4308,12 @@ validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
aarch64 = srshl
link-aarch64 = srshl._EXT_
generate i64
arm = vrshl
link-arm = vrshifts._EXT_
generate int*_t, int64x*_t
/// Signed rounding shift left
name = vrshl
multi_fn = transmute, {vrshl-in_ntt-noext, transmute(a), transmute(b)}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
aarch64 = srshl
generate i64
/// Unsigned rounding shift left
name = vrshl
out-suffix
@@ -4285,23 +4323,13 @@ validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
aarch64 = urshl
link-aarch64 = urshl._EXT_
generate u64:i64:u64
arm = vrshl
link-arm = vrshiftu._EXT_
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t
/// Unsigned rounding shift left
name = vrshl
out-suffix
multi_fn = transmute, {vrshl-out_ntt-noext, transmute(a), transmute(b)}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
aarch64 = urshl
generate u64:i64:u64
/// Signed rounding shift right
name = vrshr
n-suffix
@@ -4438,15 +4466,14 @@ name = vrsra
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshr_n-in_ntt-::<N>, b:in_ntt, transmute(b)
multi_fn = transmute, {simd_add, transmute(a), b}
multi_fn = vrshr-nself-::<N>, b:in_t, b
multi_fn = a + b
a = 1
b = 4
n = 2
validate 2
// We use "nop" here to skip the instruction test, since it cannot be optimized correctly.
aarch64 = nop
aarch64 = srsra
generate i64
/// Unsigned rounding shift right and accumulate.
@@ -4454,21 +4481,20 @@ name = vrsra
n-suffix
constn = N
multi_fn = static_assert-N-1-bits
multi_fn = vrshr_n-in_ntt-::<N>, b:in_ntt, transmute(b)
multi_fn = transmute, {simd_add, transmute(a), b}
multi_fn = vrshr-nself-::<N>, b:in_t, b
multi_fn = a + b
a = 1
b = 4
n = 2
validate 2
// We use "nop" here to skip the instruction test, since it cannot be optimized correctly.
aarch64 = nop
aarch64 = ursra
generate u64
/// Insert vector element from another vector element
name = vset_lane
constn = LANE
multi_fn = static_assert_imm-in_bits_exp_len-LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_insert, b, LANE as u32, a
a = 1
b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
@@ -4490,7 +4516,7 @@ generate p64:poly64x1_t:poly64x1_t
name = vsetq_lane
no-q
constn = LANE
multi_fn = static_assert_imm-in_bits_exp_len-LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_insert, b, LANE as u32, a
a = 1
b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
@@ -4547,10 +4573,10 @@ a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
arm = vshl
link-arm = vshifts._EXT_
aarch64 = sshl
link-aarch64 = sshl._EXT_
arm = vshl
link-arm = vshifts._EXT_
generate int*_t, int64x*_t
/// Signed Shift left
@@ -4570,10 +4596,10 @@ a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
arm = vshl
link-arm = vshiftu._EXT_
aarch64 = ushl
link-aarch64 = ushl._EXT_
arm = vshl
link-arm = vshiftu._EXT_
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t