Modify the implementation of d_s64 suffix instructions (#1167)
This commit is contained in:
parent
a98b05c635
commit
15749b0ed3
6 changed files with 482 additions and 377 deletions
|
|
@ -3934,26 +3934,6 @@ pub unsafe fn vqsubh_s16(a: i16, b: i16) -> i16 {
|
|||
simd_extract(vqsub_s16(a, b), 0)
|
||||
}
|
||||
|
||||
/// Saturating subtract
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqsub))]
|
||||
pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 {
|
||||
let a: int32x2_t = vdup_n_s32(a);
|
||||
let b: int32x2_t = vdup_n_s32(b);
|
||||
simd_extract(vqsub_s32(a, b), 0)
|
||||
}
|
||||
|
||||
/// Saturating subtract
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqsub))]
|
||||
pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 {
|
||||
let a: int64x1_t = vdup_n_s64(a);
|
||||
let b: int64x1_t = vdup_n_s64(b);
|
||||
simd_extract(vqsub_s64(a, b), 0)
|
||||
}
|
||||
|
||||
/// Saturating subtract
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -3979,9 +3959,12 @@ pub unsafe fn vqsubh_u16(a: u16, b: u16) -> u16 {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(uqsub))]
|
||||
pub unsafe fn vqsubs_u32(a: u32, b: u32) -> u32 {
|
||||
let a: uint32x2_t = vdup_n_u32(a);
|
||||
let b: uint32x2_t = vdup_n_u32(b);
|
||||
simd_extract(vqsub_u32(a, b), 0)
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.i32")]
|
||||
fn vqsubs_u32_(a: u32, b: u32) -> u32;
|
||||
}
|
||||
vqsubs_u32_(a, b)
|
||||
}
|
||||
|
||||
/// Saturating subtract
|
||||
|
|
@ -3989,9 +3972,38 @@ pub unsafe fn vqsubs_u32(a: u32, b: u32) -> u32 {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(uqsub))]
|
||||
pub unsafe fn vqsubd_u64(a: u64, b: u64) -> u64 {
|
||||
let a: uint64x1_t = vdup_n_u64(a);
|
||||
let b: uint64x1_t = vdup_n_u64(b);
|
||||
simd_extract(vqsub_u64(a, b), 0)
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.i64")]
|
||||
fn vqsubd_u64_(a: u64, b: u64) -> u64;
|
||||
}
|
||||
vqsubd_u64_(a, b)
|
||||
}
|
||||
|
||||
/// Saturating subtract
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqsub))]
|
||||
pub unsafe fn vqsubs_s32(a: i32, b: i32) -> i32 {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.i32")]
|
||||
fn vqsubs_s32_(a: i32, b: i32) -> i32;
|
||||
}
|
||||
vqsubs_s32_(a, b)
|
||||
}
|
||||
|
||||
/// Saturating subtract
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqsub))]
|
||||
pub unsafe fn vqsubd_s64(a: i64, b: i64) -> i64 {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.i64")]
|
||||
fn vqsubd_s64_(a: i64, b: i64) -> i64;
|
||||
}
|
||||
vqsubd_s64_(a, b)
|
||||
}
|
||||
|
||||
/// Reverse bit order
|
||||
|
|
@ -4410,26 +4422,6 @@ pub unsafe fn vqaddh_s16(a: i16, b: i16) -> i16 {
|
|||
simd_extract(vqadd_s16(a, b), 0)
|
||||
}
|
||||
|
||||
/// Saturating add
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqadd))]
|
||||
pub unsafe fn vqadds_s32(a: i32, b: i32) -> i32 {
|
||||
let a: int32x2_t = vdup_n_s32(a);
|
||||
let b: int32x2_t = vdup_n_s32(b);
|
||||
simd_extract(vqadd_s32(a, b), 0)
|
||||
}
|
||||
|
||||
/// Saturating add
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqadd))]
|
||||
pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 {
|
||||
let a: int64x1_t = vdup_n_s64(a);
|
||||
let b: int64x1_t = vdup_n_s64(b);
|
||||
simd_extract(vqadd_s64(a, b), 0)
|
||||
}
|
||||
|
||||
/// Saturating add
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -4455,9 +4447,12 @@ pub unsafe fn vqaddh_u16(a: u16, b: u16) -> u16 {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(uqadd))]
|
||||
pub unsafe fn vqadds_u32(a: u32, b: u32) -> u32 {
|
||||
let a: uint32x2_t = vdup_n_u32(a);
|
||||
let b: uint32x2_t = vdup_n_u32(b);
|
||||
simd_extract(vqadd_u32(a, b), 0)
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.i32")]
|
||||
fn vqadds_u32_(a: u32, b: u32) -> u32;
|
||||
}
|
||||
vqadds_u32_(a, b)
|
||||
}
|
||||
|
||||
/// Saturating add
|
||||
|
|
@ -4465,9 +4460,38 @@ pub unsafe fn vqadds_u32(a: u32, b: u32) -> u32 {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(uqadd))]
|
||||
pub unsafe fn vqaddd_u64(a: u64, b: u64) -> u64 {
|
||||
let a: uint64x1_t = vdup_n_u64(a);
|
||||
let b: uint64x1_t = vdup_n_u64(b);
|
||||
simd_extract(vqadd_u64(a, b), 0)
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.i64")]
|
||||
fn vqaddd_u64_(a: u64, b: u64) -> u64;
|
||||
}
|
||||
vqaddd_u64_(a, b)
|
||||
}
|
||||
|
||||
/// Saturating add
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqadd))]
|
||||
pub unsafe fn vqadds_s32(a: i32, b: i32) -> i32 {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.i32")]
|
||||
fn vqadds_s32_(a: i32, b: i32) -> i32;
|
||||
}
|
||||
vqadds_s32_(a, b)
|
||||
}
|
||||
|
||||
/// Saturating add
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqadd))]
|
||||
pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.i64")]
|
||||
fn vqaddd_s64_(a: i64, b: i64) -> i64;
|
||||
}
|
||||
vqaddd_s64_(a, b)
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
|
|
@ -5932,14 +5956,6 @@ pub unsafe fn vqmovns_s32(a: i32) -> i16 {
|
|||
simd_extract(vqmovn_s32(vdupq_n_s32(a)), 0)
|
||||
}
|
||||
|
||||
/// Saturating extract narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqxtn))]
|
||||
pub unsafe fn vqmovnd_s64(a: i64) -> i32 {
|
||||
simd_extract(vqmovn_s64(vdupq_n_s64(a)), 0)
|
||||
}
|
||||
|
||||
/// Saturating extract narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -5956,12 +5972,30 @@ pub unsafe fn vqmovns_u32(a: u32) -> u16 {
|
|||
simd_extract(vqmovn_u32(vdupq_n_u32(a)), 0)
|
||||
}
|
||||
|
||||
/// Saturating extract narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqxtn))]
|
||||
pub unsafe fn vqmovnd_s64(a: i64) -> i32 {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.scalar.sqxtn.i32.i64")]
|
||||
fn vqmovnd_s64_(a: i64) -> i32;
|
||||
}
|
||||
vqmovnd_s64_(a)
|
||||
}
|
||||
|
||||
/// Saturating extract narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(uqxtn))]
|
||||
pub unsafe fn vqmovnd_u64(a: u64) -> u32 {
|
||||
simd_extract(vqmovn_u64(vdupq_n_u64(a)), 0)
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.scalar.uqxtn.i32.i64")]
|
||||
fn vqmovnd_u64_(a: u64) -> u32;
|
||||
}
|
||||
vqmovnd_u64_(a)
|
||||
}
|
||||
|
||||
/// Signed saturating extract narrow
|
||||
|
|
@ -6228,6 +6262,32 @@ pub unsafe fn vqrdmlshs_laneq_s32<const LANE: i32>(a: i32, b: i32, c: int32x4_t)
|
|||
vqsubs_s32(a, vqrdmulhs_laneq_s32::<LANE>(b, c))
|
||||
}
|
||||
|
||||
/// Signed saturating rounding shift left
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqrshl))]
|
||||
pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.i32")]
|
||||
fn vqrshls_s32_(a: i32, b: i32) -> i32;
|
||||
}
|
||||
vqrshls_s32_(a, b)
|
||||
}
|
||||
|
||||
/// Signed saturating rounding shift left
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqrshl))]
|
||||
pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.i64")]
|
||||
fn vqrshld_s64_(a: i64, b: i64) -> i64;
|
||||
}
|
||||
vqrshld_s64_(a, b)
|
||||
}
|
||||
|
||||
/// Signed saturating rounding shift left
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -6248,24 +6308,30 @@ pub unsafe fn vqrshlh_s16(a: i16, b: i16) -> i16 {
|
|||
simd_extract(vqrshl_s16(a, b), 0)
|
||||
}
|
||||
|
||||
/// Signed saturating rounding shift left
|
||||
/// Unsigned saturating rounding shift left
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqrshl))]
|
||||
pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 {
|
||||
let a: int32x2_t = vdup_n_s32(a);
|
||||
let b: int32x2_t = vdup_n_s32(b);
|
||||
simd_extract(vqrshl_s32(a, b), 0)
|
||||
#[cfg_attr(test, assert_instr(uqrshl))]
|
||||
pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.i32")]
|
||||
fn vqrshls_u32_(a: u32, b: i32) -> u32;
|
||||
}
|
||||
vqrshls_u32_(a, b)
|
||||
}
|
||||
|
||||
/// Signed saturating rounding shift left
|
||||
/// Unsigned saturating rounding shift left
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqrshl))]
|
||||
pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 {
|
||||
let a: int64x1_t = vdup_n_s64(a);
|
||||
let b: int64x1_t = vdup_n_s64(b);
|
||||
simd_extract(vqrshl_s64(a, b), 0)
|
||||
#[cfg_attr(test, assert_instr(uqrshl))]
|
||||
pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.i64")]
|
||||
fn vqrshld_u64_(a: u64, b: i64) -> u64;
|
||||
}
|
||||
vqrshld_u64_(a, b)
|
||||
}
|
||||
|
||||
/// Unsigned saturating rounding shift left
|
||||
|
|
@ -6288,26 +6354,6 @@ pub unsafe fn vqrshlh_u16(a: u16, b: i16) -> u16 {
|
|||
simd_extract(vqrshl_u16(a, b), 0)
|
||||
}
|
||||
|
||||
/// Unsigned saturating rounding shift left
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(uqrshl))]
|
||||
pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 {
|
||||
let a: uint32x2_t = vdup_n_u32(a);
|
||||
let b: int32x2_t = vdup_n_s32(b);
|
||||
simd_extract(vqrshl_u32(a, b), 0)
|
||||
}
|
||||
|
||||
/// Unsigned saturating rounding shift left
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(uqrshl))]
|
||||
pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 {
|
||||
let a: uint64x1_t = vdup_n_u64(a);
|
||||
let b: int64x1_t = vdup_n_s64(b);
|
||||
simd_extract(vqrshl_u64(a, b), 0)
|
||||
}
|
||||
|
||||
/// Signed saturating rounded shift right narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -6497,6 +6543,19 @@ pub unsafe fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) ->
|
|||
simd_shuffle4!(a, vqrshrun_n_s64::<N>(b), [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Signed saturating shift left
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqshl))]
|
||||
pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.i64")]
|
||||
fn vqshld_s64_(a: i64, b: i64) -> i64;
|
||||
}
|
||||
vqshld_s64_(a, b)
|
||||
}
|
||||
|
||||
/// Signed saturating shift left
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -6524,13 +6583,17 @@ pub unsafe fn vqshls_s32(a: i32, b: i32) -> i32 {
|
|||
simd_extract(c, 0)
|
||||
}
|
||||
|
||||
/// Signed saturating shift left
|
||||
/// Unsigned saturating shift left
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqshl))]
|
||||
pub unsafe fn vqshld_s64(a: i64, b: i64) -> i64 {
|
||||
let c: int64x1_t = vqshl_s64(vdup_n_s64(a), vdup_n_s64(b));
|
||||
simd_extract(c, 0)
|
||||
#[cfg_attr(test, assert_instr(uqshl))]
|
||||
pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.i64")]
|
||||
fn vqshld_u64_(a: u64, b: i64) -> u64;
|
||||
}
|
||||
vqshld_u64_(a, b)
|
||||
}
|
||||
|
||||
/// Unsigned saturating shift left
|
||||
|
|
@ -6560,15 +6623,6 @@ pub unsafe fn vqshls_u32(a: u32, b: i32) -> u32 {
|
|||
simd_extract(c, 0)
|
||||
}
|
||||
|
||||
/// Unsigned saturating shift left
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(uqshl))]
|
||||
pub unsafe fn vqshld_u64(a: u64, b: i64) -> u64 {
|
||||
let c: uint64x1_t = vqshl_u64(vdup_n_u64(a), vdup_n_s64(b));
|
||||
simd_extract(c, 0)
|
||||
}
|
||||
|
||||
/// Signed saturating shift left
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -6649,6 +6703,21 @@ pub unsafe fn vqshld_n_u64<const N: i32>(a: u64) -> u64 {
|
|||
simd_extract(vqshl_n_u64::<N>(vdup_n_u64(a)), 0)
|
||||
}
|
||||
|
||||
/// Signed saturating shift right narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn vqshrnd_n_s64<const N: i32>(a: i64) -> i32 {
|
||||
static_assert!(N : i32 where N >= 1 && N <= 32);
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.i32")]
|
||||
fn vqshrnd_n_s64_(a: i64, n: i32) -> i32;
|
||||
}
|
||||
vqshrnd_n_s64_(a, N)
|
||||
}
|
||||
|
||||
/// Signed saturating shift right narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -6669,16 +6738,6 @@ pub unsafe fn vqshrns_n_s32<const N: i32>(a: i32) -> i16 {
|
|||
simd_extract(vqshrn_n_s32::<N>(vdupq_n_s32(a)), 0)
|
||||
}
|
||||
|
||||
/// Signed saturating shift right narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn vqshrnd_n_s64<const N: i32>(a: i64) -> i32 {
|
||||
static_assert!(N : i32 where N >= 1 && N <= 32);
|
||||
simd_extract(vqshrn_n_s64::<N>(vdupq_n_s64(a)), 0)
|
||||
}
|
||||
|
||||
/// Signed saturating shift right narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -6709,6 +6768,21 @@ pub unsafe fn vqshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int
|
|||
simd_shuffle4!(a, vqshrn_n_s64::<N>(b), [0, 1, 2, 3])
|
||||
}
|
||||
|
||||
/// Unsigned saturating shift right narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn vqshrnd_n_u64<const N: i32>(a: u64) -> u32 {
|
||||
static_assert!(N : i32 where N >= 1 && N <= 32);
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.i32")]
|
||||
fn vqshrnd_n_u64_(a: u64, n: i32) -> u32;
|
||||
}
|
||||
vqshrnd_n_u64_(a, N)
|
||||
}
|
||||
|
||||
/// Unsigned saturating shift right narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -6729,16 +6803,6 @@ pub unsafe fn vqshrns_n_u32<const N: i32>(a: u32) -> u16 {
|
|||
simd_extract(vqshrn_n_u32::<N>(vdupq_n_u32(a)), 0)
|
||||
}
|
||||
|
||||
/// Unsigned saturating shift right narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn vqshrnd_n_u64<const N: i32>(a: u64) -> u32 {
|
||||
static_assert!(N : i32 where N >= 1 && N <= 32);
|
||||
simd_extract(vqshrn_n_u64::<N>(vdupq_n_u64(a)), 0)
|
||||
}
|
||||
|
||||
/// Unsigned saturating shift right narrow
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -7654,7 +7718,12 @@ pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(srshl))]
|
||||
pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 {
|
||||
transmute(vrshl_s64(transmute(a), transmute(b)))
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.i64")]
|
||||
fn vrshld_s64_(a: i64, b: i64) -> i64;
|
||||
}
|
||||
vrshld_s64_(a, b)
|
||||
}
|
||||
|
||||
/// Unsigned rounding shift left
|
||||
|
|
@ -7662,7 +7731,12 @@ pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(urshl))]
|
||||
pub unsafe fn vrshld_u64(a: u64, b: i64) -> u64 {
|
||||
transmute(vrshl_u64(transmute(a), transmute(b)))
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.i64")]
|
||||
fn vrshld_u64_(a: u64, b: i64) -> u64;
|
||||
}
|
||||
vrshld_u64_(a, b)
|
||||
}
|
||||
|
||||
/// Signed rounding shift right
|
||||
|
|
@ -7748,23 +7822,23 @@ pub unsafe fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> u
|
|||
/// Signed rounding shift right and accumulate.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop, N = 2))]
|
||||
#[cfg_attr(test, assert_instr(srsra, N = 2))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
|
||||
static_assert!(N : i32 where N >= 1 && N <= 64);
|
||||
let b: int64x1_t = vrshr_n_s64::<N>(transmute(b));
|
||||
transmute(simd_add(transmute(a), b))
|
||||
let b: i64 = vrshrd_n_s64::<N>(b);
|
||||
a + b
|
||||
}
|
||||
|
||||
/// Unsigned rounding shift right and accumulate.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop, N = 2))]
|
||||
#[cfg_attr(test, assert_instr(ursra, N = 2))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
|
||||
static_assert!(N : i32 where N >= 1 && N <= 64);
|
||||
let b: uint64x1_t = vrshr_n_u64::<N>(transmute(b));
|
||||
transmute(simd_add(transmute(a), b))
|
||||
let b: u64 = vrshrd_n_u64::<N>(b);
|
||||
a + b
|
||||
}
|
||||
|
||||
/// Insert vector element from another vector element
|
||||
|
|
@ -12089,24 +12163,6 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqsubs_s32() {
|
||||
let a: i32 = 42;
|
||||
let b: i32 = 1;
|
||||
let e: i32 = 41;
|
||||
let r: i32 = transmute(vqsubs_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqsubd_s64() {
|
||||
let a: i64 = 42;
|
||||
let b: i64 = 1;
|
||||
let e: i64 = 41;
|
||||
let r: i64 = transmute(vqsubd_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqsubb_u8() {
|
||||
let a: u8 = 42;
|
||||
|
|
@ -12143,6 +12199,24 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqsubs_s32() {
|
||||
let a: i32 = 42;
|
||||
let b: i32 = 1;
|
||||
let e: i32 = 41;
|
||||
let r: i32 = transmute(vqsubs_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqsubd_s64() {
|
||||
let a: i64 = 42;
|
||||
let b: i64 = 1;
|
||||
let e: i64 = 41;
|
||||
let r: i64 = transmute(vqsubd_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrbit_s8() {
|
||||
let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
|
||||
|
|
@ -12417,24 +12491,6 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqadds_s32() {
|
||||
let a: i32 = 42;
|
||||
let b: i32 = 1;
|
||||
let e: i32 = 43;
|
||||
let r: i32 = transmute(vqadds_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqaddd_s64() {
|
||||
let a: i64 = 42;
|
||||
let b: i64 = 1;
|
||||
let e: i64 = 43;
|
||||
let r: i64 = transmute(vqaddd_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqaddb_u8() {
|
||||
let a: u8 = 42;
|
||||
|
|
@ -12471,6 +12527,24 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqadds_s32() {
|
||||
let a: i32 = 42;
|
||||
let b: i32 = 1;
|
||||
let e: i32 = 43;
|
||||
let r: i32 = transmute(vqadds_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqaddd_s64() {
|
||||
let a: i64 = 42;
|
||||
let b: i64 = 1;
|
||||
let e: i64 = 43;
|
||||
let r: i64 = transmute(vqaddd_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_f64() {
|
||||
let a: f64 = 1.0;
|
||||
|
|
@ -13736,14 +13810,6 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqmovnd_s64() {
|
||||
let a: i64 = 1;
|
||||
let e: i32 = 1;
|
||||
let r: i32 = transmute(vqmovnd_s64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqmovnh_u16() {
|
||||
let a: u16 = 1;
|
||||
|
|
@ -13760,6 +13826,14 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqmovnd_s64() {
|
||||
let a: i64 = 1;
|
||||
let e: i32 = 1;
|
||||
let r: i32 = transmute(vqmovnd_s64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqmovnd_u64() {
|
||||
let a: u64 = 1;
|
||||
|
|
@ -14047,6 +14121,24 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshls_s32() {
|
||||
let a: i32 = 2;
|
||||
let b: i32 = 2;
|
||||
let e: i32 = 8;
|
||||
let r: i32 = transmute(vqrshls_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshld_s64() {
|
||||
let a: i64 = 2;
|
||||
let b: i64 = 2;
|
||||
let e: i64 = 8;
|
||||
let r: i64 = transmute(vqrshld_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshlb_s8() {
|
||||
let a: i8 = 1;
|
||||
|
|
@ -14066,20 +14158,20 @@ mod test {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshls_s32() {
|
||||
let a: i32 = 1;
|
||||
unsafe fn test_vqrshls_u32() {
|
||||
let a: u32 = 2;
|
||||
let b: i32 = 2;
|
||||
let e: i32 = 4;
|
||||
let r: i32 = transmute(vqrshls_s32(transmute(a), transmute(b)));
|
||||
let e: u32 = 8;
|
||||
let r: u32 = transmute(vqrshls_u32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshld_s64() {
|
||||
let a: i64 = 1;
|
||||
unsafe fn test_vqrshld_u64() {
|
||||
let a: u64 = 2;
|
||||
let b: i64 = 2;
|
||||
let e: i64 = 4;
|
||||
let r: i64 = transmute(vqrshld_s64(transmute(a), transmute(b)));
|
||||
let e: u64 = 8;
|
||||
let r: u64 = transmute(vqrshld_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -14101,24 +14193,6 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshls_u32() {
|
||||
let a: u32 = 1;
|
||||
let b: i32 = 2;
|
||||
let e: u32 = 4;
|
||||
let r: u32 = transmute(vqrshls_u32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshld_u64() {
|
||||
let a: u64 = 1;
|
||||
let b: i64 = 2;
|
||||
let e: u64 = 4;
|
||||
let r: u64 = transmute(vqrshld_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshrnh_n_s16() {
|
||||
let a: i16 = 4;
|
||||
|
|
@ -14272,6 +14346,15 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqshld_s64() {
|
||||
let a: i64 = 0;
|
||||
let b: i64 = 2;
|
||||
let e: i64 = 0;
|
||||
let r: i64 = transmute(vqshld_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqshlb_s8() {
|
||||
let a: i8 = 1;
|
||||
|
|
@ -14300,11 +14383,11 @@ mod test {
|
|||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqshld_s64() {
|
||||
let a: i64 = 1;
|
||||
unsafe fn test_vqshld_u64() {
|
||||
let a: u64 = 0;
|
||||
let b: i64 = 2;
|
||||
let e: i64 = 4;
|
||||
let r: i64 = transmute(vqshld_s64(transmute(a), transmute(b)));
|
||||
let e: u64 = 0;
|
||||
let r: u64 = transmute(vqshld_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
|
|
@ -14335,15 +14418,6 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqshld_u64() {
|
||||
let a: u64 = 1;
|
||||
let b: i64 = 2;
|
||||
let e: u64 = 4;
|
||||
let r: u64 = transmute(vqshld_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqshlb_n_s8() {
|
||||
let a: i8 = 1;
|
||||
|
|
@ -14408,6 +14482,14 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqshrnd_n_s64() {
|
||||
let a: i64 = 0;
|
||||
let e: i32 = 0;
|
||||
let r: i32 = transmute(vqshrnd_n_s64::<2>(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqshrnh_n_s16() {
|
||||
let a: i16 = 4;
|
||||
|
|
@ -14424,14 +14506,6 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqshrnd_n_s64() {
|
||||
let a: i64 = 4;
|
||||
let e: i32 = 1;
|
||||
let r: i32 = transmute(vqshrnd_n_s64::<2>(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqshrn_high_n_s16() {
|
||||
let a: i8x8 = i8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
|
||||
|
|
@ -14459,6 +14533,14 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqshrnd_n_u64() {
|
||||
let a: u64 = 0;
|
||||
let e: u32 = 0;
|
||||
let r: u32 = transmute(vqshrnd_n_u64::<2>(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqshrnh_n_u16() {
|
||||
let a: u16 = 4;
|
||||
|
|
@ -14475,14 +14557,6 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqshrnd_n_u64() {
|
||||
let a: u64 = 4;
|
||||
let e: u32 = 1;
|
||||
let r: u32 = transmute(vqshrnd_n_u64::<2>(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqshrn_high_n_u16() {
|
||||
let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
|
||||
|
|
|
|||
|
|
@ -1184,9 +1184,7 @@ pub unsafe fn vadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(add))]
|
||||
pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
|
||||
let a: int64x1_t = transmute(a);
|
||||
let b: int64x1_t = transmute(b);
|
||||
simd_extract(simd_add(a, b), 0)
|
||||
a.wrapping_add(b)
|
||||
}
|
||||
|
||||
/// Vector add.
|
||||
|
|
@ -1194,9 +1192,7 @@ pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(add))]
|
||||
pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {
|
||||
let a: uint64x1_t = transmute(a);
|
||||
let b: uint64x1_t = transmute(b);
|
||||
simd_extract(simd_add(a, b), 0)
|
||||
a.wrapping_add(b)
|
||||
}
|
||||
|
||||
/// Horizontal vector max.
|
||||
|
|
|
|||
|
|
@ -13070,7 +13070,7 @@ pub unsafe fn vset_lane_s8<const LANE: i32>(a: i8, b: int8x8_t) -> int8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vset_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> int16x4_t {
|
||||
static_assert_imm4!(LANE);
|
||||
static_assert_imm2!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
|
|
@ -13082,7 +13082,7 @@ pub unsafe fn vset_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> int16x4_t
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vset_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> int32x2_t {
|
||||
static_assert_imm5!(LANE);
|
||||
static_assert_imm1!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
|
|
@ -13094,7 +13094,7 @@ pub unsafe fn vset_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> int32x2_t
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vset_lane_s64<const LANE: i32>(a: i64, b: int64x1_t) -> int64x1_t {
|
||||
static_assert_imm6!(LANE);
|
||||
static_assert!(LANE : i32 where LANE == 0);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
|
|
@ -13118,7 +13118,7 @@ pub unsafe fn vset_lane_u8<const LANE: i32>(a: u8, b: uint8x8_t) -> uint8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vset_lane_u16<const LANE: i32>(a: u16, b: uint16x4_t) -> uint16x4_t {
|
||||
static_assert_imm4!(LANE);
|
||||
static_assert_imm2!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
|
|
@ -13130,7 +13130,7 @@ pub unsafe fn vset_lane_u16<const LANE: i32>(a: u16, b: uint16x4_t) -> uint16x4_
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vset_lane_u32<const LANE: i32>(a: u32, b: uint32x2_t) -> uint32x2_t {
|
||||
static_assert_imm5!(LANE);
|
||||
static_assert_imm1!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
|
|
@ -13142,7 +13142,7 @@ pub unsafe fn vset_lane_u32<const LANE: i32>(a: u32, b: uint32x2_t) -> uint32x2_
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vset_lane_u64<const LANE: i32>(a: u64, b: uint64x1_t) -> uint64x1_t {
|
||||
static_assert_imm6!(LANE);
|
||||
static_assert!(LANE : i32 where LANE == 0);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
|
|
@ -13166,7 +13166,7 @@ pub unsafe fn vset_lane_p8<const LANE: i32>(a: p8, b: poly8x8_t) -> poly8x8_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vset_lane_p16<const LANE: i32>(a: p16, b: poly16x4_t) -> poly16x4_t {
|
||||
static_assert_imm4!(LANE);
|
||||
static_assert_imm2!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
|
|
@ -13178,7 +13178,7 @@ pub unsafe fn vset_lane_p16<const LANE: i32>(a: p16, b: poly16x4_t) -> poly16x4_
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vset_lane_p64<const LANE: i32>(a: p64, b: poly64x1_t) -> poly64x1_t {
|
||||
static_assert_imm6!(LANE);
|
||||
static_assert!(LANE : i32 where LANE == 0);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
|
|
@ -13190,7 +13190,7 @@ pub unsafe fn vset_lane_p64<const LANE: i32>(a: p64, b: poly64x1_t) -> poly64x1_
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsetq_lane_s8<const LANE: i32>(a: i8, b: int8x16_t) -> int8x16_t {
|
||||
static_assert_imm3!(LANE);
|
||||
static_assert_imm4!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
|
|
@ -13202,7 +13202,7 @@ pub unsafe fn vsetq_lane_s8<const LANE: i32>(a: i8, b: int8x16_t) -> int8x16_t {
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsetq_lane_s16<const LANE: i32>(a: i16, b: int16x8_t) -> int16x8_t {
|
||||
static_assert_imm4!(LANE);
|
||||
static_assert_imm3!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
|
|
@ -13214,7 +13214,7 @@ pub unsafe fn vsetq_lane_s16<const LANE: i32>(a: i16, b: int16x8_t) -> int16x8_t
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsetq_lane_s32<const LANE: i32>(a: i32, b: int32x4_t) -> int32x4_t {
|
||||
static_assert_imm5!(LANE);
|
||||
static_assert_imm2!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
|
|
@ -13226,7 +13226,7 @@ pub unsafe fn vsetq_lane_s32<const LANE: i32>(a: i32, b: int32x4_t) -> int32x4_t
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsetq_lane_s64<const LANE: i32>(a: i64, b: int64x2_t) -> int64x2_t {
|
||||
static_assert_imm6!(LANE);
|
||||
static_assert_imm1!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
|
|
@ -13238,7 +13238,7 @@ pub unsafe fn vsetq_lane_s64<const LANE: i32>(a: i64, b: int64x2_t) -> int64x2_t
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsetq_lane_u8<const LANE: i32>(a: u8, b: uint8x16_t) -> uint8x16_t {
|
||||
static_assert_imm3!(LANE);
|
||||
static_assert_imm4!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
|
|
@ -13250,42 +13250,6 @@ pub unsafe fn vsetq_lane_u8<const LANE: i32>(a: u8, b: uint8x16_t) -> uint8x16_t
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsetq_lane_u16<const LANE: i32>(a: u16, b: uint16x8_t) -> uint16x8_t {
|
||||
static_assert_imm4!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
/// Insert vector element from another vector element
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsetq_lane_u32<const LANE: i32>(a: u32, b: uint32x4_t) -> uint32x4_t {
|
||||
static_assert_imm5!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
/// Insert vector element from another vector element
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsetq_lane_u64<const LANE: i32>(a: u64, b: uint64x2_t) -> uint64x2_t {
|
||||
static_assert_imm6!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
/// Insert vector element from another vector element
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsetq_lane_p8<const LANE: i32>(a: p8, b: poly8x16_t) -> poly8x16_t {
|
||||
static_assert_imm3!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
|
@ -13297,11 +13261,47 @@ pub unsafe fn vsetq_lane_p8<const LANE: i32>(a: p8, b: poly8x16_t) -> poly8x16_t
|
|||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsetq_lane_p16<const LANE: i32>(a: p16, b: poly16x8_t) -> poly16x8_t {
|
||||
pub unsafe fn vsetq_lane_u32<const LANE: i32>(a: u32, b: uint32x4_t) -> uint32x4_t {
|
||||
static_assert_imm2!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
/// Insert vector element from another vector element
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsetq_lane_u64<const LANE: i32>(a: u64, b: uint64x2_t) -> uint64x2_t {
|
||||
static_assert_imm1!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
/// Insert vector element from another vector element
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsetq_lane_p8<const LANE: i32>(a: p8, b: poly8x16_t) -> poly8x16_t {
|
||||
static_assert_imm4!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
/// Insert vector element from another vector element
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsetq_lane_p16<const LANE: i32>(a: p16, b: poly16x8_t) -> poly16x8_t {
|
||||
static_assert_imm3!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
/// Insert vector element from another vector element
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,crypto")]
|
||||
|
|
@ -13310,7 +13310,7 @@ pub unsafe fn vsetq_lane_p16<const LANE: i32>(a: p16, b: poly16x8_t) -> poly16x8
|
|||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsetq_lane_p64<const LANE: i32>(a: p64, b: poly64x2_t) -> poly64x2_t {
|
||||
static_assert_imm6!(LANE);
|
||||
static_assert_imm1!(LANE);
|
||||
simd_insert(b, LANE as u32, a)
|
||||
}
|
||||
|
||||
|
|
@ -21006,144 +21006,144 @@ mod test {
|
|||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshl_s8() {
|
||||
let a: i8x8 = i8x8::new(-128, 0x7F, 2, 3, 4, 5, 6, 7);
|
||||
let a: i8x8 = i8x8::new(2, -128, 0x7F, 3, 4, 5, 6, 7);
|
||||
let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let e: i8x8 = i8x8::new(-128, 0x7F, 8, 12, 16, 20, 24, 28);
|
||||
let e: i8x8 = i8x8::new(8, -128, 0x7F, 12, 16, 20, 24, 28);
|
||||
let r: i8x8 = transmute(vqrshl_s8(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshlq_s8() {
|
||||
let a: i8x16 = i8x16::new(-128, 0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let a: i8x16 = i8x16::new(2, -128, 0x7F, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let e: i8x16 = i8x16::new(-128, 0x7F, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
|
||||
let e: i8x16 = i8x16::new(8, -128, 0x7F, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
|
||||
let r: i8x16 = transmute(vqrshlq_s8(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshl_s16() {
|
||||
let a: i16x4 = i16x4::new(-32768, 0x7F_FF, 2, 3);
|
||||
let a: i16x4 = i16x4::new(2, -32768, 0x7F_FF, 3);
|
||||
let b: i16x4 = i16x4::new(2, 2, 2, 2);
|
||||
let e: i16x4 = i16x4::new(-32768, 0x7F_FF, 8, 12);
|
||||
let e: i16x4 = i16x4::new(8, -32768, 0x7F_FF, 12);
|
||||
let r: i16x4 = transmute(vqrshl_s16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshlq_s16() {
|
||||
let a: i16x8 = i16x8::new(-32768, 0x7F_FF, 2, 3, 4, 5, 6, 7);
|
||||
let a: i16x8 = i16x8::new(2, -32768, 0x7F_FF, 3, 4, 5, 6, 7);
|
||||
let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let e: i16x8 = i16x8::new(-32768, 0x7F_FF, 8, 12, 16, 20, 24, 28);
|
||||
let e: i16x8 = i16x8::new(8, -32768, 0x7F_FF, 12, 16, 20, 24, 28);
|
||||
let r: i16x8 = transmute(vqrshlq_s16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshl_s32() {
|
||||
let a: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF);
|
||||
let a: i32x2 = i32x2::new(2, -2147483648);
|
||||
let b: i32x2 = i32x2::new(2, 2);
|
||||
let e: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF);
|
||||
let e: i32x2 = i32x2::new(8, -2147483648);
|
||||
let r: i32x2 = transmute(vqrshl_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshlq_s32() {
|
||||
let a: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 2, 3);
|
||||
let a: i32x4 = i32x4::new(2, -2147483648, 0x7F_FF_FF_FF, 3);
|
||||
let b: i32x4 = i32x4::new(2, 2, 2, 2);
|
||||
let e: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 8, 12);
|
||||
let e: i32x4 = i32x4::new(8, -2147483648, 0x7F_FF_FF_FF, 12);
|
||||
let r: i32x4 = transmute(vqrshlq_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshl_s64() {
|
||||
let a: i64x1 = i64x1::new(-9223372036854775808);
|
||||
let a: i64x1 = i64x1::new(2);
|
||||
let b: i64x1 = i64x1::new(2);
|
||||
let e: i64x1 = i64x1::new(-9223372036854775808);
|
||||
let e: i64x1 = i64x1::new(8);
|
||||
let r: i64x1 = transmute(vqrshl_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshlq_s64() {
|
||||
let a: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF);
|
||||
let a: i64x2 = i64x2::new(2, -9223372036854775808);
|
||||
let b: i64x2 = i64x2::new(2, 2);
|
||||
let e: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF);
|
||||
let e: i64x2 = i64x2::new(8, -9223372036854775808);
|
||||
let r: i64x2 = transmute(vqrshlq_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshl_u8() {
|
||||
let a: u8x8 = u8x8::new(0, 0xFF, 2, 3, 4, 5, 6, 7);
|
||||
let a: u8x8 = u8x8::new(2, 0, 0xFF, 3, 4, 5, 6, 7);
|
||||
let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let e: u8x8 = u8x8::new(0, 0xFF, 8, 12, 16, 20, 24, 28);
|
||||
let e: u8x8 = u8x8::new(8, 0, 0xFF, 12, 16, 20, 24, 28);
|
||||
let r: u8x8 = transmute(vqrshl_u8(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshlq_u8() {
|
||||
let a: u8x16 = u8x16::new(0, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let a: u8x16 = u8x16::new(2, 0, 0xFF, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let e: u8x16 = u8x16::new(0, 0xFF, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
|
||||
let e: u8x16 = u8x16::new(8, 0, 0xFF, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
|
||||
let r: u8x16 = transmute(vqrshlq_u8(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshl_u16() {
|
||||
let a: u16x4 = u16x4::new(0, 0xFF_FF, 2, 3);
|
||||
let a: u16x4 = u16x4::new(2, 0, 0xFF_FF, 3);
|
||||
let b: i16x4 = i16x4::new(2, 2, 2, 2);
|
||||
let e: u16x4 = u16x4::new(0, 0xFF_FF, 8, 12);
|
||||
let e: u16x4 = u16x4::new(8, 0, 0xFF_FF, 12);
|
||||
let r: u16x4 = transmute(vqrshl_u16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshlq_u16() {
|
||||
let a: u16x8 = u16x8::new(0, 0xFF_FF, 2, 3, 4, 5, 6, 7);
|
||||
let a: u16x8 = u16x8::new(2, 0, 0xFF_FF, 3, 4, 5, 6, 7);
|
||||
let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let e: u16x8 = u16x8::new(0, 0xFF_FF, 8, 12, 16, 20, 24, 28);
|
||||
let e: u16x8 = u16x8::new(8, 0, 0xFF_FF, 12, 16, 20, 24, 28);
|
||||
let r: u16x8 = transmute(vqrshlq_u16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshl_u32() {
|
||||
let a: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
|
||||
let a: u32x2 = u32x2::new(2, 0);
|
||||
let b: i32x2 = i32x2::new(2, 2);
|
||||
let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
|
||||
let e: u32x2 = u32x2::new(8, 0);
|
||||
let r: u32x2 = transmute(vqrshl_u32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshlq_u32() {
|
||||
let a: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 2, 3);
|
||||
let a: u32x4 = u32x4::new(2, 0, 0xFF_FF_FF_FF, 3);
|
||||
let b: i32x4 = i32x4::new(2, 2, 2, 2);
|
||||
let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 8, 12);
|
||||
let e: u32x4 = u32x4::new(8, 0, 0xFF_FF_FF_FF, 12);
|
||||
let r: u32x4 = transmute(vqrshlq_u32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshl_u64() {
|
||||
let a: u64x1 = u64x1::new(0);
|
||||
let a: u64x1 = u64x1::new(2);
|
||||
let b: i64x1 = i64x1::new(2);
|
||||
let e: u64x1 = u64x1::new(0);
|
||||
let e: u64x1 = u64x1::new(8);
|
||||
let r: u64x1 = transmute(vqrshl_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshlq_u64() {
|
||||
let a: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let a: u64x2 = u64x2::new(2, 0);
|
||||
let b: i64x2 = i64x2::new(2, 2);
|
||||
let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
|
||||
let e: u64x2 = u64x2::new(8, 0);
|
||||
let r: u64x2 = transmute(vqrshlq_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,10 @@
|
|||
pub(crate) struct ValidateConstRound<const IMM: i32>;
|
||||
impl<const IMM: i32> ValidateConstRound<IMM> {
|
||||
pub(crate) const VALID: () = {
|
||||
assert!(IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11, "Invalid IMM value");
|
||||
assert!(
|
||||
IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11,
|
||||
"Invalid IMM value"
|
||||
);
|
||||
};
|
||||
}
|
||||
|
||||
|
|
@ -70,7 +73,10 @@ macro_rules! static_assert_imm_u8 {
|
|||
pub(crate) struct ValidateConstGatherScale<const SCALE: i32>;
|
||||
impl<const SCALE: i32> ValidateConstGatherScale<SCALE> {
|
||||
pub(crate) const VALID: () = {
|
||||
assert!(SCALE == 1 || SCALE == 2 || SCALE == 4 || SCALE == 8, "Invalid SCALE value");
|
||||
assert!(
|
||||
SCALE == 1 || SCALE == 2 || SCALE == 4 || SCALE == 8,
|
||||
"Invalid SCALE value"
|
||||
);
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,10 @@
|
|||
pub(crate) struct ValidateConstRound<const IMM: i32>;
|
||||
impl<const IMM: i32> ValidateConstRound<IMM> {
|
||||
pub(crate) const VALID: () = {
|
||||
assert!(IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11, "Invalid IMM value");
|
||||
assert!(
|
||||
IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11,
|
||||
"Invalid IMM value"
|
||||
);
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1843,9 +1843,23 @@ b = 1
|
|||
validate 41
|
||||
|
||||
aarch64 = sqsub
|
||||
generate i8, i16, i32, i64
|
||||
generate i8, i16
|
||||
aarch64 = uqsub
|
||||
generate u8, u16, u32, u64
|
||||
generate u8, u16
|
||||
|
||||
/// Saturating subtract
|
||||
name = vqsub
|
||||
a = 42
|
||||
b = 1
|
||||
validate 41
|
||||
|
||||
aarch64 = uqsub
|
||||
link-aarch64 = uqsub._EXT_
|
||||
generate u32, u64
|
||||
|
||||
aarch64 = sqsub
|
||||
link-aarch64 = sqsub._EXT_
|
||||
generate i32, i64
|
||||
|
||||
/// Halving add
|
||||
name = vhadd
|
||||
|
|
@ -1999,9 +2013,23 @@ b = 1
|
|||
validate 43
|
||||
|
||||
aarch64 = sqadd
|
||||
generate i8, i16, i32, i64
|
||||
generate i8, i16
|
||||
aarch64 = uqadd
|
||||
generate u8, u16, u32, u64
|
||||
generate u8, u16
|
||||
|
||||
/// Saturating add
|
||||
name = vqadd
|
||||
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
|
||||
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58
|
||||
|
||||
aarch64 = uqadd
|
||||
link-aarch64 = uqadd._EXT_
|
||||
generate u32, u64
|
||||
|
||||
aarch64 = sqadd
|
||||
link-aarch64 = sqadd._EXT_
|
||||
generate i32, i64
|
||||
|
||||
/// Multiply
|
||||
name = vmul
|
||||
|
|
@ -3383,9 +3411,22 @@ a = 1
|
|||
validate 1
|
||||
|
||||
aarch64 = sqxtn
|
||||
generate i16:i8, i32:i16, i64:i32
|
||||
generate i16:i8, i32:i16
|
||||
aarch64 = uqxtn
|
||||
generate u16:u8, u32:u16, u64:u32
|
||||
generate u16:u8, u32:u16
|
||||
|
||||
/// Saturating extract narrow
|
||||
name = vqmovn
|
||||
a = 1
|
||||
validate 1
|
||||
|
||||
aarch64 = sqxtn
|
||||
link-aarch64 = scalar.sqxtn._EXT2_._EXT_
|
||||
generate i64:i32
|
||||
|
||||
aarch64 = uqxtn
|
||||
link-aarch64 = scalar.uqxtn._EXT2_._EXT_
|
||||
generate u64:u32
|
||||
|
||||
/// Signed saturating extract narrow
|
||||
name = vqmovn_high
|
||||
|
|
@ -3609,12 +3650,13 @@ generate i16:i16:int16x4_t:i16, i16:i16:int16x8_t:i16, i32:i32:int32x2_t:i32, i3
|
|||
|
||||
/// Signed saturating rounding shift left
|
||||
name = vqrshl
|
||||
a = MIN, MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
validate MIN, MAX, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
|
||||
validate 8, MIN, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
|
||||
|
||||
aarch64 = sqrshl
|
||||
link-aarch64 = sqrshl._EXT_
|
||||
generate i32, i64
|
||||
|
||||
arm = vqrshl
|
||||
link-arm = vqrshifts._EXT_
|
||||
|
|
@ -3630,17 +3672,18 @@ b = 2
|
|||
validate 4
|
||||
|
||||
aarch64 = sqrshl
|
||||
generate i8, i16, i32, i64
|
||||
generate i8, i16
|
||||
|
||||
/// Unsigned saturating rounding shift left
|
||||
name = vqrshl
|
||||
out-suffix
|
||||
a = MIN, MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
a = 2, MIN, MAX, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
validate 0, MAX, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
|
||||
validate 8, 0, MAX, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
|
||||
|
||||
aarch64 = uqrshl
|
||||
link-aarch64 = uqrshl._EXT_
|
||||
generate u32:i32:u32, u64:i64:u64
|
||||
|
||||
arm = vqrshl
|
||||
link-arm = vqrshiftu._EXT_
|
||||
|
|
@ -3658,7 +3701,7 @@ b = 2
|
|||
validate 4
|
||||
|
||||
aarch64 = uqrshl
|
||||
generate u8:i8:u8, u16:i16:u16, u32:i32:u32, u64:i64:u64
|
||||
generate u8:i8:u8, u16:i16:u16
|
||||
|
||||
/// Signed saturating rounded shift right narrow
|
||||
name = vqrshrn
|
||||
|
|
@ -3806,6 +3849,7 @@ validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
|
|||
|
||||
aarch64 = sqshl
|
||||
link-aarch64 = sqshl._EXT_
|
||||
generate i64
|
||||
|
||||
arm = vqshl
|
||||
link-arm = vqshifts._EXT_
|
||||
|
|
@ -3820,7 +3864,7 @@ b = 2
|
|||
validate 4
|
||||
|
||||
aarch64 = sqshl
|
||||
generate i8, i16, i32, i64
|
||||
generate i8, i16, i32
|
||||
|
||||
/// Unsigned saturating shift left
|
||||
name = vqshl
|
||||
|
|
@ -3831,6 +3875,7 @@ validate 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
|
|||
|
||||
aarch64 = uqshl
|
||||
link-aarch64 = uqshl._EXT_
|
||||
generate u64:i64:u64
|
||||
|
||||
arm = vqshl
|
||||
link-arm = vqshiftu._EXT_
|
||||
|
|
@ -3847,7 +3892,7 @@ b = 2
|
|||
validate 4
|
||||
|
||||
aarch64 = uqshl
|
||||
generate u8:i8:u8, u16:i16:u16, u32:i32:u32, u64:i64:u64
|
||||
generate u8:i8:u8, u16:i16:u16, u32:i32:u32
|
||||
|
||||
/// Signed saturating shift left
|
||||
name = vqshl
|
||||
|
|
@ -3915,6 +3960,7 @@ validate 0, 1, 2, 3, 4, 5, 6, 7
|
|||
aarch64 = sqshrn
|
||||
link-aarch64 = sqshrn._EXT2_
|
||||
const-aarch64 = N
|
||||
generate i64:i32
|
||||
|
||||
arm = vqshrn
|
||||
link-arm = vqshiftns._EXT2_
|
||||
|
|
@ -3932,7 +3978,7 @@ n = 2
|
|||
validate 1
|
||||
|
||||
aarch64 = sqshrn
|
||||
generate i16:i8, i32:i16, i64:i32
|
||||
generate i16:i8, i32:i16
|
||||
|
||||
/// Signed saturating shift right narrow
|
||||
name = vqshrn_high
|
||||
|
|
@ -3960,6 +4006,7 @@ validate 0, 1, 2, 3, 4, 5, 6, 7
|
|||
aarch64 = uqshrn
|
||||
link-aarch64 = uqshrn._EXT2_
|
||||
const-aarch64 = N
|
||||
generate u64:u32
|
||||
|
||||
arm = vqshrn
|
||||
link-arm = vqshiftnu._EXT2_
|
||||
|
|
@ -3977,7 +4024,7 @@ n = 2
|
|||
validate 1
|
||||
|
||||
aarch64 = uqshrn
|
||||
generate u16:u8, u32:u16, u64:u32
|
||||
generate u16:u8, u32:u16
|
||||
|
||||
/// Unsigned saturating shift right narrow
|
||||
name = vqshrn_high
|
||||
|
|
@ -4261,21 +4308,12 @@ validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
|||
|
||||
aarch64 = srshl
|
||||
link-aarch64 = srshl._EXT_
|
||||
generate i64
|
||||
|
||||
arm = vrshl
|
||||
link-arm = vrshifts._EXT_
|
||||
generate int*_t, int64x*_t
|
||||
|
||||
/// Signed rounding shift left
|
||||
name = vrshl
|
||||
multi_fn = transmute, {vrshl-in_ntt-noext, transmute(a), transmute(b)}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
|
||||
aarch64 = srshl
|
||||
generate i64
|
||||
|
||||
/// Unsigned rounding shift left
|
||||
name = vrshl
|
||||
out-suffix
|
||||
|
|
@ -4285,23 +4323,13 @@ validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
|||
|
||||
aarch64 = urshl
|
||||
link-aarch64 = urshl._EXT_
|
||||
generate u64:i64:u64
|
||||
|
||||
arm = vrshl
|
||||
link-arm = vrshiftu._EXT_
|
||||
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
|
||||
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t
|
||||
|
||||
/// Unsigned rounding shift left
|
||||
name = vrshl
|
||||
out-suffix
|
||||
multi_fn = transmute, {vrshl-out_ntt-noext, transmute(a), transmute(b)}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
|
||||
aarch64 = urshl
|
||||
generate u64:i64:u64
|
||||
|
||||
/// Signed rounding shift right
|
||||
name = vrshr
|
||||
n-suffix
|
||||
|
|
@ -4438,15 +4466,14 @@ name = vrsra
|
|||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = vrshr_n-in_ntt-::<N>, b:in_ntt, transmute(b)
|
||||
multi_fn = transmute, {simd_add, transmute(a), b}
|
||||
multi_fn = vrshr-nself-::<N>, b:in_t, b
|
||||
multi_fn = a + b
|
||||
a = 1
|
||||
b = 4
|
||||
n = 2
|
||||
validate 2
|
||||
|
||||
// We use "nop" here to skip the instruction test, since it cannot be optimized correctly.
|
||||
aarch64 = nop
|
||||
aarch64 = srsra
|
||||
generate i64
|
||||
|
||||
/// Unsigned rounding shift right and accumulate.
|
||||
|
|
@ -4454,21 +4481,20 @@ name = vrsra
|
|||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = vrshr_n-in_ntt-::<N>, b:in_ntt, transmute(b)
|
||||
multi_fn = transmute, {simd_add, transmute(a), b}
|
||||
multi_fn = vrshr-nself-::<N>, b:in_t, b
|
||||
multi_fn = a + b
|
||||
a = 1
|
||||
b = 4
|
||||
n = 2
|
||||
validate 2
|
||||
|
||||
// We use "nop" here to skip the instruction test, since it cannot be optimized correctly.
|
||||
aarch64 = nop
|
||||
aarch64 = ursra
|
||||
generate u64
|
||||
|
||||
/// Insert vector element from another vector element
|
||||
name = vset_lane
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_bits_exp_len-LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = simd_insert, b, LANE as u32, a
|
||||
a = 1
|
||||
b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
|
|
@ -4490,7 +4516,7 @@ generate p64:poly64x1_t:poly64x1_t
|
|||
name = vsetq_lane
|
||||
no-q
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_bits_exp_len-LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = simd_insert, b, LANE as u32, a
|
||||
a = 1
|
||||
b = 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
|
|
@ -4547,10 +4573,10 @@ a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
|||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
|
||||
arm = vshl
|
||||
link-arm = vshifts._EXT_
|
||||
aarch64 = sshl
|
||||
link-aarch64 = sshl._EXT_
|
||||
arm = vshl
|
||||
link-arm = vshifts._EXT_
|
||||
generate int*_t, int64x*_t
|
||||
|
||||
/// Signed Shift left
|
||||
|
|
@ -4570,10 +4596,10 @@ a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
|||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
|
||||
arm = vshl
|
||||
link-arm = vshiftu._EXT_
|
||||
aarch64 = ushl
|
||||
link-aarch64 = ushl._EXT_
|
||||
arm = vshl
|
||||
link-arm = vshiftu._EXT_
|
||||
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
|
||||
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue