Add vrshl, vrshr, vrshrn, vrsra, vsra neon instructions (#1127)
This commit is contained in:
parent
d46e0086e4
commit
6354de5993
5 changed files with 2205 additions and 0 deletions
|
|
@ -4605,6 +4605,140 @@ pub unsafe fn vreinterpretq_f32_f64(a: float64x2_t) -> float32x4_t {
|
|||
transmute(a)
|
||||
}
|
||||
|
||||
/// Signed rounding shift left
///
/// Scalar form (`SRSHL`): shifts `a` left by `b`; a negative `b` performs a
/// rounding right shift (used by `vrshrd_n_s64` below).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(srshl))]
pub unsafe fn vrshld_s64(a: i64, b: i64) -> i64 {
    // Delegate to the 64x1 vector intrinsic; the transmutes are bit-for-bit
    // reinterpretations between i64 and int64x1_t.
    transmute(vrshl_s64(transmute(a), transmute(b)))
}
|
||||
|
||||
/// Unsigned rounding shift left
///
/// Scalar form (`URSHL`): shifts `a` left by the signed amount `b`; a
/// negative `b` performs a rounding right shift (used by `vrshrd_n_u64`).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(urshl))]
pub unsafe fn vrshld_u64(a: u64, b: i64) -> u64 {
    // Delegate to the 64x1 vector intrinsic; the transmutes are bit-for-bit
    // reinterpretations between the scalar and one-lane vector types.
    transmute(vrshl_u64(transmute(a), transmute(b)))
}
|
||||
|
||||
/// Signed rounding shift right
///
/// Scalar form (`SRSHR`): rounding right shift of `a` by the immediate
/// `N`, which must be in `1..=64`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(srshr, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vrshrd_n_s64<const N: i32>(a: i64) -> i64 {
    static_assert!(N : i32 where N >= 1 && N <= 64);
    // A rounding right shift by N is a rounding left shift by -N.
    vrshld_s64(a, -N as i64)
}
|
||||
|
||||
/// Unsigned rounding shift right
///
/// Scalar form (`URSHR`): rounding right shift of `a` by the immediate
/// `N`, which must be in `1..=64`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(urshr, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vrshrd_n_u64<const N: i32>(a: u64) -> u64 {
    static_assert!(N : i32 where N >= 1 && N <= 64);
    // A rounding right shift by N is a rounding left shift by -N.
    vrshld_u64(a, -N as i64)
}
|
||||
|
||||
/// Rounding shift right narrow
///
/// `RSHRN2`: narrows each 16-bit lane of `b` with a rounding right shift by
/// `N` (1..=8) into the high half of the result; the low half is `a`
/// unchanged.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    // Lanes 0..8 come from `a`, lanes 8..16 from the narrowed `b`.
    simd_shuffle16(a, vrshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
}
|
||||
|
||||
/// Rounding shift right narrow
///
/// `RSHRN2`: narrows each 32-bit lane of `b` with a rounding right shift by
/// `N` (1..=16) into the high half of the result; the low half is `a`
/// unchanged.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
    static_assert!(N : i32 where N >= 1 && N <= 16);
    // Lanes 0..4 come from `a`, lanes 4..8 from the narrowed `b`.
    simd_shuffle8(a, vrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
}
|
||||
|
||||
/// Rounding shift right narrow
///
/// `RSHRN2`: narrows each 64-bit lane of `b` with a rounding right shift by
/// `N` (1..=32) into the high half of the result; the low half is `a`
/// unchanged.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
    static_assert!(N : i32 where N >= 1 && N <= 32);
    // Lanes 0..2 come from `a`, lanes 2..4 from the narrowed `b`.
    simd_shuffle4(a, vrshrn_n_s64::<N>(b), [0, 1, 2, 3])
}
|
||||
|
||||
/// Rounding shift right narrow
///
/// `RSHRN2` (unsigned vectors): narrows each 16-bit lane of `b` with a
/// rounding right shift by `N` (1..=8) into the high half of the result;
/// the low half is `a` unchanged.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    // Lanes 0..8 come from `a`, lanes 8..16 from the narrowed `b`.
    simd_shuffle16(a, vrshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
}
|
||||
|
||||
/// Rounding shift right narrow
///
/// `RSHRN2` (unsigned vectors): narrows each 32-bit lane of `b` with a
/// rounding right shift by `N` (1..=16) into the high half of the result;
/// the low half is `a` unchanged.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
    static_assert!(N : i32 where N >= 1 && N <= 16);
    // Lanes 0..4 come from `a`, lanes 4..8 from the narrowed `b`.
    simd_shuffle8(a, vrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
}
|
||||
|
||||
/// Rounding shift right narrow
///
/// `RSHRN2` (unsigned vectors): narrows each 64-bit lane of `b` with a
/// rounding right shift by `N` (1..=32) into the high half of the result;
/// the low half is `a` unchanged.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(rshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
    static_assert!(N : i32 where N >= 1 && N <= 32);
    // Lanes 0..2 come from `a`, lanes 2..4 from the narrowed `b`.
    simd_shuffle4(a, vrshrn_n_u64::<N>(b), [0, 1, 2, 3])
}
|
||||
|
||||
/// Signed rounding shift right and accumulate.
///
/// Rounding-shifts `b` right by the immediate `N` (1..=64) and adds the
/// result to `a`. Instruction check is skipped (`nop`) — see the spec file.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vrsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
    static_assert!(N : i32 where N >= 1 && N <= 64);
    // Rounding right shift in the 64x1 vector domain, then a simd (wrapping)
    // add with the accumulator; transmutes reinterpret i64 <-> int64x1_t.
    let b: int64x1_t = vrshr_n_s64::<N>(transmute(b));
    transmute(simd_add(transmute(a), b))
}
|
||||
|
||||
/// Unsigned rounding shift right and accumulate.
///
/// Rounding-shifts `b` right by the immediate `N` (1..=64) and adds the
/// result to `a`. Instruction check is skipped (`nop`) — see the spec file.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vrsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
    static_assert!(N : i32 where N >= 1 && N <= 64);
    // Rounding right shift in the 64x1 vector domain, then a simd (wrapping)
    // add with the accumulator; transmutes reinterpret u64 <-> uint64x1_t.
    let b: uint64x1_t = vrshr_n_u64::<N>(transmute(b));
    transmute(simd_add(transmute(a), b))
}
|
||||
|
||||
/// Signed Shift left
///
/// Scalar form (`SSHL`): shifts `a` left by `b` (a negative `b` shifts
/// right), delegating to the 64x1 vector intrinsic.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshl))]
pub unsafe fn vshld_s64(a: i64, b: i64) -> i64 {
    // Transmutes are bit-for-bit reinterpretations between i64 and int64x1_t.
    transmute(vshl_s64(transmute(a), transmute(b)))
}
|
||||
|
||||
/// Unsigned Shift left
///
/// Scalar form (`USHL`): shifts `a` left by the signed amount `b`,
/// delegating to the 64x1 vector intrinsic.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ushl))]
pub unsafe fn vshld_u64(a: u64, b: i64) -> u64 {
    // Transmutes reinterpret the scalars as one-lane vectors and back.
    transmute(vshl_u64(transmute(a), transmute(b)))
}
|
||||
|
||||
/// Signed shift left long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -9872,6 +10006,130 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrshld_s64() {
|
||||
let a: i64 = 1;
|
||||
let b: i64 = 2;
|
||||
let e: i64 = 4;
|
||||
let r: i64 = transmute(vrshld_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrshld_u64() {
|
||||
let a: u64 = 1;
|
||||
let b: i64 = 2;
|
||||
let e: u64 = 4;
|
||||
let r: u64 = transmute(vrshld_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrshrd_n_s64() {
|
||||
let a: i64 = 4;
|
||||
let e: i64 = 1;
|
||||
let r: i64 = transmute(vrshrd_n_s64::<2>(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrshrd_n_u64() {
|
||||
let a: u64 = 4;
|
||||
let e: u64 = 1;
|
||||
let r: u64 = transmute(vrshrd_n_u64::<2>(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
unsafe fn test_vrshrn_high_n_s16() {
    // Expected result: low 8 lanes are `a` unchanged; high 8 lanes are the
    // lanes of `b` rounding-shifted right by 2 (32 >> 2 = 8, 36 >> 2 = 9, …).
    let a: i8x8 = i8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
    let b: i16x8 = i16x8::new(32, 36, 40, 44, 48, 52, 56, 60);
    let e: i8x16 = i8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15);
    let r: i8x16 = transmute(vrshrn_high_n_s16::<2>(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
|
||||
|
||||
#[simd_test(enable = "neon")]
unsafe fn test_vrshrn_high_n_s32() {
    // Expected result: low 4 lanes are `a` unchanged; high 4 lanes are the
    // lanes of `b` rounding-shifted right by 2.
    let a: i16x4 = i16x4::new(0, 1, 8, 9);
    let b: i32x4 = i32x4::new(32, 36, 40, 44);
    let e: i16x8 = i16x8::new(0, 1, 8, 9, 8, 9, 10, 11);
    let r: i16x8 = transmute(vrshrn_high_n_s32::<2>(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
|
||||
|
||||
#[simd_test(enable = "neon")]
unsafe fn test_vrshrn_high_n_s64() {
    // Expected result: low 2 lanes are `a` unchanged; high 2 lanes are the
    // lanes of `b` rounding-shifted right by 2.
    let a: i32x2 = i32x2::new(0, 1);
    let b: i64x2 = i64x2::new(32, 36);
    let e: i32x4 = i32x4::new(0, 1, 8, 9);
    let r: i32x4 = transmute(vrshrn_high_n_s64::<2>(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
|
||||
|
||||
#[simd_test(enable = "neon")]
unsafe fn test_vrshrn_high_n_u16() {
    // Expected result: low 8 lanes are `a` unchanged; high 8 lanes are the
    // lanes of `b` rounding-shifted right by 2.
    let a: u8x8 = u8x8::new(0, 1, 8, 9, 8, 9, 10, 11);
    let b: u16x8 = u16x8::new(32, 36, 40, 44, 48, 52, 56, 60);
    let e: u8x16 = u8x16::new(0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15);
    let r: u8x16 = transmute(vrshrn_high_n_u16::<2>(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
|
||||
|
||||
#[simd_test(enable = "neon")]
unsafe fn test_vrshrn_high_n_u32() {
    // Expected result: low 4 lanes are `a` unchanged; high 4 lanes are the
    // lanes of `b` rounding-shifted right by 2.
    let a: u16x4 = u16x4::new(0, 1, 8, 9);
    let b: u32x4 = u32x4::new(32, 36, 40, 44);
    let e: u16x8 = u16x8::new(0, 1, 8, 9, 8, 9, 10, 11);
    let r: u16x8 = transmute(vrshrn_high_n_u32::<2>(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
|
||||
|
||||
#[simd_test(enable = "neon")]
unsafe fn test_vrshrn_high_n_u64() {
    // Expected result: low 2 lanes are `a` unchanged; high 2 lanes are the
    // lanes of `b` rounding-shifted right by 2.
    let a: u32x2 = u32x2::new(0, 1);
    let b: u64x2 = u64x2::new(32, 36);
    let e: u32x4 = u32x4::new(0, 1, 8, 9);
    let r: u32x4 = transmute(vrshrn_high_n_u64::<2>(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrsrad_n_s64() {
|
||||
let a: i64 = 1;
|
||||
let b: i64 = 4;
|
||||
let e: i64 = 2;
|
||||
let r: i64 = transmute(vrsrad_n_s64::<2>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrsrad_n_u64() {
|
||||
let a: u64 = 1;
|
||||
let b: u64 = 4;
|
||||
let e: u64 = 2;
|
||||
let r: u64 = transmute(vrsrad_n_u64::<2>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vshld_s64() {
|
||||
let a: i64 = 1;
|
||||
let b: i64 = 2;
|
||||
let e: i64 = 4;
|
||||
let r: i64 = transmute(vshld_s64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vshld_u64() {
|
||||
let a: u64 = 1;
|
||||
let b: i64 = 2;
|
||||
let e: u64 = 4;
|
||||
let r: u64 = transmute(vshld_u64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vshll_high_n_s8() {
|
||||
let a: i8x16 = i8x16::new(0, 0, 1, 2, 1, 2, 3, 4, 1, 2, 3, 4, 5, 6, 7, 8);
|
||||
|
|
|
|||
|
|
@ -2447,6 +2447,66 @@ pub unsafe fn vqtbx4q_p8(a: poly8x16_t, t: poly8x16x4_t, idx: uint8x16_t) -> pol
|
|||
))
|
||||
}
|
||||
|
||||
/// Shift left
///
/// Shifts `a` left by the immediate `N`. `static_assert_imm6!` restricts
/// `N` to 6 bits (0..=63), so the shift amount can never overflow i64.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vshld_n_s64<const N: i32>(a: i64) -> i64 {
    static_assert_imm6!(N);
    a << N
}
|
||||
|
||||
/// Shift left
///
/// Shifts `a` left by the immediate `N`. `static_assert_imm6!` restricts
/// `N` to 6 bits (0..=63), so the shift amount can never overflow u64.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(nop, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vshld_n_u64<const N: i32>(a: u64) -> u64 {
    static_assert_imm6!(N);
    a << N
}
|
||||
|
||||
/// Signed shift right
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop, N = 2))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn vshrd_n_s64<const N: i32>(a: i64) -> i64 {
|
||||
static_assert!(N : i32 where N >= 1 && N <= 64);
|
||||
a >> N
|
||||
}
|
||||
|
||||
/// Unsigned shift right
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop, N = 2))]
|
||||
#[rustc_legacy_const_generics(1)]
|
||||
pub unsafe fn vshrd_n_u64<const N: i32>(a: u64) -> u64 {
|
||||
static_assert!(N : i32 where N >= 1 && N <= 64);
|
||||
a >> N
|
||||
}
|
||||
|
||||
/// Signed shift right and accumulate
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop, N = 2))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsrad_n_s64<const N: i32>(a: i64, b: i64) -> i64 {
|
||||
static_assert!(N : i32 where N >= 1 && N <= 64);
|
||||
a + (b >> N)
|
||||
}
|
||||
|
||||
/// Unsigned shift right and accumulate
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(nop, N = 2))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vsrad_n_u64<const N: i32>(a: u64, b: u64) -> u64 {
|
||||
static_assert!(N : i32 where N >= 1 && N <= 64);
|
||||
a + (b >> N)
|
||||
}
|
||||
|
||||
/// Shift Left and Insert (immediate)
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -3512,6 +3572,56 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vshld_n_s64() {
|
||||
let a: i64 = 1;
|
||||
let e: i64 = 4;
|
||||
let r: i64 = vshld_n_s64::<2>(a);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vshld_n_u64() {
|
||||
let a: u64 = 1;
|
||||
let e: u64 = 4;
|
||||
let r: u64 = vshld_n_u64::<2>(a);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vshrd_n_s64() {
|
||||
let a: i64 = 4;
|
||||
let e: i64 = 1;
|
||||
let r: i64 = vshrd_n_s64::<2>(a);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vshrd_n_u64() {
|
||||
let a: u64 = 4;
|
||||
let e: u64 = 1;
|
||||
let r: u64 = vshrd_n_u64::<2>(a);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsrad_n_s64() {
|
||||
let a: i64 = 1;
|
||||
let b: i64 = 4;
|
||||
let e: i64 = 2;
|
||||
let r: i64 = vsrad_n_s64::<2>(a, b);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsrad_n_u64() {
|
||||
let a: u64 = 1;
|
||||
let b: u64 = 4;
|
||||
let e: u64 = 2;
|
||||
let r: u64 = vsrad_n_u64::<2>(a, b);
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
macro_rules! test_vcombine {
|
||||
($test_id:ident => $fn_id:ident ([$($a:expr),*], [$($b:expr),*])) => {
|
||||
#[allow(unused_assignments)]
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -2709,6 +2709,218 @@ aarch64 = str
|
|||
generate float32x2_t:float64x1_t, float64x1_t:float32x2_t
|
||||
generate float32x4_t:float64x2_t, float64x2_t:float32x4_t
|
||||
|
||||
/// Signed rounding shift left
|
||||
name = vrshl
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
|
||||
aarch64 = srshl
|
||||
link-aarch64 = srshl._EXT_
|
||||
|
||||
arm = vrshl
|
||||
link-arm = vrshifts._EXT_
|
||||
generate int*_t, int64x*_t
|
||||
|
||||
/// Signed rounding shift left
|
||||
name = vrshl
|
||||
multi_fn = transmute, {vrshl-in_ntt-noext, transmute(a), transmute(b)}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
|
||||
aarch64 = srshl
|
||||
generate i64
|
||||
|
||||
/// Unsigned rounding shift left
|
||||
name = vrshl
|
||||
out-suffix
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
|
||||
aarch64 = urshl
|
||||
link-aarch64 = urshl._EXT_
|
||||
|
||||
arm = vrshl
|
||||
link-arm = vrshiftu._EXT_
|
||||
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
|
||||
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t
|
||||
|
||||
/// Unsigned rounding shift left
|
||||
name = vrshl
|
||||
out-suffix
|
||||
multi_fn = transmute, {vrshl-out_ntt-noext, transmute(a), transmute(b)}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
validate 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
|
||||
aarch64 = urshl
|
||||
generate u64:i64:u64
|
||||
|
||||
/// Signed rounding shift right
|
||||
name = vrshr
|
||||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = vrshl-self-noext, a, {vdup-nself-noext, (-N).try_into().unwrap()}
|
||||
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
n = 2
|
||||
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
|
||||
aarch64 = srshr
|
||||
arm = vrshr
|
||||
generate int*_t, int64x*_t
|
||||
|
||||
/// Signed rounding shift right
|
||||
name = vrshr
|
||||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = vrshl-self-noext, a, -N as i64
|
||||
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
n = 2
|
||||
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
|
||||
aarch64 = srshr
|
||||
generate i64
|
||||
|
||||
/// Unsigned rounding shift right
|
||||
name = vrshr
|
||||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = vrshl-self-noext, a, {vdup-nsigned-noext, (-N).try_into().unwrap()}
|
||||
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
n = 2
|
||||
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
|
||||
aarch64 = urshr
|
||||
arm = vrshr
|
||||
generate uint*_t, uint64x*_t
|
||||
|
||||
/// Unsigned rounding shift right
|
||||
name = vrshr
|
||||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = vrshl-self-noext, a, -N as i64
|
||||
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
n = 2
|
||||
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
|
||||
aarch64 = urshr
|
||||
generate u64
|
||||
|
||||
/// Rounding shift right narrow
|
||||
name = vrshrn
|
||||
noq-n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-halfbits
|
||||
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
n = 2
|
||||
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
|
||||
aarch64 = rshrn
|
||||
link-aarch64 = rshrn._EXT2_
|
||||
const-aarch64 = N
|
||||
|
||||
arm = vrshrn
|
||||
link-arm = vrshiftn._EXT2_
|
||||
const-arm = -N as ttn
|
||||
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
|
||||
|
||||
/// Rounding shift right narrow
|
||||
name = vrshrn
|
||||
noq-n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-halfbits
|
||||
multi_fn = transmute, {vrshrn_n-noqsigned-::<N>, transmute(a)}
|
||||
a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
n = 2
|
||||
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
|
||||
aarch64 = rshrn
|
||||
arm = vrshrn
|
||||
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
|
||||
|
||||
/// Rounding shift right narrow
|
||||
name = vrshrn_high
|
||||
noq-n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-halfbits
|
||||
multi_fn = simd_shuffle-out_len-noext, a, {vrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
|
||||
a = 0, 1, 8, 9, 8, 9, 10, 11
|
||||
b = 32, 36, 40, 44, 48, 52, 56, 60
|
||||
n = 2
|
||||
validate 0, 1, 8, 9, 8, 9, 10, 11, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
|
||||
aarch64 = rshrn2
|
||||
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
|
||||
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
|
||||
|
||||
/// Signed rounding shift right and accumulate
|
||||
name = vrsra
|
||||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = simd_add, a, {vrshr-nself-::<N>, b}
|
||||
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||
b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
n = 2
|
||||
validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
|
||||
|
||||
aarch64 = srsra
|
||||
arm = vrsra
|
||||
generate int*_t, int64x*_t
|
||||
|
||||
/// Unsigned rounding shift right and accumulate
|
||||
name = vrsra
|
||||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = simd_add, a, {vrshr-nself-::<N>, b}
|
||||
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||
b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
n = 2
|
||||
validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
|
||||
|
||||
aarch64 = ursra
|
||||
arm = vrsra
|
||||
generate uint*_t, uint64x*_t
|
||||
|
||||
/// Signed rounding shift right and accumulate.
|
||||
name = vrsra
|
||||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = vrshr_n-in_ntt-::<N>, b:in_ntt, transmute(b)
|
||||
multi_fn = transmute, {simd_add, transmute(a), b}
|
||||
a = 1
|
||||
b = 4
|
||||
n = 2
|
||||
validate 2
|
||||
|
||||
// We use "nop" here to skip the instruction test, since it cannot be optimized correctly.
|
||||
aarch64 = nop
|
||||
generate i64
|
||||
|
||||
/// Unsigned rounding shift right and accumulate.
|
||||
name = vrsra
|
||||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = vrshr_n-in_ntt-::<N>, b:in_ntt, transmute(b)
|
||||
multi_fn = transmute, {simd_add, transmute(a), b}
|
||||
a = 1
|
||||
b = 4
|
||||
n = 2
|
||||
validate 2
|
||||
|
||||
// We use "nop" here to skip the instruction test, since it cannot be optimized correctly.
|
||||
aarch64 = nop
|
||||
generate u64
|
||||
|
||||
/// Signed Shift left
|
||||
name = vshl
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
|
|
@ -2721,6 +2933,16 @@ aarch64 = sshl
|
|||
link-aarch64 = sshl._EXT_
|
||||
generate int*_t, int64x*_t
|
||||
|
||||
/// Signed Shift left
|
||||
name = vshl
|
||||
multi_fn = transmute, {vshl-in_ntt-noext, transmute(a), transmute(b)}
|
||||
a = 1
|
||||
b = 2
|
||||
validate 4
|
||||
|
||||
aarch64 = sshl
|
||||
generate i64
|
||||
|
||||
/// Unsigned Shift left
|
||||
name = vshl
|
||||
out-suffix
|
||||
|
|
@ -2735,6 +2957,17 @@ link-aarch64 = ushl._EXT_
|
|||
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
|
||||
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t
|
||||
|
||||
/// Unsigned Shift left
|
||||
out-suffix
|
||||
name = vshl
|
||||
multi_fn = transmute, {vshl-out_ntt-noext, transmute(a), transmute(b)}
|
||||
a = 1
|
||||
b = 2
|
||||
validate 4
|
||||
|
||||
aarch64 = ushl
|
||||
generate u64:i64:u64
|
||||
|
||||
/// Shift left
|
||||
name = vshl
|
||||
n-suffix
|
||||
|
|
@ -2827,6 +3060,36 @@ aarch64 = shrn2
|
|||
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
|
||||
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
|
||||
|
||||
/// Signed shift right and accumulate
|
||||
name = vsra
|
||||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = simd_add, a, {vshr-nself-::<N>, b}
|
||||
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||
b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
n = 2
|
||||
validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
|
||||
|
||||
aarch64 = ssra
|
||||
arm = vsra
|
||||
generate int*_t, int64x*_t
|
||||
|
||||
/// Unsigned shift right and accumulate
|
||||
name = vsra
|
||||
n-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert-N-1-bits
|
||||
multi_fn = simd_add, a, {vshr-nself-::<N>, b}
|
||||
a = 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
|
||||
b = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64
|
||||
n = 2
|
||||
validate 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
|
||||
|
||||
aarch64 = usra
|
||||
arm = vsra
|
||||
generate uint*_t, uint64x*_t
|
||||
|
||||
/// Transpose vectors
|
||||
name = vtrn1
|
||||
multi_fn = simd_shuffle-in_len-noext, a, b, {transpose-1-in_len}
|
||||
|
|
|
|||
|
|
@ -1932,6 +1932,8 @@ fn get_call(
|
|||
fn_name.push_str(&type_to_noq_double_suffixes(out_t, in_t[1]));
|
||||
} else if fn_format[1] == "noqself" {
|
||||
fn_name.push_str(type_to_noq_suffix(in_t[1]));
|
||||
} else if fn_format[1] == "noqsigned" {
|
||||
fn_name.push_str(type_to_noq_suffix(type_to_signed(in_t[1])));
|
||||
} else if fn_format[1] == "nosuffix" {
|
||||
} else if fn_format[1] == "in_len" {
|
||||
fn_name.push_str(&type_len(in_t[1]).to_string());
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue