Add vqrsh* neon instructions (#1119)

This commit is contained in:
Sparrow Li 2021-04-15 19:29:04 +08:00 committed by GitHub
parent 33afae1df7
commit 23f45cc955
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 1862 additions and 128 deletions

View file

@ -3166,6 +3166,275 @@ pub unsafe fn vpminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
vpminnmq_f32_(a, b)
}
/// Signed saturating rounding shift left
///
/// Scalar form of `vqrshl_s8`: both scalars are splatted into vectors,
/// the vector intrinsic is applied, and lane 0 is returned.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshl))]
pub unsafe fn vqrshlb_s8(a: i8, b: i8) -> i8 {
    let a: int8x8_t = vdup_n_s8(a);
    let b: int8x8_t = vdup_n_s8(b);
    // Only lane 0 of the vector result is meaningful for the scalar form.
    simd_extract(vqrshl_s8(a, b), 0)
}
/// Signed saturating rounding shift left
///
/// Scalar form of `vqrshl_s16`: both scalars are splatted into vectors,
/// the vector intrinsic is applied, and lane 0 is returned.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshl))]
pub unsafe fn vqrshlh_s16(a: i16, b: i16) -> i16 {
    let a: int16x4_t = vdup_n_s16(a);
    let b: int16x4_t = vdup_n_s16(b);
    // Only lane 0 of the vector result is meaningful for the scalar form.
    simd_extract(vqrshl_s16(a, b), 0)
}
/// Signed saturating rounding shift left
///
/// Scalar form of `vqrshl_s32`: both scalars are splatted into vectors,
/// the vector intrinsic is applied, and lane 0 is returned.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshl))]
pub unsafe fn vqrshls_s32(a: i32, b: i32) -> i32 {
    let a: int32x2_t = vdup_n_s32(a);
    let b: int32x2_t = vdup_n_s32(b);
    // Only lane 0 of the vector result is meaningful for the scalar form.
    simd_extract(vqrshl_s32(a, b), 0)
}
/// Signed saturating rounding shift left
///
/// Scalar form of `vqrshl_s64`: both scalars are wrapped in one-lane
/// vectors, the vector intrinsic is applied, and lane 0 is returned.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshl))]
pub unsafe fn vqrshld_s64(a: i64, b: i64) -> i64 {
    let a: int64x1_t = vdup_n_s64(a);
    let b: int64x1_t = vdup_n_s64(b);
    // Only lane 0 of the vector result is meaningful for the scalar form.
    simd_extract(vqrshl_s64(a, b), 0)
}
/// Unsigned signed saturating rounding shift left
///
/// Scalar form of `vqrshl_u8`: the value `a` is unsigned while the shift
/// amount `b` is signed; both are splatted and lane 0 is returned.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqrshl))]
pub unsafe fn vqrshlb_u8(a: u8, b: i8) -> u8 {
    let a: uint8x8_t = vdup_n_u8(a);
    let b: int8x8_t = vdup_n_s8(b);
    // Only lane 0 of the vector result is meaningful for the scalar form.
    simd_extract(vqrshl_u8(a, b), 0)
}
/// Unsigned signed saturating rounding shift left
///
/// Scalar form of `vqrshl_u16`: the value `a` is unsigned while the shift
/// amount `b` is signed; both are splatted and lane 0 is returned.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqrshl))]
pub unsafe fn vqrshlh_u16(a: u16, b: i16) -> u16 {
    let a: uint16x4_t = vdup_n_u16(a);
    let b: int16x4_t = vdup_n_s16(b);
    // Only lane 0 of the vector result is meaningful for the scalar form.
    simd_extract(vqrshl_u16(a, b), 0)
}
/// Unsigned signed saturating rounding shift left
///
/// Scalar form of `vqrshl_u32`: the value `a` is unsigned while the shift
/// amount `b` is signed; both are splatted and lane 0 is returned.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqrshl))]
pub unsafe fn vqrshls_u32(a: u32, b: i32) -> u32 {
    let a: uint32x2_t = vdup_n_u32(a);
    let b: int32x2_t = vdup_n_s32(b);
    // Only lane 0 of the vector result is meaningful for the scalar form.
    simd_extract(vqrshl_u32(a, b), 0)
}
/// Unsigned signed saturating rounding shift left
///
/// Scalar form of `vqrshl_u64`: the value `a` is unsigned while the shift
/// amount `b` is signed; both use one-lane vectors and lane 0 is returned.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqrshl))]
pub unsafe fn vqrshld_u64(a: u64, b: i64) -> u64 {
    let a: uint64x1_t = vdup_n_u64(a);
    let b: int64x1_t = vdup_n_s64(b);
    // Only lane 0 of the vector result is meaningful for the scalar form.
    simd_extract(vqrshl_u64(a, b), 0)
}
/// Signed saturating rounded shift right narrow
///
/// Scalar form of `vqrshrn_n_s16`: splats `a`, applies the vector
/// narrowing shift by the const `N`, and extracts lane 0.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrnh_n_s16<const N: i32>(a: i16) -> i8 {
    // N is limited to the source element width (16-bit lanes).
    static_assert!(N : i32 where N >= 1 && N <= 8);
    let a: int16x8_t = vdupq_n_s16(a);
    simd_extract(vqrshrn_n_s16::<N>(a), 0)
}
/// Signed saturating rounded shift right narrow
///
/// Scalar form of `vqrshrn_n_s32`: splats `a`, applies the vector
/// narrowing shift by the const `N`, and extracts lane 0.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrns_n_s32<const N: i32>(a: i32) -> i16 {
    // N is limited to the source element width (32-bit lanes).
    static_assert!(N : i32 where N >= 1 && N <= 16);
    let a: int32x4_t = vdupq_n_s32(a);
    simd_extract(vqrshrn_n_s32::<N>(a), 0)
}
/// Signed saturating rounded shift right narrow
///
/// Scalar form of `vqrshrn_n_s64`: splats `a`, applies the vector
/// narrowing shift by the const `N`, and extracts lane 0.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrnd_n_s64<const N: i32>(a: i64) -> i32 {
    // N is limited to the source element width (64-bit lanes).
    static_assert!(N : i32 where N >= 1 && N <= 32);
    let a: int64x2_t = vdupq_n_s64(a);
    simd_extract(vqrshrn_n_s64::<N>(a), 0)
}
/// Signed saturating rounded shift right narrow
///
/// "High" form: `a` provides the lower 8 lanes of the result and the
/// narrowed `b` provides the upper 8 lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqrshrn_high_n_s16<const N: i32>(a: int8x8_t, b: int16x8_t) -> int8x16_t {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    // Concatenate `a` (indices 0-7) with the narrowed `b` (indices 8-15).
    simd_shuffle16(a, vqrshrn_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
}
/// Signed saturating rounded shift right narrow
///
/// "High" form: `a` provides the lower 4 lanes of the result and the
/// narrowed `b` provides the upper 4 lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqrshrn_high_n_s32<const N: i32>(a: int16x4_t, b: int32x4_t) -> int16x8_t {
    static_assert!(N : i32 where N >= 1 && N <= 16);
    // Concatenate `a` (indices 0-3) with the narrowed `b` (indices 4-7).
    simd_shuffle8(a, vqrshrn_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
}
/// Signed saturating rounded shift right narrow
///
/// "High" form: `a` provides the lower 2 lanes of the result and the
/// narrowed `b` provides the upper 2 lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqrshrn_high_n_s64<const N: i32>(a: int32x2_t, b: int64x2_t) -> int32x4_t {
    static_assert!(N : i32 where N >= 1 && N <= 32);
    // Concatenate `a` (indices 0-1) with the narrowed `b` (indices 2-3).
    simd_shuffle4(a, vqrshrn_n_s64::<N>(b), [0, 1, 2, 3])
}
/// Unsigned saturating rounded shift right narrow
///
/// Scalar form of `vqrshrn_n_u16`: splats `a`, applies the vector
/// narrowing shift by the const `N`, and extracts lane 0.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrnh_n_u16<const N: i32>(a: u16) -> u8 {
    // N is limited to the source element width (16-bit lanes).
    static_assert!(N : i32 where N >= 1 && N <= 8);
    let a: uint16x8_t = vdupq_n_u16(a);
    simd_extract(vqrshrn_n_u16::<N>(a), 0)
}
/// Unsigned saturating rounded shift right narrow
///
/// Scalar form of `vqrshrn_n_u32`: splats `a`, applies the vector
/// narrowing shift by the const `N`, and extracts lane 0.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrns_n_u32<const N: i32>(a: u32) -> u16 {
    // N is limited to the source element width (32-bit lanes).
    static_assert!(N : i32 where N >= 1 && N <= 16);
    let a: uint32x4_t = vdupq_n_u32(a);
    simd_extract(vqrshrn_n_u32::<N>(a), 0)
}
/// Unsigned saturating rounded shift right narrow
///
/// Scalar form of `vqrshrn_n_u64`: splats `a`, applies the vector
/// narrowing shift by the const `N`, and extracts lane 0.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrnd_n_u64<const N: i32>(a: u64) -> u32 {
    // N is limited to the source element width (64-bit lanes).
    static_assert!(N : i32 where N >= 1 && N <= 32);
    let a: uint64x2_t = vdupq_n_u64(a);
    simd_extract(vqrshrn_n_u64::<N>(a), 0)
}
/// Unsigned saturating rounded shift right narrow
///
/// "High" form: `a` provides the lower 8 lanes of the result and the
/// narrowed `b` provides the upper 8 lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqrshrn_high_n_u16<const N: i32>(a: uint8x8_t, b: uint16x8_t) -> uint8x16_t {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    // Concatenate `a` (indices 0-7) with the narrowed `b` (indices 8-15).
    simd_shuffle16(a, vqrshrn_n_u16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
}
/// Unsigned saturating rounded shift right narrow
///
/// "High" form: `a` provides the lower 4 lanes of the result and the
/// narrowed `b` provides the upper 4 lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqrshrn_high_n_u32<const N: i32>(a: uint16x4_t, b: uint32x4_t) -> uint16x8_t {
    static_assert!(N : i32 where N >= 1 && N <= 16);
    // Concatenate `a` (indices 0-3) with the narrowed `b` (indices 4-7).
    simd_shuffle8(a, vqrshrn_n_u32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
}
/// Unsigned saturating rounded shift right narrow
///
/// "High" form: `a` provides the lower 2 lanes of the result and the
/// narrowed `b` provides the upper 2 lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uqrshrn2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqrshrn_high_n_u64<const N: i32>(a: uint32x2_t, b: uint64x2_t) -> uint32x4_t {
    static_assert!(N : i32 where N >= 1 && N <= 32);
    // Concatenate `a` (indices 0-1) with the narrowed `b` (indices 2-3).
    simd_shuffle4(a, vqrshrn_n_u64::<N>(b), [0, 1, 2, 3])
}
/// Signed saturating rounded shift right unsigned narrow
///
/// Scalar form of `vqrshrun_n_s16`: signed input, unsigned narrowed
/// output; splats `a`, shifts by the const `N`, and extracts lane 0.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrunh_n_s16<const N: i32>(a: i16) -> u8 {
    // N is limited to the source element width (16-bit lanes).
    static_assert!(N : i32 where N >= 1 && N <= 8);
    let a: int16x8_t = vdupq_n_s16(a);
    simd_extract(vqrshrun_n_s16::<N>(a), 0)
}
/// Signed saturating rounded shift right unsigned narrow
///
/// Scalar form of `vqrshrun_n_s32`: signed input, unsigned narrowed
/// output; splats `a`, shifts by the const `N`, and extracts lane 0.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshruns_n_s32<const N: i32>(a: i32) -> u16 {
    // N is limited to the source element width (32-bit lanes).
    static_assert!(N : i32 where N >= 1 && N <= 16);
    let a: int32x4_t = vdupq_n_s32(a);
    simd_extract(vqrshrun_n_s32::<N>(a), 0)
}
/// Signed saturating rounded shift right unsigned narrow
///
/// Scalar form of `vqrshrun_n_s64`: signed input, unsigned narrowed
/// output; splats `a`, shifts by the const `N`, and extracts lane 0.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrund_n_s64<const N: i32>(a: i64) -> u32 {
    // N is limited to the source element width (64-bit lanes).
    static_assert!(N : i32 where N >= 1 && N <= 32);
    let a: int64x2_t = vdupq_n_s64(a);
    simd_extract(vqrshrun_n_s64::<N>(a), 0)
}
/// Signed saturating rounded shift right unsigned narrow
///
/// "High" form: `a` provides the lower 8 lanes of the result and the
/// narrowed `b` provides the upper 8 lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqrshrun_high_n_s16<const N: i32>(a: uint8x8_t, b: int16x8_t) -> uint8x16_t {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    // Concatenate `a` (indices 0-7) with the narrowed `b` (indices 8-15).
    simd_shuffle16(a, vqrshrun_n_s16::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15])
}
/// Signed saturating rounded shift right unsigned narrow
///
/// "High" form: `a` provides the lower 4 lanes of the result and the
/// narrowed `b` provides the upper 4 lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqrshrun_high_n_s32<const N: i32>(a: uint16x4_t, b: int32x4_t) -> uint16x8_t {
    static_assert!(N : i32 where N >= 1 && N <= 16);
    // Concatenate `a` (indices 0-3) with the narrowed `b` (indices 4-7).
    simd_shuffle8(a, vqrshrun_n_s32::<N>(b), [0, 1, 2, 3, 4, 5, 6, 7])
}
/// Signed saturating rounded shift right unsigned narrow
///
/// "High" form: `a` provides the lower 2 lanes of the result and the
/// narrowed `b` provides the upper 2 lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sqrshrun2, N = 2))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vqrshrun_high_n_s64<const N: i32>(a: uint32x2_t, b: int64x2_t) -> uint32x4_t {
    static_assert!(N : i32 where N >= 1 && N <= 32);
    // Concatenate `a` (indices 0-1) with the narrowed `b` (indices 2-3).
    simd_shuffle4(a, vqrshrun_n_s64::<N>(b), [0, 1, 2, 3])
}
/// Calculates the square root of each lane.
#[inline]
#[target_feature(enable = "neon")]
@ -7919,6 +8188,231 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlb_s8() {
    // 1 << 2 == 4; no saturation or rounding needed. The transmutes are
    // identity casts kept for consistency with the test generator.
    let a: i8 = 1;
    let b: i8 = 2;
    let e: i8 = 4;
    let r: i8 = transmute(vqrshlb_s8(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlh_s16() {
    // 1 << 2 == 4; no saturation or rounding needed.
    let a: i16 = 1;
    let b: i16 = 2;
    let e: i16 = 4;
    let r: i16 = transmute(vqrshlh_s16(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshls_s32() {
    // 1 << 2 == 4; no saturation or rounding needed.
    let a: i32 = 1;
    let b: i32 = 2;
    let e: i32 = 4;
    let r: i32 = transmute(vqrshls_s32(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshld_s64() {
    // 1 << 2 == 4; no saturation or rounding needed.
    let a: i64 = 1;
    let b: i64 = 2;
    let e: i64 = 4;
    let r: i64 = transmute(vqrshld_s64(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlb_u8() {
    // Unsigned value 1 shifted left by signed amount 2 == 4.
    let a: u8 = 1;
    let b: i8 = 2;
    let e: u8 = 4;
    let r: u8 = transmute(vqrshlb_u8(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlh_u16() {
    // Unsigned value 1 shifted left by signed amount 2 == 4.
    let a: u16 = 1;
    let b: i16 = 2;
    let e: u16 = 4;
    let r: u16 = transmute(vqrshlh_u16(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshls_u32() {
    // Unsigned value 1 shifted left by signed amount 2 == 4.
    let a: u32 = 1;
    let b: i32 = 2;
    let e: u32 = 4;
    let r: u32 = transmute(vqrshls_u32(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshld_u64() {
    // Unsigned value 1 shifted left by signed amount 2 == 4.
    let a: u64 = 1;
    let b: i64 = 2;
    let e: u64 = 4;
    let r: u64 = transmute(vqrshld_u64(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrnh_n_s16() {
    // 4 >> 2 == 1 exactly; rounding and saturation are not exercised.
    let a: i16 = 4;
    let e: i8 = 1;
    let r: i8 = transmute(vqrshrnh_n_s16::<2>(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrns_n_s32() {
    // 4 >> 2 == 1 exactly; rounding and saturation are not exercised.
    let a: i32 = 4;
    let e: i16 = 1;
    let r: i16 = transmute(vqrshrns_n_s32::<2>(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrnd_n_s64() {
    // 4 >> 2 == 1 exactly; rounding and saturation are not exercised.
    let a: i64 = 4;
    let e: i32 = 1;
    let r: i32 = transmute(vqrshrnd_n_s64::<2>(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrn_high_n_s16() {
    // Lower half of `e` is `a` unchanged; upper half is `b` >> 2.
    let a: i8x8 = i8x8::new(0, 1, 2, 3, 2, 3, 6, 7);
    let b: i16x8 = i16x8::new(8, 12, 24, 28, 48, 52, 56, 60);
    let e: i8x16 = i8x16::new(0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15);
    let r: i8x16 = transmute(vqrshrn_high_n_s16::<2>(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrn_high_n_s32() {
    // Lower half of `e` is `a` unchanged; upper half is `b` >> 2.
    let a: i16x4 = i16x4::new(0, 1, 2, 3);
    let b: i32x4 = i32x4::new(8, 12, 24, 28);
    let e: i16x8 = i16x8::new(0, 1, 2, 3, 2, 3, 6, 7);
    let r: i16x8 = transmute(vqrshrn_high_n_s32::<2>(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrn_high_n_s64() {
    // Lower half of `e` is `a` unchanged; upper half is `b` >> 2.
    let a: i32x2 = i32x2::new(0, 1);
    let b: i64x2 = i64x2::new(8, 12);
    let e: i32x4 = i32x4::new(0, 1, 2, 3);
    let r: i32x4 = transmute(vqrshrn_high_n_s64::<2>(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrnh_n_u16() {
    // 4 >> 2 == 1 exactly; rounding and saturation are not exercised.
    let a: u16 = 4;
    let e: u8 = 1;
    let r: u8 = transmute(vqrshrnh_n_u16::<2>(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrns_n_u32() {
    // 4 >> 2 == 1 exactly; rounding and saturation are not exercised.
    let a: u32 = 4;
    let e: u16 = 1;
    let r: u16 = transmute(vqrshrns_n_u32::<2>(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrnd_n_u64() {
    // 4 >> 2 == 1 exactly; rounding and saturation are not exercised.
    let a: u64 = 4;
    let e: u32 = 1;
    let r: u32 = transmute(vqrshrnd_n_u64::<2>(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrn_high_n_u16() {
    // Lower half of `e` is `a` unchanged; upper half is `b` >> 2.
    let a: u8x8 = u8x8::new(0, 1, 2, 3, 2, 3, 6, 7);
    let b: u16x8 = u16x8::new(8, 12, 24, 28, 48, 52, 56, 60);
    let e: u8x16 = u8x16::new(0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15);
    let r: u8x16 = transmute(vqrshrn_high_n_u16::<2>(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrn_high_n_u32() {
    // Lower half of `e` is `a` unchanged; upper half is `b` >> 2.
    let a: u16x4 = u16x4::new(0, 1, 2, 3);
    let b: u32x4 = u32x4::new(8, 12, 24, 28);
    let e: u16x8 = u16x8::new(0, 1, 2, 3, 2, 3, 6, 7);
    let r: u16x8 = transmute(vqrshrn_high_n_u32::<2>(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrn_high_n_u64() {
    // Lower half of `e` is `a` unchanged; upper half is `b` >> 2.
    let a: u32x2 = u32x2::new(0, 1);
    let b: u64x2 = u64x2::new(8, 12);
    let e: u32x4 = u32x4::new(0, 1, 2, 3);
    let r: u32x4 = transmute(vqrshrn_high_n_u64::<2>(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrunh_n_s16() {
    // Signed 4 >> 2 narrows to unsigned 1 exactly.
    let a: i16 = 4;
    let e: u8 = 1;
    let r: u8 = transmute(vqrshrunh_n_s16::<2>(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshruns_n_s32() {
    // Signed 4 >> 2 narrows to unsigned 1 exactly.
    let a: i32 = 4;
    let e: u16 = 1;
    let r: u16 = transmute(vqrshruns_n_s32::<2>(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrund_n_s64() {
    // Signed 4 >> 2 narrows to unsigned 1 exactly.
    let a: i64 = 4;
    let e: u32 = 1;
    let r: u32 = transmute(vqrshrund_n_s64::<2>(transmute(a)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrun_high_n_s16() {
    // Lower half of `e` is `a` unchanged; upper half is signed `b` >> 2.
    let a: u8x8 = u8x8::new(0, 1, 2, 3, 2, 3, 6, 7);
    let b: i16x8 = i16x8::new(8, 12, 24, 28, 48, 52, 56, 60);
    let e: u8x16 = u8x16::new(0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15);
    let r: u8x16 = transmute(vqrshrun_high_n_s16::<2>(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrun_high_n_s32() {
    // Lower half of `e` is `a` unchanged; upper half is signed `b` >> 2.
    let a: u16x4 = u16x4::new(0, 1, 2, 3);
    let b: i32x4 = i32x4::new(8, 12, 24, 28);
    let e: u16x8 = u16x8::new(0, 1, 2, 3, 2, 3, 6, 7);
    let r: u16x8 = transmute(vqrshrun_high_n_s32::<2>(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrun_high_n_s64() {
    // Lower half of `e` is `a` unchanged; upper half is signed `b` >> 2.
    let a: u32x2 = u32x2::new(0, 1);
    let b: i64x2 = i64x2::new(8, 12);
    let e: u32x4 = u32x4::new(0, 1, 2, 3);
    let r: u32x4 = transmute(vqrshrun_high_n_s64::<2>(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vsqrt_f32() {
let a: f32x2 = f32x2::new(4.0, 9.0);

View file

@ -5886,6 +5886,550 @@ pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
vminnmq_f32_(a, b)
}
/// Signed saturating rounding shift left
///
/// Thin wrapper over the per-architecture LLVM intrinsic for this lane
/// type (`vqrshifts` on ARM, `sqrshl` on AArch64).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
pub unsafe fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
    // The link_name selects the right LLVM intrinsic per target.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v8i8")]
        fn vqrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
    }
    vqrshl_s8_(a, b)
}
/// Signed saturating rounding shift left
///
/// 128-bit (`q`) variant; wraps the per-architecture LLVM intrinsic.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
pub unsafe fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v16i8")]
        fn vqrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
    }
    vqrshlq_s8_(a, b)
}
/// Signed saturating rounding shift left
///
/// Thin wrapper over the per-architecture LLVM intrinsic for this lane
/// type (`vqrshifts` on ARM, `sqrshl` on AArch64).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
pub unsafe fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v4i16")]
        fn vqrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
    }
    vqrshl_s16_(a, b)
}
/// Signed saturating rounding shift left
///
/// 128-bit (`q`) variant; wraps the per-architecture LLVM intrinsic.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
pub unsafe fn vqrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i16")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v8i16")]
        fn vqrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
    }
    vqrshlq_s16_(a, b)
}
/// Signed saturating rounding shift left
///
/// Thin wrapper over the per-architecture LLVM intrinsic for this lane
/// type (`vqrshifts` on ARM, `sqrshl` on AArch64).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
pub unsafe fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v2i32")]
        fn vqrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
    }
    vqrshl_s32_(a, b)
}
/// Signed saturating rounding shift left
///
/// 128-bit (`q`) variant; wraps the per-architecture LLVM intrinsic.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
pub unsafe fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v4i32")]
        fn vqrshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
    }
    vqrshlq_s32_(a, b)
}
/// Signed saturating rounding shift left
///
/// One-lane 64-bit variant; wraps the per-architecture LLVM intrinsic.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
pub unsafe fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v1i64")]
        fn vqrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
    }
    vqrshl_s64_(a, b)
}
/// Signed saturating rounding shift left
///
/// 128-bit (`q`) variant; wraps the per-architecture LLVM intrinsic.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
pub unsafe fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v2i64")]
        fn vqrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
    }
    vqrshlq_s64_(a, b)
}
/// Unsigned signed saturating rounding shift left
///
/// Value lanes (`a`) are unsigned; shift-amount lanes (`b`) are signed.
/// Wraps `vqrshiftu` on ARM, `uqrshl` on AArch64.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
pub unsafe fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v8i8")]
        fn vqrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t;
    }
    vqrshl_u8_(a, b)
}
/// Unsigned signed saturating rounding shift left
///
/// 128-bit (`q`) variant: unsigned values, signed shift amounts.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
pub unsafe fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v16i8")]
        fn vqrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t;
    }
    vqrshlq_u8_(a, b)
}
/// Unsigned signed saturating rounding shift left
///
/// Value lanes (`a`) are unsigned; shift-amount lanes (`b`) are signed.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
pub unsafe fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v4i16")]
        fn vqrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t;
    }
    vqrshl_u16_(a, b)
}
/// Unsigned signed saturating rounding shift left
///
/// 128-bit (`q`) variant: unsigned values, signed shift amounts.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
pub unsafe fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v8i16")]
        fn vqrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t;
    }
    vqrshlq_u16_(a, b)
}
/// Unsigned signed saturating rounding shift left
///
/// Value lanes (`a`) are unsigned; shift-amount lanes (`b`) are signed.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
pub unsafe fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v2i32")]
        fn vqrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
    }
    vqrshl_u32_(a, b)
}
/// Unsigned signed saturating rounding shift left
///
/// 128-bit (`q`) variant: unsigned values, signed shift amounts.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
pub unsafe fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v4i32")]
        fn vqrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
    }
    vqrshlq_u32_(a, b)
}
/// Unsigned signed saturating rounding shift left
///
/// One-lane 64-bit variant: unsigned value, signed shift amount.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
pub unsafe fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v1i64")]
        fn vqrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
    }
    vqrshl_u64_(a, b)
}
/// Unsigned signed saturating rounding shift left
///
/// 128-bit (`q`) variant: unsigned values, signed shift amounts.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))]
pub unsafe fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v2i64")]
        fn vqrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
    }
    vqrshlq_u64_(a, b)
}
/// Signed saturating rounded shift right narrow
///
/// ARM (v7) implementation: the LLVM `vqrshiftns` intrinsic takes per-lane
/// shift amounts, so the right shift by `N` is passed as a splat of `-N`.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")]
        fn vqrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t;
    }
    // Negative lane values encode a right shift for the left-shift intrinsic.
    vqrshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
}
/// Signed saturating rounded shift right narrow
///
/// AArch64 implementation: the LLVM `sqrshrn` intrinsic takes the shift
/// immediate `N` directly.
#[inline]
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v8i8")]
        fn vqrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t;
    }
    vqrshrn_n_s16_(a, N)
}
/// Signed saturating rounded shift right narrow
///
/// ARM (v7) implementation: the LLVM `vqrshiftns` intrinsic takes per-lane
/// shift amounts, so the right shift by `N` is passed as a splat of `-N`.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
    static_assert!(N : i32 where N >= 1 && N <= 16);
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")]
        fn vqrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t;
    }
    vqrshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
}
/// Signed saturating rounded shift right narrow
///
/// AArch64 implementation: the LLVM `sqrshrn` intrinsic takes the shift
/// immediate `N` directly.
#[inline]
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
    static_assert!(N : i32 where N >= 1 && N <= 16);
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v4i16")]
        fn vqrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t;
    }
    vqrshrn_n_s32_(a, N)
}
/// Signed saturating rounded shift right narrow
///
/// ARM (v7) implementation: the LLVM `vqrshiftns` intrinsic takes per-lane
/// shift amounts, so the right shift by `N` is passed as a splat of `-N`.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
    static_assert!(N : i32 where N >= 1 && N <= 32);
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")]
        fn vqrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t;
    }
    vqrshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64))
}
/// Signed saturating rounded shift right narrow
///
/// AArch64 implementation: the LLVM `sqrshrn` intrinsic takes the shift
/// immediate `N` directly.
#[inline]
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
    static_assert!(N : i32 where N >= 1 && N <= 32);
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v2i32")]
        fn vqrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t;
    }
    vqrshrn_n_s64_(a, N)
}
/// Unsigned signed saturating rounded shift right narrow
///
/// ARM (v7) implementation: the LLVM `vqrshiftnu` intrinsic takes per-lane
/// shift amounts, so the right shift by `N` is passed as a splat of `-N`
/// (`-N as u16` reinterprets the negated count as an unsigned bit pattern).
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")]
        fn vqrshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
    }
    vqrshrn_n_u16_(a, uint16x8_t(-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16))
}
/// Unsigned signed saturating rounded shift right narrow
///
/// AArch64 implementation: the LLVM `uqrshrn` intrinsic takes the shift
/// immediate `N` directly.
#[inline]
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
    static_assert!(N : i32 where N >= 1 && N <= 8);
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v8i8")]
        fn vqrshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t;
    }
    vqrshrn_n_u16_(a, N)
}
/// Unsigned signed saturating rounded shift right narrow
///
/// ARM (v7) implementation: the LLVM `vqrshiftnu` intrinsic takes per-lane
/// shift amounts, so the right shift by `N` is passed as a splat of `-N`.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
    static_assert!(N : i32 where N >= 1 && N <= 16);
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")]
        fn vqrshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
    }
    vqrshrn_n_u32_(a, uint32x4_t(-N as u32, -N as u32, -N as u32, -N as u32))
}
/// Unsigned signed saturating rounded shift right narrow
///
/// AArch64 implementation: the LLVM `uqrshrn` intrinsic takes the shift
/// immediate `N` directly.
#[inline]
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
    static_assert!(N : i32 where N >= 1 && N <= 16);
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v4i16")]
        fn vqrshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t;
    }
    vqrshrn_n_u32_(a, N)
}
/// Unsigned saturating rounded shift right narrow
///
/// Narrows each `u64` lane of `a` to `u32` after a rounding right shift by the
/// const `N` (1..=32), saturating on overflow.
///
/// ARMv7 path: the LLVM `vqrshiftnu` intrinsic takes a per-lane shift vector,
/// so the right shift by `N` is passed as `-N` splatted across both lanes.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
static_assert!(N : i32 where N >= 1 && N <= 32);
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")]
fn vqrshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
}
vqrshrn_n_u64_(a, uint64x2_t(-N as u64, -N as u64))
}
/// Unsigned saturating rounded shift right narrow
///
/// Narrows each `u64` lane of `a` to `u32` after a rounding right shift by the
/// const `N` (1..=32), saturating on overflow (AArch64 `uqrshrn`).
#[inline]
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshrn, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
static_assert!(N : i32 where N >= 1 && N <= 32);
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v2i32")]
fn vqrshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t;
}
vqrshrn_n_u64_(a, N)
}
/// Signed saturating rounded shift right unsigned narrow
///
/// Narrows each signed `i16` lane of `a` to an *unsigned* `u8` after a
/// rounding right shift by the const `N` (1..=8), saturating to the `u8` range.
///
/// ARMv7 path: the LLVM `vqrshiftnsu` intrinsic takes a per-lane shift vector,
/// so the right shift by `N` is passed as `-N` splatted across all lanes.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
static_assert!(N : i32 where N >= 1 && N <= 8);
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")]
fn vqrshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
}
vqrshrun_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
}
/// Signed saturating rounded shift right unsigned narrow
///
/// Narrows each signed `i16` lane of `a` to an *unsigned* `u8` after a
/// rounding right shift by the const `N` (1..=8), saturating to the `u8`
/// range (AArch64 `sqrshrun`).
#[inline]
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
static_assert!(N : i32 where N >= 1 && N <= 8);
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v8i8")]
fn vqrshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t;
}
vqrshrun_n_s16_(a, N)
}
/// Signed saturating rounded shift right unsigned narrow
///
/// Narrows each signed `i32` lane of `a` to an *unsigned* `u16` after a
/// rounding right shift by the const `N` (1..=16), saturating to the `u16` range.
///
/// ARMv7 path: the LLVM `vqrshiftnsu` intrinsic takes a per-lane shift vector,
/// so the right shift by `N` is passed as `-N` splatted across all lanes.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
static_assert!(N : i32 where N >= 1 && N <= 16);
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")]
fn vqrshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
}
vqrshrun_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
}
/// Signed saturating rounded shift right unsigned narrow
///
/// Narrows each signed `i32` lane of `a` to an *unsigned* `u16` after a
/// rounding right shift by the const `N` (1..=16), saturating to the `u16`
/// range (AArch64 `sqrshrun`).
#[inline]
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
static_assert!(N : i32 where N >= 1 && N <= 16);
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v4i16")]
fn vqrshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t;
}
vqrshrun_n_s32_(a, N)
}
/// Signed saturating rounded shift right unsigned narrow
///
/// Narrows each signed `i64` lane of `a` to an *unsigned* `u32` after a
/// rounding right shift by the const `N` (1..=32), saturating to the `u32` range.
///
/// ARMv7 path: the LLVM `vqrshiftnsu` intrinsic takes a per-lane shift vector,
/// so the right shift by `N` is passed as `-N` splatted across both lanes.
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshrun, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
static_assert!(N : i32 where N >= 1 && N <= 32);
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")]
fn vqrshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t;
}
vqrshrun_n_s64_(a, int64x2_t(-N as i64, -N as i64))
}
/// Signed saturating rounded shift right unsigned narrow
///
/// Narrows each signed `i64` lane of `a` to an *unsigned* `u32` after a
/// rounding right shift by the const `N` (1..=32), saturating to the `u32`
/// range (AArch64 `sqrshrun`).
#[inline]
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshrun, N = 2))]
#[rustc_legacy_const_generics(1)]
pub unsafe fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
static_assert!(N : i32 where N >= 1 && N <= 32);
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v2i32")]
fn vqrshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t;
}
vqrshrun_n_s64_(a, N)
}
/// Reciprocal square-root estimate.
#[inline]
#[target_feature(enable = "neon")]
@ -13474,6 +14018,222 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshl_s8() {
// Lanes 0/1 hold i8::MIN / i8::MAX and saturate under the <<2 shift; the rest become x*4.
let a: i8x8 = i8x8::new(-128, 0x7F, 2, 3, 4, 5, 6, 7);
let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
let e: i8x8 = i8x8::new(-128, 0x7F, 8, 12, 16, 20, 24, 28);
let r: i8x8 = transmute(vqrshl_s8(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlq_s8() {
// 128-bit variant: lanes 0/1 saturate at i8::MIN / i8::MAX; the rest become x*4.
let a: i8x16 = i8x16::new(-128, 0x7F, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let e: i8x16 = i8x16::new(-128, 0x7F, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
let r: i8x16 = transmute(vqrshlq_s8(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshl_s16() {
// Lanes 0/1 hold i16::MIN / i16::MAX and saturate; the rest become x*4.
let a: i16x4 = i16x4::new(-32768, 0x7F_FF, 2, 3);
let b: i16x4 = i16x4::new(2, 2, 2, 2);
let e: i16x4 = i16x4::new(-32768, 0x7F_FF, 8, 12);
let r: i16x4 = transmute(vqrshl_s16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlq_s16() {
// 128-bit variant: lanes 0/1 saturate at i16::MIN / i16::MAX; the rest become x*4.
let a: i16x8 = i16x8::new(-32768, 0x7F_FF, 2, 3, 4, 5, 6, 7);
let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
let e: i16x8 = i16x8::new(-32768, 0x7F_FF, 8, 12, 16, 20, 24, 28);
let r: i16x8 = transmute(vqrshlq_s16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshl_s32() {
// Both lanes hold i32::MIN / i32::MAX, so both saturate and the result equals the input.
let a: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF);
let b: i32x2 = i32x2::new(2, 2);
let e: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF);
let r: i32x2 = transmute(vqrshl_s32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlq_s32() {
// Lanes 0/1 saturate at i32::MIN / i32::MAX; lanes 2/3 become x*4.
let a: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 2, 3);
let b: i32x4 = i32x4::new(2, 2, 2, 2);
let e: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 8, 12);
let r: i32x4 = transmute(vqrshlq_s32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshl_s64() {
// Single lane holds i64::MIN, which saturates under the shift, so the result equals the input.
let a: i64x1 = i64x1::new(-9223372036854775808);
let b: i64x1 = i64x1::new(2);
let e: i64x1 = i64x1::new(-9223372036854775808);
let r: i64x1 = transmute(vqrshl_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlq_s64() {
// Both lanes hold i64::MIN / i64::MAX, so both saturate and the result equals the input.
let a: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF);
let b: i64x2 = i64x2::new(2, 2);
let e: i64x2 = i64x2::new(-9223372036854775808, 0x7F_FF_FF_FF_FF_FF_FF_FF);
let r: i64x2 = transmute(vqrshlq_s64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshl_u8() {
// Unsigned data shifted by a signed vector: lane 1 (u8::MAX) saturates; the rest become x*4.
let a: u8x8 = u8x8::new(0, 0xFF, 2, 3, 4, 5, 6, 7);
let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
let e: u8x8 = u8x8::new(0, 0xFF, 8, 12, 16, 20, 24, 28);
let r: u8x8 = transmute(vqrshl_u8(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlq_u8() {
// 128-bit variant: lane 1 (u8::MAX) saturates; the rest become x*4.
let a: u8x16 = u8x16::new(0, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let e: u8x16 = u8x16::new(0, 0xFF, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
let r: u8x16 = transmute(vqrshlq_u8(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshl_u16() {
// Lane 1 (u16::MAX) saturates under the <<2 shift; the rest become x*4.
let a: u16x4 = u16x4::new(0, 0xFF_FF, 2, 3);
let b: i16x4 = i16x4::new(2, 2, 2, 2);
let e: u16x4 = u16x4::new(0, 0xFF_FF, 8, 12);
let r: u16x4 = transmute(vqrshl_u16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlq_u16() {
// 128-bit variant: lane 1 (u16::MAX) saturates; the rest become x*4.
let a: u16x8 = u16x8::new(0, 0xFF_FF, 2, 3, 4, 5, 6, 7);
let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
let e: u16x8 = u16x8::new(0, 0xFF_FF, 8, 12, 16, 20, 24, 28);
let r: u16x8 = transmute(vqrshlq_u16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshl_u32() {
// Lane 0 (0) is unchanged; lane 1 (u32::MAX) saturates, so the result equals the input.
let a: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
let b: i32x2 = i32x2::new(2, 2);
let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF);
let r: u32x2 = transmute(vqrshl_u32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlq_u32() {
// Lane 1 (u32::MAX) saturates; lanes 2/3 become x*4.
let a: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 2, 3);
let b: i32x4 = i32x4::new(2, 2, 2, 2);
let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 8, 12);
let r: u32x4 = transmute(vqrshlq_u32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshl_u64() {
// Single zero lane: 0 << 2 is still 0.
let a: u64x1 = u64x1::new(0);
let b: i64x1 = i64x1::new(2);
let e: u64x1 = u64x1::new(0);
let r: u64x1 = transmute(vqrshl_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshlq_u64() {
// Lane 0 (0) is unchanged; lane 1 (u64::MAX) saturates, so the result equals the input.
let a: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let b: i64x2 = i64x2::new(2, 2);
let e: u64x2 = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
let r: u64x2 = transmute(vqrshlq_u64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrn_n_s16() {
// Right shift by 2 then narrow: lane 0 saturates to i8::MIN; other lanes are a[i]/4.
let a: i16x8 = i16x8::new(-32768, 4, 8, 12, 16, 20, 24, 28);
let e: i8x8 = i8x8::new(-128, 1, 2, 3, 4, 5, 6, 7);
let r: i8x8 = transmute(vqrshrn_n_s16::<2>(transmute(a)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrn_n_s32() {
// Right shift by 2 then narrow: lane 0 saturates to i16::MIN; other lanes are a[i]/4.
let a: i32x4 = i32x4::new(-2147483648, 4, 8, 12);
let e: i16x4 = i16x4::new(-32768, 1, 2, 3);
let r: i16x4 = transmute(vqrshrn_n_s32::<2>(transmute(a)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrn_n_s64() {
// Right shift by 2 then narrow: lane 0 saturates to i32::MIN; lane 1 is 4/4 = 1.
let a: i64x2 = i64x2::new(-9223372036854775808, 4);
let e: i32x2 = i32x2::new(-2147483648, 1);
let r: i32x2 = transmute(vqrshrn_n_s64::<2>(transmute(a)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrn_n_u16() {
// All inputs are exact multiples of 4, so each narrowed lane is a[i]/4 (no saturation).
let a: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
let r: u8x8 = transmute(vqrshrn_n_u16::<2>(transmute(a)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrn_n_u32() {
// All inputs are exact multiples of 4, so each narrowed lane is a[i]/4 (no saturation).
let a: u32x4 = u32x4::new(0, 4, 8, 12);
let e: u16x4 = u16x4::new(0, 1, 2, 3);
let r: u16x4 = transmute(vqrshrn_n_u32::<2>(transmute(a)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrn_n_u64() {
// Exact multiples of 4: each narrowed lane is a[i]/4 (no saturation).
let a: u64x2 = u64x2::new(0, 4);
let e: u32x2 = u32x2::new(0, 1);
let r: u32x2 = transmute(vqrshrn_n_u64::<2>(transmute(a)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrun_n_s16() {
// Non-negative multiples of 4: signed-to-unsigned narrow yields a[i]/4 (no saturation).
let a: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
let r: u8x8 = transmute(vqrshrun_n_s16::<2>(transmute(a)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrun_n_s32() {
// Non-negative multiples of 4: signed-to-unsigned narrow yields a[i]/4 (no saturation).
let a: i32x4 = i32x4::new(0, 4, 8, 12);
let e: u16x4 = u16x4::new(0, 1, 2, 3);
let r: u16x4 = transmute(vqrshrun_n_s32::<2>(transmute(a)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqrshrun_n_s64() {
// Non-negative multiples of 4: signed-to-unsigned narrow yields a[i]/4 (no saturation).
let a: i64x2 = i64x2::new(0, 4);
let e: u32x2 = u32x2::new(0, 1);
let r: u32x2 = transmute(vqrshrun_n_s64::<2>(transmute(a)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrsqrte_f32() {
let a: f32x2 = f32x2::new(1.0, 2.0);

View file

@ -2007,6 +2007,197 @@ aarch64 = fminnmp
link-aarch64 = fminnmp._EXT_
generate float32x4_t:float32x4_t:float32x4_t
/// Signed saturating rounding shift left
name = vqrshl
a = MIN, MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate MIN, MAX, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
aarch64 = sqrshl
link-aarch64 = sqrshl._EXT_
arm = vqrshl
link-arm = vqrshifts._EXT_
generate int*_t, int64x*_t
/// Signed saturating rounding shift left
name = vqrshl
multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = simd_extract, {vqrshl-in_ntt-noext, a, b}, 0
a = 1
b = 2
validate 4
aarch64 = sqrshl
generate i8, i16, i32, i64
/// Unsigned saturating rounding shift left
name = vqrshl
out-suffix
a = MIN, MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
validate 0, MAX, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60
aarch64 = uqrshl
link-aarch64 = uqrshl._EXT_
arm = vqrshl
link-arm = vqrshiftu._EXT_
generate uint8x8_t:int8x8_t:uint8x8_t, uint8x16_t:int8x16_t:uint8x16_t, uint16x4_t:int16x4_t:uint16x4_t, uint16x8_t:int16x8_t:uint16x8_t
generate uint32x2_t:int32x2_t:uint32x2_t, uint32x4_t:int32x4_t:uint32x4_t, uint64x1_t:int64x1_t:uint64x1_t, uint64x2_t:int64x2_t:uint64x2_t
/// Unsigned saturating rounding shift left
name = vqrshl
out-suffix
multi_fn = vdup_n-out_ntt-noext, a:out_ntt, a
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
multi_fn = simd_extract, {vqrshl-out_ntt-noext, a, b}, 0
a = 1
b = 2
validate 4
aarch64 = uqrshl
generate u8:i8:u8, u16:i16:u16, u32:i32:u32, u64:i64:u64
/// Signed saturating rounded shift right narrow
name = vqrshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
a = MIN, 4, 8, 12, 16, 20, 24, 28
n = 2
validate MIN, 1, 2, 3, 4, 5, 6, 7
aarch64 = sqrshrn
link-aarch64 = sqrshrn._EXT2_
const-aarch64 = N
arm = vqrshrn
link-arm = vqrshiftns._EXT2_
const-arm = -N as ttn
generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t
/// Signed saturating rounded shift right narrow
name = vqrshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
multi_fn = simd_extract, {vqrshrn_n-in_ntt-::<N>, a}, 0
a = 4
n = 2
validate 1
aarch64 = sqrshrn
generate i16:i8, i32:i16, i64:i32
/// Signed saturating rounded shift right narrow
name = vqrshrn_high
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle-out_len-noext, a, {vqrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 2, 3, 2, 3, 6, 7
b = 8, 12, 24, 28, 48, 52, 56, 60
n = 2
validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
aarch64 = sqrshrn2
generate int8x8_t:int16x8_t:int8x16_t, int16x4_t:int32x4_t:int16x8_t, int32x2_t:int64x2_t:int32x4_t
/// Unsigned saturating rounded shift right narrow
name = vqrshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
a = MIN, 4, 8, 12, 16, 20, 24, 28
n = 2
validate 0, 1, 2, 3, 4, 5, 6, 7
aarch64 = uqrshrn
link-aarch64 = uqrshrn._EXT2_
const-aarch64 = N
arm = vqrshrn
link-arm = vqrshiftnu._EXT2_
const-arm = -N as ttn
generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t
/// Unsigned saturating rounded shift right narrow
name = vqrshrn
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
multi_fn = simd_extract, {vqrshrn_n-in_ntt-::<N>, a}, 0
a = 4
n = 2
validate 1
aarch64 = uqrshrn
generate u16:u8, u32:u16, u64:u32
/// Unsigned saturating rounded shift right narrow
name = vqrshrn_high
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle-out_len-noext, a, {vqrshrn_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 2, 3, 2, 3, 6, 7
b = 8, 12, 24, 28, 48, 52, 56, 60
n = 2
validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
aarch64 = uqrshrn2
generate uint8x8_t:uint16x8_t:uint8x16_t, uint16x4_t:uint32x4_t:uint16x8_t, uint32x2_t:uint64x2_t:uint32x4_t
/// Signed saturating rounded shift right unsigned narrow
name = vqrshrun
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
a = 0, 4, 8, 12, 16, 20, 24, 28
n = 2
validate 0, 1, 2, 3, 4, 5, 6, 7
aarch64 = sqrshrun
link-aarch64 = sqrshrun._EXT2_
const-aarch64 = N
arm = vqrshrun
link-arm = vqrshiftnsu._EXT2_
const-arm = -N as ttn
generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t
/// Signed saturating rounded shift right unsigned narrow
name = vqrshrun
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = vdupq_n-in_ntt-noext, a:in_long_ntt, a
multi_fn = simd_extract, {vqrshrun_n-in_ntt-::<N>, a}, 0
a = 4
n = 2
validate 1
aarch64 = sqrshrun
generate i16:u8, i32:u16, i64:u32
/// Signed saturating rounded shift right unsigned narrow
name = vqrshrun_high
noq-n-suffix
constn = N
multi_fn = static_assert-N-1-halfbits
multi_fn = simd_shuffle-out_len-noext, a, {vqrshrun_n-noqself-::<N>, b}, {asc-0-out_len}
a = 0, 1, 2, 3, 2, 3, 6, 7
b = 8, 12, 24, 28, 48, 52, 56, 60
n = 2
validate 0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 12, 13, 14, 15
aarch64 = sqrshrun2
generate uint8x8_t:int16x8_t:uint8x16_t, uint16x4_t:int32x4_t:uint16x8_t, uint32x2_t:int64x2_t:uint32x4_t
/// Calculates the square root of each lane.
name = vsqrt
fn = simd_fsqrt

View file

@ -88,10 +88,13 @@ fn type_len(t: &str) -> usize {
fn type_bits(t: &str) -> usize {
match t {
"int8x8_t" | "int8x16_t" | "uint8x8_t" | "uint8x16_t" | "poly8x8_t" | "poly8x16_t" => 8,
"int16x4_t" | "int16x8_t" | "uint16x4_t" | "uint16x8_t" | "poly16x4_t" | "poly16x8_t" => 16,
"int32x2_t" | "int32x4_t" | "uint32x2_t" | "uint32x4_t" => 32,
"int64x1_t" | "int64x2_t" | "uint64x1_t" | "uint64x2_t" | "poly64x1_t" | "poly64x2_t" => 64,
"int8x8_t" | "int8x16_t" | "uint8x8_t" | "uint8x16_t" | "poly8x8_t" | "poly8x16_t"
| "i8" | "u8" => 8,
"int16x4_t" | "int16x8_t" | "uint16x4_t" | "uint16x8_t" | "poly16x4_t" | "poly16x8_t"
| "i16" | "u16" => 16,
"int32x2_t" | "int32x4_t" | "uint32x2_t" | "uint32x4_t" | "i32" | "u32" => 32,
"int64x1_t" | "int64x2_t" | "uint64x1_t" | "uint64x2_t" | "poly64x1_t" | "poly64x2_t"
| "i64" | "u64" => 64,
_ => panic!("unknown type: {}", t),
}
}
@ -220,6 +223,34 @@ fn type_to_n_suffix(t: &str) -> &str {
}
}
/// Maps a NEON type name to its "no-q" `_n` intrinsic suffix (the `q`-less
/// spelling shared by 64- and 128-bit vector variants). Scalar types get the
/// AArch64 size-letter prefix (`b`/`h`/`s`/`d`) before the `_n` suffix.
/// Panics on any type name it does not know.
fn type_to_noq_n_suffix(t: &str) -> &str {
    // (type name, suffix) pairs; both vector widths of a lane type share a suffix.
    const SUFFIXES: &[(&str, &str)] = &[
        ("int8x8_t", "_n_s8"),
        ("int8x16_t", "_n_s8"),
        ("int16x4_t", "_n_s16"),
        ("int16x8_t", "_n_s16"),
        ("int32x2_t", "_n_s32"),
        ("int32x4_t", "_n_s32"),
        ("int64x1_t", "_n_s64"),
        ("int64x2_t", "_n_s64"),
        ("uint8x8_t", "_n_u8"),
        ("uint8x16_t", "_n_u8"),
        ("uint16x4_t", "_n_u16"),
        ("uint16x8_t", "_n_u16"),
        ("uint32x2_t", "_n_u32"),
        ("uint32x4_t", "_n_u32"),
        ("uint64x1_t", "_n_u64"),
        ("uint64x2_t", "_n_u64"),
        ("float16x4_t", "_n_f16"),
        ("float16x8_t", "_n_f16"),
        ("float32x2_t", "_n_f32"),
        ("float32x4_t", "_n_f32"),
        ("float64x1_t", "_n_f64"),
        ("float64x2_t", "_n_f64"),
        ("poly8x8_t", "_n_p8"),
        ("poly8x16_t", "_n_p8"),
        ("poly16x4_t", "_n_p16"),
        ("poly16x8_t", "_n_p16"),
        ("poly64x1_t", "_n_p64"),
        ("poly64x2_t", "_n_p64"),
        ("i8", "b_n_s8"),
        ("i16", "h_n_s16"),
        ("i32", "s_n_s32"),
        ("i64", "d_n_s64"),
        ("u8", "b_n_u8"),
        ("u16", "h_n_u16"),
        ("u32", "s_n_u32"),
        ("u64", "d_n_u64"),
    ];
    match SUFFIXES.iter().find(|&&(name, _)| name == t) {
        Some(&(_, suffix)) => suffix,
        None => panic!("unknown type: {}", t),
    }
}
fn type_to_lane_suffixes<'a>(out_t: &'a str, in_t: &'a str) -> String {
let mut str = String::new();
let suf = type_to_suffix(out_t);
@ -323,6 +354,7 @@ enum Suffix {
NoQ,
NoQDouble,
NSuffix,
NoQNSuffix,
OutSuffix,
Lane,
}
@ -381,30 +413,52 @@ fn type_to_global_type(t: &str) -> &str {
fn type_to_native_type(t: &str) -> &str {
match t {
"int8x8_t" => "i8",
"int8x16_t" => "i8",
"int16x4_t" => "i16",
"int16x8_t" => "i16",
"int32x2_t" => "i32",
"int32x4_t" => "i32",
"int64x1_t" => "i64",
"int64x2_t" => "i64",
"uint8x8_t" => "u8",
"uint8x16_t" => "u8",
"uint16x4_t" => "u16",
"uint16x8_t" => "u16",
"uint32x2_t" => "u32",
"uint32x4_t" => "u32",
"uint64x1_t" => "u64",
"uint64x2_t" => "u64",
"float16x4_t" => "f16",
"float16x8_t" => "f16",
"float32x2_t" => "f32",
"float32x4_t" => "f32",
"float64x1_t" => "f64",
"float64x2_t" => "f64",
"poly64x1_t" => "u64",
"poly64x2_t" => "u64",
"int8x8_t" | "int8x16_t" | "i8" => "i8",
"int16x4_t" | "int16x8_t" | "i16" => "i16",
"int32x2_t" | "int32x4_t" | "i32" => "i32",
"int64x1_t" | "int64x2_t" | "i64" => "i64",
"uint8x8_t" | "uint8x16_t" | "u8" => "u8",
"uint16x4_t" | "uint16x8_t" | "u16" => "u16",
"uint32x2_t" | "uint32x4_t" | "u32" => "u32",
"uint64x1_t" | "uint64x2_t" | "u64" => "u64",
"float16x4_t" | "float16x8_t" => "f16",
"float32x2_t" | "float32x4_t" => "f32",
"float64x1_t" | "float64x2_t" => "f64",
"poly64x1_t" | "poly64x2_t" => "u64",
_ => panic!("unknown type: {}", t),
}
}
/// Maps a scalar ("native") type name to its 64-bit NEON vector counterpart
/// (e.g. `i8` -> `int8x8_t`). Panics on any type name it does not know.
fn native_type_to_type(t: &str) -> &str {
    const VECTORS: &[(&str, &str)] = &[
        ("i8", "int8x8_t"),
        ("i16", "int16x4_t"),
        ("i32", "int32x2_t"),
        ("i64", "int64x1_t"),
        ("u8", "uint8x8_t"),
        ("u16", "uint16x4_t"),
        ("u32", "uint32x2_t"),
        ("u64", "uint64x1_t"),
        ("f16", "float16x4_t"),
        ("f32", "float32x2_t"),
        ("f64", "float64x1_t"),
    ];
    match VECTORS.iter().find(|&&(native, _)| native == t) {
        Some(&(_, vector)) => vector,
        None => panic!("unknown type: {}", t),
    }
}
/// Maps a scalar ("native") type name to its 128-bit ("long") NEON vector
/// counterpart (e.g. `i8` -> `int8x16_t`). Panics on any type name it does
/// not know.
fn native_type_to_long_type(t: &str) -> &str {
    const VECTORS: &[(&str, &str)] = &[
        ("i8", "int8x16_t"),
        ("i16", "int16x8_t"),
        ("i32", "int32x4_t"),
        ("i64", "int64x2_t"),
        ("u8", "uint8x16_t"),
        ("u16", "uint16x8_t"),
        ("u32", "uint32x4_t"),
        ("u64", "uint64x2_t"),
        ("f16", "float16x8_t"),
        ("f32", "float32x4_t"),
        ("f64", "float64x2_t"),
    ];
    match VECTORS.iter().find(|&&(native, _)| native == t) {
        Some(&(_, vector)) => vector,
        None => panic!("unknown type: {}", t),
    }
}
@ -437,6 +491,14 @@ fn type_to_ext(t: &str) -> &str {
"poly8x16_t" => "v16i8",
"poly16x4_t" => "v4i16",
"poly16x8_t" => "v8i16",
"i8" => "v8i8",
"i16" => "v4i16",
"i32" => "v2i32",
"i64" => "v1i64",
"u8" => "v8i8",
"u16" => "v4i16",
"u32" => "v2i32",
"u64" => "v1i64",
/*
"poly64x1_t" => "i64x1",
"poly64x2_t" => "i64x2",
@ -757,6 +819,7 @@ fn gen_aarch64(
current_name: &str,
current_aarch64: &Option<String>,
link_aarch64: &Option<String>,
const_aarch64: &Option<String>,
constn: &Option<String>,
in_t: &[&str; 3],
out_t: &str,
@ -788,6 +851,7 @@ fn gen_aarch64(
type_to_noq_double_suffixes(out_t, in_t[1])
),
NSuffix => format!("{}{}", current_name, type_to_n_suffix(in_t[1])),
NoQNSuffix => format!("{}{}", current_name, type_to_noq_n_suffix(in_t[1])),
OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)),
Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
};
@ -799,25 +863,21 @@ fn gen_aarch64(
)
}
current_fn
} else if !multi_fn.is_empty() {
if link_aarch64.is_some() {
panic!(
"[{}] Can't specify link and (multi) fn at the same time.",
name
)
}
String::new()
} else if link_aarch64.is_some() {
format!("{}_", name)
} else {
if link_aarch64.is_none() {
if multi_fn.is_empty() {
panic!(
"[{}] Either (multi) fn or link-aarch have to be specified.",
name
)
}
format!("{}_", name)
String::new()
};
let current_aarch64 = current_aarch64.clone().unwrap();
let ext_c = if let Some(link_aarch64) = link_aarch64.clone() {
let mut ext_c = String::new();
let mut ext_c_const = String::new();
if let Some(link_aarch64) = link_aarch64.clone() {
let ext = type_to_ext(in_t[0]);
let ext2 = type_to_ext(out_t);
let link_aarch64 = if link_aarch64.starts_with("llvm") {
@ -827,7 +887,7 @@ fn gen_aarch64(
link.push_str(&link_aarch64);
link.replace("_EXT_", ext).replace("_EXT2_", ext2)
};
format!(
ext_c = format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
@ -849,9 +909,32 @@ fn gen_aarch64(
_ => unimplemented!("unknown para_num"),
},
out_t
)
} else {
String::new()
);
if const_aarch64.is_some() {
ext_c_const = format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
fn {}({}) -> {};
}}
"#,
link_aarch64,
current_fn,
match para_num {
1 => {
format!("a: {}, n: i32", in_t[0])
}
2 => {
format!("a: {}, b: {}, n: i32", in_t[0], in_t[1])
}
3 => {
format!("a: {}, b: {}, c: {}, n: i32", in_t[0], in_t[1], in_t[2])
}
_ => unimplemented!("unknown para_num"),
},
out_t
);
}
};
let multi_calls = if !multi_fn.is_empty() {
let mut calls = String::new();
@ -891,60 +974,93 @@ fn gen_aarch64(
} else {
String::new()
};
let call = match (multi_calls.len(), para_num, fixed.len()) {
(0, 1, 0) => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
{}{}(a)
}}"#,
name, const_declare, in_t[0], out_t, ext_c, current_fn,
),
(0, 1, _) => {
let fixed: Vec<String> = fixed.iter().take(type_len(in_t[0])).cloned().collect();
format!(
let call = if let Some(const_aarch64) = const_aarch64 {
match para_num {
1 => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
let b{};
{}{}(a, transmute(b))
{}
{}{}(a, {})
}}"#,
name,
const_declare,
in_t[0],
out_t,
values(in_t[0], &fixed),
ext_c,
multi_calls,
ext_c_const,
current_fn,
)
const_aarch64
),
2 => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
{}{}{}(a, b, {})
}}"#,
name,
const_declare,
in_t[0],
out_t,
multi_calls,
ext_c_const,
current_fn,
const_aarch64
),
_ => String::new(),
}
(0, 2, _) => format!(
r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{
} else {
match (multi_calls.len(), para_num, fixed.len()) {
(0, 1, 0) => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
{}{}(a)
}}"#,
name, const_declare, in_t[0], out_t, ext_c, current_fn,
),
(0, 1, _) => {
let fixed: Vec<String> = fixed.iter().take(type_len(in_t[0])).cloned().collect();
format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
let b{};
{}{}(a, transmute(b))
}}"#,
name,
const_declare,
in_t[0],
out_t,
values(in_t[0], &fixed),
ext_c,
current_fn,
)
}
(0, 2, _) => format!(
r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{
{}{}(a, b)
}}"#,
name, const_declare, in_t[0], in_t[1], out_t, ext_c, current_fn,
),
(0, 3, _) => format!(
r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{
name, const_declare, in_t[0], in_t[1], out_t, ext_c, current_fn,
),
(0, 3, _) => format!(
r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{
{}{}(a, b, c)
}}"#,
name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, current_fn,
),
(_, 1, _) => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, current_fn,
),
(_, 1, _) => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
{}{}
}}"#,
name, const_declare, in_t[0], out_t, ext_c, multi_calls,
),
(_, 2, _) => format!(
r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{
name, const_declare, in_t[0], out_t, ext_c, multi_calls,
),
(_, 2, _) => format!(
r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{
{}{}
}}"#,
name, const_declare, in_t[0], in_t[1], out_t, ext_c, multi_calls,
),
(_, 3, _) => format!(
r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{
name, const_declare, in_t[0], in_t[1], out_t, ext_c, multi_calls,
),
(_, 3, _) => format!(
r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{
{}{}
}}"#,
name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, multi_calls,
),
(_, _, _) => String::new(),
name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, multi_calls,
),
(_, _, _) => String::new(),
}
};
let function = format!(
r#"
@ -1074,6 +1190,8 @@ fn gen_arm(
link_arm: &Option<String>,
current_aarch64: &Option<String>,
link_aarch64: &Option<String>,
const_arm: &Option<String>,
const_aarch64: &Option<String>,
constn: &Option<String>,
in_t: &[&str; 3],
out_t: &str,
@ -1106,6 +1224,7 @@ fn gen_arm(
type_to_noq_double_suffixes(out_t, in_t[1])
),
NSuffix => format!("{}{}", current_name, type_to_n_suffix(in_t[1])),
NoQNSuffix => format!("{}{}", current_name, type_to_noq_n_suffix(in_t[1])),
OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)),
Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
};
@ -1126,69 +1245,118 @@ fn gen_arm(
)
}
current_fn
} else if !multi_fn.is_empty() {
if link_aarch64.is_some() || link_arm.is_some() {
panic!(
"[{}] Can't specify link and function at the same time. multi_fn / {:?} / {:?}",
name, link_aarch64, link_arm
)
}
String::new()
} else if link_aarch64.is_some() || link_arm.is_some() {
format!("{}_", name)
} else {
if link_aarch64.is_none() && link_arm.is_none() {
if multi_fn.is_empty() {
panic!(
"[{}] Either fn or link-arm and link-aarch have to be specified.",
name
)
}
format!("{}_", name)
String::new()
};
let ext_c =
if let (Some(link_arm), Some(link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) {
let ext = type_to_ext(in_t[0]);
let ext2 = type_to_ext(out_t);
let link_arm = if link_arm.starts_with("llvm") {
link_arm.replace("_EXT_", ext).replace("_EXT2_", ext2)
} else {
let mut link = String::from("llvm.arm.neon.");
link.push_str(&link_arm);
link.replace("_EXT_", ext).replace("_EXT2_", ext2)
};
let link_aarch64 = if link_aarch64.starts_with("llvm") {
link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2)
} else {
let mut link = String::from("llvm.aarch64.neon.");
link.push_str(&link_aarch64);
link.replace("_EXT_", ext).replace("_EXT2_", ext2)
};
format!(
r#"#[allow(improper_ctypes)]
let mut ext_c = String::new();
let mut ext_c_const_arm = String::new();
let mut ext_c_const_aarch64 = String::new();
if let (Some(link_arm), Some(link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) {
let ext = type_to_ext(in_t[0]);
let ext2 = type_to_ext(out_t);
let link_arm = if link_arm.starts_with("llvm") {
link_arm.replace("_EXT_", ext).replace("_EXT2_", ext2)
} else {
let mut link = String::from("llvm.arm.neon.");
link.push_str(&link_arm);
link.replace("_EXT_", ext).replace("_EXT2_", ext2)
};
let link_aarch64 = if link_aarch64.starts_with("llvm") {
link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2)
} else {
let mut link = String::from("llvm.aarch64.neon.");
link.push_str(&link_aarch64);
link.replace("_EXT_", ext).replace("_EXT2_", ext2)
};
ext_c = format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
#[cfg_attr(target_arch = "arm", link_name = "{}")]
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
fn {}({}) -> {};
}}
"#,
link_arm,
link_aarch64,
current_fn,
match para_num {
1 => {
format!("a: {}", in_t[0])
}
2 => {
format!("a: {}, b: {}", in_t[0], in_t[1])
}
3 => {
format!("a: {}, b: {}, c: {}", in_t[0], in_t[1], in_t[2])
}
_ => unimplemented!("unknown para_num"),
},
out_t
);
if const_arm.is_some() {
ext_c_const_arm = format!(
r#"
#[allow(improper_ctypes)]
extern "C" {{
#[cfg_attr(target_arch = "arm", link_name = "{}")]
fn {}({}) -> {};
}}
"#,
link_arm,
link_aarch64,
current_fn,
match para_num {
1 => {
format!("a: {}", in_t[0])
format!("a: {}, n: {}", in_t[0], in_t[0])
}
2 => {
format!("a: {}, b: {}", in_t[0], in_t[1])
format!("a: {}, b: {}, n: {}", in_t[0], in_t[1], in_t[1])
}
3 => {
format!("a: {}, b: {}, c: {}", in_t[0], in_t[1], in_t[2])
format!(
"a: {}, b: {}, c: {}, n: {}",
in_t[0], in_t[1], in_t[2], in_t[2]
)
}
_ => unimplemented!("unknown para_num"),
},
out_t
)
} else {
String::new()
};
);
}
if const_aarch64.is_some() {
ext_c_const_aarch64 = format!(
r#"
#[allow(improper_ctypes)]
extern "C" {{
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
fn {}({}) -> {};
}}
"#,
link_aarch64,
current_fn,
match para_num {
1 => {
format!("a: {}, n: i32", in_t[0])
}
2 => {
format!("a: {}, b: {}, n: i32", in_t[0], in_t[1])
}
3 => {
format!("a: {}, b: {}, c: {}, n: i32", in_t[0], in_t[1], in_t[2])
}
_ => unimplemented!("unknown para_num"),
},
out_t
);
}
};
let multi_calls = if !multi_fn.is_empty() {
let mut calls = String::new();
for i in 0..multi_fn.len() {
@ -1282,8 +1450,99 @@ fn gen_arm(
),
(_, _, _) => String::new(),
};
let function = format!(
r#"
let call_const_arm = if let Some(const_arm) = const_arm {
let const_arm = const_arm.replace("ttn", type_to_native_type(in_t[1]));
let mut cnt = String::from(in_t[1]);
cnt.push_str("(");
for i in 0..type_len(in_t[1]) {
if i != 0 {
cnt.push_str(", ");
}
cnt.push_str(&const_arm);
}
cnt.push_str(")");
match para_num {
1 => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
{}{}{}(a, {})
}}"#,
name, const_declare, in_t[0], out_t, multi_calls, ext_c_const_arm, current_fn, cnt
),
2 => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
{}{}{}(a, b, {})
}}"#,
name, const_declare, in_t[0], out_t, multi_calls, ext_c_const_arm, current_fn, cnt
),
_ => String::new(),
}
} else {
String::new()
};
let call_const_aarch64 = if let Some(const_aarch64) = const_aarch64 {
match para_num {
1 => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
{}{}{}(a, {})
}}"#,
name,
const_declare,
in_t[0],
out_t,
multi_calls,
ext_c_const_aarch64,
current_fn,
const_aarch64
),
2 => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
{}{}{}(a, b, {})
}}"#,
name,
const_declare,
in_t[0],
out_t,
multi_calls,
ext_c_const_aarch64,
current_fn,
const_aarch64
),
_ => String::new(),
}
} else {
String::new()
};
let function = if const_arm.is_some() && const_aarch64.is_some() {
format!(
r#"
{}
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr({}{}))]{}
{}
{}
#[inline]
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr({}{}))]{}
{}
"#,
current_comment,
expand_intrinsic(&current_arm, in_t[1]),
const_assert,
const_legacy,
call_const_arm,
current_comment,
expand_intrinsic(&current_aarch64, in_t[1]),
const_assert,
const_legacy,
call_const_aarch64,
)
} else {
format!(
r#"
{}
#[inline]
#[target_feature(enable = "neon")]
@ -1292,15 +1551,16 @@ fn gen_arm(
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr({}{}))]{}
{}
"#,
current_comment,
current_target,
expand_intrinsic(&current_arm, in_t[1]),
const_assert,
expand_intrinsic(&current_aarch64, in_t[1]),
const_assert,
const_legacy,
call,
);
current_comment,
current_target,
expand_intrinsic(&current_arm, in_t[1]),
const_assert,
expand_intrinsic(&current_aarch64, in_t[1]),
const_assert,
const_legacy,
call,
)
};
let test = gen_test(
&name,
in_t,
@ -1611,6 +1871,18 @@ fn get_call(
re = Some((re_params[0].clone(), out_t.to_string()));
} else if re_params[1] == "half" {
re = Some((re_params[0].clone(), type_to_half(in_t[1]).to_string()));
} else if re_params[1] == "in_ntt" {
re = Some((
re_params[0].clone(),
native_type_to_type(in_t[1]).to_string(),
));
} else if re_params[1] == "in_long_ntt" {
re = Some((
re_params[0].clone(),
native_type_to_long_type(in_t[1]).to_string(),
));
} else if re_params[1] == "out_ntt" {
re = Some((re_params[0].clone(), native_type_to_type(out_t).to_string()));
} else {
re = Some((re_params[0].clone(), re_params[1].clone()));
}
@ -1669,6 +1941,10 @@ fn get_call(
fn_name.push_str(&(type_len(in_t[1]) / 2).to_string());
} else if fn_format[1] == "nout" {
fn_name.push_str(type_to_n_suffix(out_t));
} else if fn_format[1] == "in_ntt" {
fn_name.push_str(type_to_suffix(native_type_to_type(in_t[1])));
} else if fn_format[1] == "out_ntt" {
fn_name.push_str(type_to_suffix(native_type_to_type(out_t)));
} else {
fn_name.push_str(&fn_format[1]);
};
@ -1725,6 +2001,8 @@ fn main() -> io::Result<()> {
let mut current_aarch64: Option<String> = None;
let mut link_arm: Option<String> = None;
let mut link_aarch64: Option<String> = None;
let mut const_arm: Option<String> = None;
let mut const_aarch64: Option<String> = None;
let mut constn: Option<String> = None;
let mut para_num = 2;
let mut suffix: Suffix = Normal;
@ -1810,6 +2088,8 @@ mod test {
current_aarch64 = None;
link_aarch64 = None;
link_arm = None;
const_aarch64 = None;
const_arm = None;
current_tests = Vec::new();
constn = None;
para_num = 2;
@ -1842,6 +2122,8 @@ mod test {
suffix = NoQDouble;
} else if line.starts_with("n-suffix") {
suffix = NSuffix;
} else if line.starts_with("noq-n-suffix") {
suffix = NoQNSuffix;
} else if line.starts_with("out-suffix") {
suffix = OutSuffix;
} else if line.starts_with("lane-suffixes") {
@ -1861,8 +2143,12 @@ mod test {
current_tests.push((a.clone(), b.clone(), c.clone(), n.clone(), e));
} else if line.starts_with("link-aarch64 = ") {
link_aarch64 = Some(String::from(&line[15..]));
} else if line.starts_with("const-aarch64 = ") {
const_aarch64 = Some(String::from(&line[16..]));
} else if line.starts_with("link-arm = ") {
link_arm = Some(String::from(&line[11..]));
} else if line.starts_with("const-arm = ") {
const_arm = Some(String::from(&line[12..]));
} else if line.starts_with("target = ") {
target = match Some(String::from(&line[9..])) {
Some(input) => match input.as_str() {
@ -1921,6 +2207,8 @@ mod test {
&link_arm,
&current_aarch64,
&link_aarch64,
&const_arm,
&const_aarch64,
&constn,
&in_t,
&out_t,
@ -1940,6 +2228,7 @@ mod test {
&current_name,
&current_aarch64,
&link_aarch64,
&const_aarch64,
&constn,
&in_t,
&out_t,