Add vqdmul* neon instructions (#1130)
This commit is contained in:
parent
20c0120362
commit
de3e8f72c5
4 changed files with 2155 additions and 40 deletions
File diff suppressed because it is too large
Load diff
|
|
@ -3567,7 +3567,7 @@ vqnegq_s32_(a)
|
|||
pub unsafe fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v8i8")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v8i8")]
|
||||
fn vqsub_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
|
||||
}
|
||||
|
|
@ -3583,7 +3583,7 @@ vqsub_u8_(a, b)
|
|||
pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v16i8")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v16i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v16i8")]
|
||||
fn vqsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
|
||||
}
|
||||
|
|
@ -3599,7 +3599,7 @@ vqsubq_u8_(a, b)
|
|||
pub unsafe fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v4i16")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i16")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v4i16")]
|
||||
fn vqsub_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
|
||||
}
|
||||
|
|
@ -3615,7 +3615,7 @@ vqsub_u16_(a, b)
|
|||
pub unsafe fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v8i16")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i16")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v8i16")]
|
||||
fn vqsubq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
|
||||
}
|
||||
|
|
@ -3631,7 +3631,7 @@ vqsubq_u16_(a, b)
|
|||
pub unsafe fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v2i32")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v2i32")]
|
||||
fn vqsub_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
|
||||
}
|
||||
|
|
@ -3647,7 +3647,7 @@ vqsub_u32_(a, b)
|
|||
pub unsafe fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v4i32")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v4i32")]
|
||||
fn vqsubq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
|
||||
}
|
||||
|
|
@ -3663,7 +3663,7 @@ vqsubq_u32_(a, b)
|
|||
pub unsafe fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v1i64")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v1i64")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v1i64")]
|
||||
fn vqsub_u64_(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t;
|
||||
}
|
||||
|
|
@ -3679,7 +3679,7 @@ vqsub_u64_(a, b)
|
|||
pub unsafe fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v2i64")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i64")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v2i64")]
|
||||
fn vqsubq_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t;
|
||||
}
|
||||
|
|
@ -3695,7 +3695,7 @@ vqsubq_u64_(a, b)
|
|||
pub unsafe fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v8i8")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v8i8")]
|
||||
fn vqsub_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
|
||||
}
|
||||
|
|
@ -3711,7 +3711,7 @@ vqsub_s8_(a, b)
|
|||
pub unsafe fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v16i8")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v16i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v16i8")]
|
||||
fn vqsubq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
|
||||
}
|
||||
|
|
@ -3727,7 +3727,7 @@ vqsubq_s8_(a, b)
|
|||
pub unsafe fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v4i16")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i16")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v4i16")]
|
||||
fn vqsub_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
|
||||
}
|
||||
|
|
@ -3743,7 +3743,7 @@ vqsub_s16_(a, b)
|
|||
pub unsafe fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v8i16")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i16")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v8i16")]
|
||||
fn vqsubq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
|
||||
}
|
||||
|
|
@ -3759,7 +3759,7 @@ vqsubq_s16_(a, b)
|
|||
pub unsafe fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v2i32")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v2i32")]
|
||||
fn vqsub_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
|
||||
}
|
||||
|
|
@ -3775,7 +3775,7 @@ vqsub_s32_(a, b)
|
|||
pub unsafe fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v4i32")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v4i32")]
|
||||
fn vqsubq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
|
||||
}
|
||||
|
|
@ -3791,7 +3791,7 @@ vqsubq_s32_(a, b)
|
|||
pub unsafe fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v1i64")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v1i64")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v1i64")]
|
||||
fn vqsub_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
|
||||
}
|
||||
|
|
@ -3807,7 +3807,7 @@ vqsub_s64_(a, b)
|
|||
pub unsafe fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v2i64")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i64")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v2i64")]
|
||||
fn vqsubq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
|
||||
}
|
||||
|
|
@ -4207,7 +4207,7 @@ vrhaddq_s32_(a, b)
|
|||
pub unsafe fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v8i8")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v8i8")]
|
||||
fn vqadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t;
|
||||
}
|
||||
|
|
@ -4223,7 +4223,7 @@ vqadd_u8_(a, b)
|
|||
pub unsafe fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v16i8")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v16i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v16i8")]
|
||||
fn vqaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t;
|
||||
}
|
||||
|
|
@ -4239,7 +4239,7 @@ vqaddq_u8_(a, b)
|
|||
pub unsafe fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v4i16")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i16")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v4i16")]
|
||||
fn vqadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t;
|
||||
}
|
||||
|
|
@ -4255,7 +4255,7 @@ vqadd_u16_(a, b)
|
|||
pub unsafe fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v8i16")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i16")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v8i16")]
|
||||
fn vqaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t;
|
||||
}
|
||||
|
|
@ -4271,7 +4271,7 @@ vqaddq_u16_(a, b)
|
|||
pub unsafe fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v2i32")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v2i32")]
|
||||
fn vqadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t;
|
||||
}
|
||||
|
|
@ -4287,7 +4287,7 @@ vqadd_u32_(a, b)
|
|||
pub unsafe fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v4i32")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v4i32")]
|
||||
fn vqaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t;
|
||||
}
|
||||
|
|
@ -4303,7 +4303,7 @@ vqaddq_u32_(a, b)
|
|||
pub unsafe fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v1i64")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v1i64")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v1i64")]
|
||||
fn vqadd_u64_(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t;
|
||||
}
|
||||
|
|
@ -4319,7 +4319,7 @@ vqadd_u64_(a, b)
|
|||
pub unsafe fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v2i64")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i64")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v2i64")]
|
||||
fn vqaddq_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t;
|
||||
}
|
||||
|
|
@ -4335,7 +4335,7 @@ vqaddq_u64_(a, b)
|
|||
pub unsafe fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v8i8")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v8i8")]
|
||||
fn vqadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
|
||||
}
|
||||
|
|
@ -4351,7 +4351,7 @@ vqadd_s8_(a, b)
|
|||
pub unsafe fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v16i8")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v16i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v16i8")]
|
||||
fn vqaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
|
||||
}
|
||||
|
|
@ -4367,7 +4367,7 @@ vqaddq_s8_(a, b)
|
|||
pub unsafe fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v4i16")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i16")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v4i16")]
|
||||
fn vqadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
|
||||
}
|
||||
|
|
@ -4383,7 +4383,7 @@ vqadd_s16_(a, b)
|
|||
pub unsafe fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v8i16")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i16")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v8i16")]
|
||||
fn vqaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
|
||||
}
|
||||
|
|
@ -4399,7 +4399,7 @@ vqaddq_s16_(a, b)
|
|||
pub unsafe fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v2i32")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v2i32")]
|
||||
fn vqadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
|
||||
}
|
||||
|
|
@ -4415,7 +4415,7 @@ vqadd_s32_(a, b)
|
|||
pub unsafe fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v4i32")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v4i32")]
|
||||
fn vqaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
|
||||
}
|
||||
|
|
@ -4431,7 +4431,7 @@ vqaddq_s32_(a, b)
|
|||
pub unsafe fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v1i64")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v1i64")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v1i64")]
|
||||
fn vqadd_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
|
||||
}
|
||||
|
|
@ -4447,7 +4447,7 @@ vqadd_s64_(a, b)
|
|||
pub unsafe fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v2i64")]
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i64")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v2i64")]
|
||||
fn vqaddq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
|
||||
}
|
||||
|
|
@ -5908,6 +5908,320 @@ pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
|
|||
vminnmq_f32_(a, b)
|
||||
}
|
||||
|
||||
/// Signed saturating doubling multiply long
///
/// Lane-wise `saturate(2 * a[i] * b[i])`, widening each `i16` product to `i32`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))]
pub unsafe fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
    // Bind the per-architecture LLVM intrinsic under one local name.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmull.v4i32")]
        fn vqdmull_s16_(a: int16x4_t, b: int16x4_t) -> int32x4_t;
    }
    vqdmull_s16_(a, b)
}
|
||||
|
||||
/// Signed saturating doubling multiply long
///
/// Lane-wise `saturate(2 * a[i] * b[i])`, widening each `i32` product to `i64`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))]
pub unsafe fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
    // Bind the per-architecture LLVM intrinsic under one local name.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmull.v2i64")]
        fn vqdmull_s32_(a: int32x2_t, b: int32x2_t) -> int64x2_t;
    }
    vqdmull_s32_(a, b)
}
|
||||
|
||||
/// Vector saturating doubling long multiply with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))]
|
||||
pub unsafe fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
|
||||
vqdmull_s16(a, vdup_n_s16(b))
|
||||
}
|
||||
|
||||
/// Vector saturating doubling long multiply with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))]
|
||||
pub unsafe fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
|
||||
vqdmull_s32(a, vdup_n_s32(b))
|
||||
}
|
||||
|
||||
/// Vector saturating doubling long multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull, N = 2))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
|
||||
static_assert_imm2!(N);
|
||||
let b: int16x4_t = simd_shuffle4(b, b, [N as u32, N as u32, N as u32, N as u32]);
|
||||
vqdmull_s16(a, b)
|
||||
}
|
||||
|
||||
/// Vector saturating doubling long multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull, N = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
|
||||
static_assert_imm1!(N);
|
||||
let b: int32x2_t = simd_shuffle2(b, b, [N as u32, N as u32]);
|
||||
vqdmull_s32(a, b)
|
||||
}
|
||||
|
||||
/// Signed saturating doubling multiply-add long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))]
|
||||
pub unsafe fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
|
||||
vqaddq_s32(a, vqdmull_s16(b, c))
|
||||
}
|
||||
|
||||
/// Signed saturating doubling multiply-add long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))]
|
||||
pub unsafe fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
|
||||
vqaddq_s64(a, vqdmull_s32(b, c))
|
||||
}
|
||||
|
||||
/// Vector widening saturating doubling multiply accumulate with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))]
|
||||
pub unsafe fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
|
||||
vqaddq_s32(a, vqdmull_n_s16(b, c))
|
||||
}
|
||||
|
||||
/// Vector widening saturating doubling multiply accumulate with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))]
|
||||
pub unsafe fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
|
||||
vqaddq_s64(a, vqdmull_n_s32(b, c))
|
||||
}
|
||||
|
||||
/// Vector widening saturating doubling multiply accumulate with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal, N = 2))]
|
||||
#[rustc_legacy_const_generics(3)]
|
||||
pub unsafe fn vqdmlal_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
|
||||
static_assert_imm2!(N);
|
||||
vqaddq_s32(a, vqdmull_lane_s16::<N>(b, c))
|
||||
}
|
||||
|
||||
/// Vector widening saturating doubling multiply accumulate with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal, N = 1))]
|
||||
#[rustc_legacy_const_generics(3)]
|
||||
pub unsafe fn vqdmlal_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
|
||||
static_assert_imm1!(N);
|
||||
vqaddq_s64(a, vqdmull_lane_s32::<N>(b, c))
|
||||
}
|
||||
|
||||
/// Signed saturating doubling multiply-subtract long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))]
|
||||
pub unsafe fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
|
||||
vqsubq_s32(a, vqdmull_s16(b, c))
|
||||
}
|
||||
|
||||
/// Signed saturating doubling multiply-subtract long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))]
|
||||
pub unsafe fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
|
||||
vqsubq_s64(a, vqdmull_s32(b, c))
|
||||
}
|
||||
|
||||
/// Vector widening saturating doubling multiply subtract with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))]
|
||||
pub unsafe fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
|
||||
vqsubq_s32(a, vqdmull_n_s16(b, c))
|
||||
}
|
||||
|
||||
/// Vector widening saturating doubling multiply subtract with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))]
|
||||
pub unsafe fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
|
||||
vqsubq_s64(a, vqdmull_n_s32(b, c))
|
||||
}
|
||||
|
||||
/// Vector widening saturating doubling multiply subtract with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl, N = 2))]
|
||||
#[rustc_legacy_const_generics(3)]
|
||||
pub unsafe fn vqdmlsl_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
|
||||
static_assert_imm2!(N);
|
||||
vqsubq_s32(a, vqdmull_lane_s16::<N>(b, c))
|
||||
}
|
||||
|
||||
/// Vector widening saturating doubling multiply subtract with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl, N = 1))]
|
||||
#[rustc_legacy_const_generics(3)]
|
||||
pub unsafe fn vqdmlsl_lane_s32<const N: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
|
||||
static_assert_imm1!(N);
|
||||
vqsubq_s64(a, vqdmull_lane_s32::<N>(b, c))
|
||||
}
|
||||
|
||||
/// Signed saturating doubling multiply returning high half
///
/// Lane-wise: doubles the full-width product of `a[i] * b[i]`, saturates,
/// and returns the high half, keeping the `i16` element width.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
pub unsafe fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
    // Bind the per-architecture LLVM intrinsic under one local name.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v4i16")]
        fn vqdmulh_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
    }
    vqdmulh_s16_(a, b)
}
|
||||
|
||||
/// Signed saturating doubling multiply returning high half
///
/// 128-bit (`int16x8_t`) form of [`vqdmulh_s16`].
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
pub unsafe fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
    // Bind the per-architecture LLVM intrinsic under one local name.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v8i16")]
        fn vqdmulhq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
    }
    vqdmulhq_s16_(a, b)
}
|
||||
|
||||
/// Signed saturating doubling multiply returning high half
///
/// Lane-wise: doubles the full-width product of `a[i] * b[i]`, saturates,
/// and returns the high half, keeping the `i32` element width.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
pub unsafe fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
    // Bind the per-architecture LLVM intrinsic under one local name.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v2i32")]
        fn vqdmulh_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
    }
    vqdmulh_s32_(a, b)
}
|
||||
|
||||
/// Signed saturating doubling multiply returning high half
///
/// 128-bit (`int32x4_t`) form of [`vqdmulh_s32`].
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
pub unsafe fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
    // Bind the per-architecture LLVM intrinsic under one local name.
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")]
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v4i32")]
        fn vqdmulhq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
    }
    vqdmulhq_s32_(a, b)
}
|
||||
|
||||
/// Vector saturating doubling multiply high with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
|
||||
pub unsafe fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
|
||||
let b: int16x4_t = vdup_n_s16(b);
|
||||
vqdmulh_s16(a, b)
|
||||
}
|
||||
|
||||
/// Vector saturating doubling multiply high with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
|
||||
pub unsafe fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
|
||||
let b: int32x2_t = vdup_n_s32(b);
|
||||
vqdmulh_s32(a, b)
|
||||
}
|
||||
|
||||
/// Vector saturating doubling multiply high with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
|
||||
pub unsafe fn vqdmulhq_nq_s16(a: int16x8_t, b: i16) -> int16x8_t {
|
||||
let b: int16x8_t = vdupq_n_s16(b);
|
||||
vqdmulhq_s16(a, b)
|
||||
}
|
||||
|
||||
/// Vector saturating doubling multiply high with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
|
||||
pub unsafe fn vqdmulhq_nq_s32(a: int32x4_t, b: i32) -> int32x4_t {
|
||||
let b: int32x4_t = vdupq_n_s32(b);
|
||||
vqdmulhq_s32(a, b)
|
||||
}
|
||||
|
||||
/// Signed saturating rounding shift left
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -15760,6 +16074,252 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
unsafe fn test_vqdmull_s16() {
    // Lane-wise 2*a*b: 2*0*1=0, 2*1*2=4, 2*2*3=12, 2*3*4=24.
    let a: i16x4 = i16x4::new(0, 1, 2, 3);
    let b: i16x4 = i16x4::new(1, 2, 3, 4);
    let e: i32x4 = i32x4::new(0, 4, 12, 24);
    let r: i32x4 = transmute(vqdmull_s16(transmute(a), transmute(b)));
    assert_eq!(r, e);
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmull_s32() {
|
||||
let a: i32x2 = i32x2::new(0, 1);
|
||||
let b: i32x2 = i32x2::new(1, 2);
|
||||
let e: i64x2 = i64x2::new(0, 4);
|
||||
let r: i64x2 = transmute(vqdmull_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmull_n_s16() {
|
||||
let a: i16x4 = i16x4::new(2, 4, 6, 8);
|
||||
let b: i16 = 2;
|
||||
let e: i32x4 = i32x4::new(8, 16, 24, 32);
|
||||
let r: i32x4 = transmute(vqdmull_n_s16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmull_n_s32() {
|
||||
let a: i32x2 = i32x2::new(2, 4);
|
||||
let b: i32 = 2;
|
||||
let e: i64x2 = i64x2::new(8, 16);
|
||||
let r: i64x2 = transmute(vqdmull_n_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmull_lane_s16() {
|
||||
let a: i16x4 = i16x4::new(1, 2, 3, 4);
|
||||
let b: i16x4 = i16x4::new(0, 2, 2, 0);
|
||||
let e: i32x4 = i32x4::new(4, 8, 12, 16);
|
||||
let r: i32x4 = transmute(vqdmull_lane_s16::<2>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmull_lane_s32() {
|
||||
let a: i32x2 = i32x2::new(1, 2);
|
||||
let b: i32x2 = i32x2::new(0, 2);
|
||||
let e: i64x2 = i64x2::new(4, 8);
|
||||
let r: i64x2 = transmute(vqdmull_lane_s32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmlal_s16() {
|
||||
let a: i32x4 = i32x4::new(1, 1, 1, 1);
|
||||
let b: i16x4 = i16x4::new(1, 2, 3, 4);
|
||||
let c: i16x4 = i16x4::new(2, 2, 2, 2);
|
||||
let e: i32x4 = i32x4::new(5, 9, 13, 17);
|
||||
let r: i32x4 = transmute(vqdmlal_s16(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmlal_s32() {
|
||||
let a: i64x2 = i64x2::new(1, 1);
|
||||
let b: i32x2 = i32x2::new(1, 2);
|
||||
let c: i32x2 = i32x2::new(2, 2);
|
||||
let e: i64x2 = i64x2::new(5, 9);
|
||||
let r: i64x2 = transmute(vqdmlal_s32(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmlal_n_s16() {
|
||||
let a: i32x4 = i32x4::new(1, 1, 1, 1);
|
||||
let b: i16x4 = i16x4::new(1, 2, 3, 4);
|
||||
let c: i16 = 2;
|
||||
let e: i32x4 = i32x4::new(5, 9, 13, 17);
|
||||
let r: i32x4 = transmute(vqdmlal_n_s16(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmlal_n_s32() {
|
||||
let a: i64x2 = i64x2::new(1, 1);
|
||||
let b: i32x2 = i32x2::new(1, 2);
|
||||
let c: i32 = 2;
|
||||
let e: i64x2 = i64x2::new(5, 9);
|
||||
let r: i64x2 = transmute(vqdmlal_n_s32(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmlal_lane_s16() {
|
||||
let a: i32x4 = i32x4::new(1, 2, 3, 4);
|
||||
let b: i16x4 = i16x4::new(1, 2, 3, 4);
|
||||
let c: i16x4 = i16x4::new(0, 2, 2, 0);
|
||||
let e: i32x4 = i32x4::new(5, 10, 15, 20);
|
||||
let r: i32x4 = transmute(vqdmlal_lane_s16::<2>(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmlal_lane_s32() {
|
||||
let a: i64x2 = i64x2::new(1, 2);
|
||||
let b: i32x2 = i32x2::new(1, 2);
|
||||
let c: i32x2 = i32x2::new(0, 2);
|
||||
let e: i64x2 = i64x2::new(5, 10);
|
||||
let r: i64x2 = transmute(vqdmlal_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmlsl_s16() {
|
||||
let a: i32x4 = i32x4::new(3, 7, 11, 15);
|
||||
let b: i16x4 = i16x4::new(1, 2, 3, 4);
|
||||
let c: i16x4 = i16x4::new(2, 2, 2, 2);
|
||||
let e: i32x4 = i32x4::new(-1, -1, -1, -1);
|
||||
let r: i32x4 = transmute(vqdmlsl_s16(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmlsl_s32() {
|
||||
let a: i64x2 = i64x2::new(3, 7);
|
||||
let b: i32x2 = i32x2::new(1, 2);
|
||||
let c: i32x2 = i32x2::new(2, 2);
|
||||
let e: i64x2 = i64x2::new(-1, -1);
|
||||
let r: i64x2 = transmute(vqdmlsl_s32(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmlsl_n_s16() {
|
||||
let a: i32x4 = i32x4::new(3, 7, 11, 15);
|
||||
let b: i16x4 = i16x4::new(1, 2, 3, 4);
|
||||
let c: i16 = 2;
|
||||
let e: i32x4 = i32x4::new(-1, -1, -1, -1);
|
||||
let r: i32x4 = transmute(vqdmlsl_n_s16(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmlsl_n_s32() {
|
||||
let a: i64x2 = i64x2::new(3, 7);
|
||||
let b: i32x2 = i32x2::new(1, 2);
|
||||
let c: i32 = 2;
|
||||
let e: i64x2 = i64x2::new(-1, -1);
|
||||
let r: i64x2 = transmute(vqdmlsl_n_s32(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmlsl_lane_s16() {
|
||||
let a: i32x4 = i32x4::new(3, 6, 9, 12);
|
||||
let b: i16x4 = i16x4::new(1, 2, 3, 4);
|
||||
let c: i16x4 = i16x4::new(0, 2, 2, 0);
|
||||
let e: i32x4 = i32x4::new(-1, -2, -3, -4);
|
||||
let r: i32x4 = transmute(vqdmlsl_lane_s16::<2>(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmlsl_lane_s32() {
|
||||
let a: i64x2 = i64x2::new(3, 6);
|
||||
let b: i32x2 = i32x2::new(1, 2);
|
||||
let c: i32x2 = i32x2::new(0, 2);
|
||||
let e: i64x2 = i64x2::new(-1, -2);
|
||||
let r: i64x2 = transmute(vqdmlsl_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmulh_s16() {
|
||||
let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
|
||||
let b: i16x4 = i16x4::new(2, 2, 2, 2);
|
||||
let e: i16x4 = i16x4::new(1, 1, 1, 1);
|
||||
let r: i16x4 = transmute(vqdmulh_s16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmulhq_s16() {
|
||||
let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
|
||||
let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
|
||||
let r: i16x8 = transmute(vqdmulhq_s16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmulh_s32() {
|
||||
let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
|
||||
let b: i32x2 = i32x2::new(2, 2);
|
||||
let e: i32x2 = i32x2::new(1, 1);
|
||||
let r: i32x2 = transmute(vqdmulh_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmulhq_s32() {
|
||||
let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
|
||||
let b: i32x4 = i32x4::new(2, 2, 2, 2);
|
||||
let e: i32x4 = i32x4::new(1, 1, 1, 1);
|
||||
let r: i32x4 = transmute(vqdmulhq_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmulh_n_s16() {
|
||||
let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
|
||||
let b: i16 = 2;
|
||||
let e: i16x4 = i16x4::new(1, 1, 1, 1);
|
||||
let r: i16x4 = transmute(vqdmulh_n_s16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmulh_n_s32() {
|
||||
let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
|
||||
let b: i32 = 2;
|
||||
let e: i32x2 = i32x2::new(1, 1);
|
||||
let r: i32x2 = transmute(vqdmulh_n_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmulhq_nq_s16() {
|
||||
let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
|
||||
let b: i16 = 2;
|
||||
let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
|
||||
let r: i16x8 = transmute(vqdmulhq_nq_s16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqdmulhq_nq_s32() {
|
||||
let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
|
||||
let b: i32 = 2;
|
||||
let e: i32x4 = i32x4::new(1, 1, 1, 1);
|
||||
let r: i32x4 = transmute(vqdmulhq_nq_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqrshl_s8() {
|
||||
let a: i8x8 = i8x8::new(-128, 0x7F, 2, 3, 4, 5, 6, 7);
|
||||
|
|
|
|||
|
|
@ -1296,16 +1296,30 @@ validate 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26
|
|||
|
||||
arm = vqsub.s
|
||||
aarch64 = uqsub
|
||||
link-arm = vqsubu._EXT_
|
||||
link-arm = llvm.usub.sat._EXT_
|
||||
link-aarch64 = uqsub._EXT_
|
||||
generate uint*_t, uint64x*_t
|
||||
|
||||
arm = vqsub.s
|
||||
aarch64 = sqsub
|
||||
link-arm = vqsubs._EXT_
|
||||
link-arm = llvm.ssub.sat._EXT_
|
||||
link-aarch64 = sqsub._EXT_
|
||||
generate int*_t, int64x*_t
|
||||
|
||||
/// Saturating subtract
|
||||
name = vqsub
|
||||
multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
|
||||
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
|
||||
multi_fn = simd_extract, {vqsub-in_ntt-noext, a, b}, 0
|
||||
a = 42
|
||||
b = 1
|
||||
validate 41
|
||||
|
||||
aarch64 = sqsub
|
||||
generate i8, i16, i32, i64
|
||||
aarch64 = uqsub
|
||||
generate u8, u16, u32, u64
|
||||
|
||||
/// Halving add
|
||||
name = vhadd
|
||||
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
|
||||
|
|
@ -1433,16 +1447,30 @@ validate 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58
|
|||
|
||||
arm = vqadd.s
|
||||
aarch64 = uqadd
|
||||
link-arm = vqaddu._EXT_
|
||||
link-arm = llvm.uadd.sat._EXT_
|
||||
link-aarch64 = uqadd._EXT_
|
||||
generate uint*_t, uint64x*_t
|
||||
|
||||
arm = vqadd.s
|
||||
aarch64 = sqadd
|
||||
link-arm = vqadds._EXT_
|
||||
link-arm = llvm.sadd.sat._EXT_
|
||||
link-aarch64 = sqadd._EXT_
|
||||
generate int*_t, int64x*_t
|
||||
|
||||
/// Saturating add
|
||||
name = vqadd
|
||||
multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
|
||||
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
|
||||
multi_fn = simd_extract, {vqadd-in_ntt-noext, a, b}, 0
|
||||
a = 42
|
||||
b = 1
|
||||
validate 43
|
||||
|
||||
aarch64 = sqadd
|
||||
generate i8, i16, i32, i64
|
||||
aarch64 = uqadd
|
||||
generate u8, u16, u32, u64
|
||||
|
||||
/// Multiply
|
||||
name = vmul
|
||||
a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
|
||||
|
|
@ -2135,6 +2163,395 @@ aarch64 = fminnmp
|
|||
link-aarch64 = fminnmp._EXT_
|
||||
generate float32x4_t:float32x4_t:float32x4_t
|
||||
|
||||
/// Signed saturating doubling multiply long
|
||||
name = vqdmull
|
||||
a = 0, 1, 2, 3, 4, 5, 6, 7
|
||||
b = 1, 2, 3, 4, 5, 6, 7, 8
|
||||
validate 0, 4, 12, 24, 40, 60, 84, 108
|
||||
|
||||
aarch64 = sqdmull
|
||||
link-aarch64 = sqdmull._EXT2_
|
||||
arm = vqdmull
|
||||
link-arm = vqdmull._EXT2_
|
||||
generate int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t
|
||||
|
||||
/// Signed saturating doubling multiply long
|
||||
name = vqdmull
|
||||
multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
|
||||
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
|
||||
multi_fn = simd_extract, {vqdmull-in_ntt-noext, a, b}, 0
|
||||
a = 2
|
||||
b = 3
|
||||
validate 12
|
||||
|
||||
aarch64 = sqdmull
|
||||
generate i16:i16:i32
|
||||
|
||||
/// Signed saturating doubling multiply long
|
||||
name = vqdmull
|
||||
a = 2
|
||||
b = 3
|
||||
validate 12
|
||||
|
||||
aarch64 = sqdmull
|
||||
link-aarch64 = sqdmulls.scalar
|
||||
generate i32:i32:i64
|
||||
|
||||
/// Vector saturating doubling long multiply with scalar
|
||||
name = vqdmull_n
|
||||
no-q
|
||||
multi_fn = vqdmull-in_ntt-noext, a, {vdup_n-in_ntt-noext, b}
|
||||
a = 2, 4, 6, 8
|
||||
b = 2
|
||||
validate 8, 16, 24, 32
|
||||
|
||||
aarch64 = sqdmull
|
||||
arm = vqdmull
|
||||
generate int16x4_t:i16:int32x4_t, int32x2_t:i32:int64x2_t
|
||||
|
||||
/// Signed saturating doubling multiply long
|
||||
name = vqdmull_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, a:half, a, a, {asc-halflen-halflen}
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {asc-halflen-halflen}
|
||||
multi_fn = vqdmull-noqself-noext, a, b
|
||||
a = 0, 1, 4, 5, 4, 5, 6, 7
|
||||
b = 1, 2, 5, 6, 5, 6, 7, 8
|
||||
validate 40, 60, 84, 112
|
||||
|
||||
aarch64 = sqdmull2
|
||||
generate int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t
|
||||
|
||||
/// Signed saturating doubling multiply long
|
||||
name = vqdmull_high_n
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, a:in_ntt, a, a, {asc-out_len-out_len}
|
||||
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
|
||||
multi_fn = vqdmull-in_ntt-noext, a, b
|
||||
a = 0, 2, 8, 10, 8, 10, 12, 14
|
||||
b = 2
|
||||
validate 32, 40, 48, 56
|
||||
|
||||
aarch64 = sqdmull2
|
||||
generate int16x8_t:i16:int32x4_t, int32x4_t:i32:int64x2_t
|
||||
|
||||
/// Vector saturating doubling long multiply by scalar
|
||||
name = vqdmull_lane
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in_exp_len-N
|
||||
multi_fn = simd_shuffle-out_len-noext, b:in_t0, b, b, {dup-out_len-N as u32}
|
||||
multi_fn = vqdmull-noqself-noext, a, b
|
||||
a = 1, 2, 3, 4
|
||||
b = 0, 2, 2, 0, 2, 0, 0, 0
|
||||
n = HFLEN
|
||||
validate 4, 8, 12, 16
|
||||
|
||||
aarch64 = sqdmull
|
||||
generate int16x4_t:int16x8_t:int32x4_t, int32x2_t:int32x4_t:int64x2_t
|
||||
|
||||
arm = vqdmull
|
||||
generate int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t
|
||||
|
||||
/// Signed saturating doubling multiply long
|
||||
name = vqdmullh_lane
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in_exp_len-N
|
||||
multi_fn = simd_extract, b:in_t0, b, N as u32
|
||||
multi_fn = vqdmullh-noqself-noext, a, b
|
||||
a = 2
|
||||
b = 0, 2, 2, 0, 2, 0, 0, 0
|
||||
n = HFLEN
|
||||
validate 8
|
||||
|
||||
aarch64 = sqdmull
|
||||
generate i16:int16x4_t:i32, i16:int16x8_t:i32
|
||||
|
||||
/// Signed saturating doubling multiply long
|
||||
name = vqdmulls_lane
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in_exp_len-N
|
||||
multi_fn = simd_extract, b:in_t0, b, N as u32
|
||||
multi_fn = vqdmulls-noqself-noext, a, b
|
||||
a = 2
|
||||
b = 0, 2, 2, 0, 2, 0, 0, 0
|
||||
n = HFLEN
|
||||
validate 8
|
||||
|
||||
aarch64 = sqdmull
|
||||
generate i32:int32x2_t:i64, i32:int32x4_t:i64
|
||||
|
||||
/// Signed saturating doubling multiply long
|
||||
name = vqdmull_high_lane
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in_exp_len-N
|
||||
multi_fn = simd_shuffle-out_len-noext, a:in_t, a, a, {asc-out_len-out_len}
|
||||
multi_fn = simd_shuffle-out_len-noext, b:in_t, b, b, {dup-out_len-N as u32}
|
||||
multi_fn = vqdmull-self-noext, a, b
|
||||
a = 0, 1, 4, 5, 4, 5, 6, 7
|
||||
b = 0, 2, 2, 0, 2, 0, 0, 0
|
||||
n = HFLEN
|
||||
validate 16, 20, 24, 28
|
||||
|
||||
aarch64 = sqdmull2
|
||||
generate int16x8_t:int16x4_t:int32x4_t, int32x4_t:int32x2_t:int64x2_t
|
||||
|
||||
/// Signed saturating doubling multiply long
|
||||
name = vqdmull_high_lane
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in_exp_len-N
|
||||
multi_fn = simd_shuffle-out_len-noext, a:half, a, a, {asc-out_len-out_len}
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {dup-out_len-N as u32}
|
||||
multi_fn = vqdmull-noqself-noext, a, b
|
||||
a = 0, 1, 4, 5, 4, 5, 6, 7
|
||||
b = 0, 2, 2, 0, 2, 0, 0, 0
|
||||
n = HFLEN
|
||||
validate 16, 20, 24, 28
|
||||
|
||||
aarch64 = sqdmull2
|
||||
generate int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t
|
||||
|
||||
/// Signed saturating doubling multiply-add long
|
||||
name = vqdmlal
|
||||
multi_fn = vqadd-out-noext, a, {vqdmull-self-noext, b, c}
|
||||
a = 1, 1, 1, 1
|
||||
b = 1, 2, 3, 4
|
||||
c = 2, 2, 2, 2
|
||||
validate 5, 9, 13, 17
|
||||
|
||||
aarch64 = sqdmlal
|
||||
arm = vqdmlal
|
||||
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
|
||||
|
||||
/// Vector widening saturating doubling multiply accumulate with scalar
|
||||
name = vqdmlal
|
||||
n-suffix
|
||||
multi_fn = vqadd-out-noext, a, {vqdmull_n-self-noext, b, c}
|
||||
a = 1, 1, 1, 1
|
||||
b = 1, 2, 3, 4
|
||||
c = 2
|
||||
validate 5, 9, 13, 17
|
||||
|
||||
aarch64 = sqdmlal
|
||||
arm = vqdmlal
|
||||
generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
|
||||
|
||||
/// Signed saturating doubling multiply-add long
|
||||
name = vqdmlal_high
|
||||
no-q
|
||||
multi_fn = vqadd-out-noext, a, {vqdmull_high-noqself-noext, b, c}
|
||||
a = 1, 2, 3, 4
|
||||
b = 0, 1, 4, 5, 4, 5, 6, 7
|
||||
c = 1, 2, 5, 6, 5, 6, 7, 8
|
||||
validate 41, 62, 87, 116
|
||||
|
||||
aarch64 = sqdmlal2
|
||||
generate int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
|
||||
|
||||
/// Signed saturating doubling multiply-add long
|
||||
name = vqdmlal_high_n
|
||||
no-q
|
||||
multi_fn = vqadd-out-noext, a, {vqdmull_high_n-noqself-noext, b, c}
|
||||
a = 1, 2, 3, 4
|
||||
b = 0, 2, 8, 10, 8, 10, 12, 14
|
||||
c = 2
|
||||
validate 33, 42, 51, 60
|
||||
|
||||
aarch64 = sqdmlal2
|
||||
generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
|
||||
|
||||
/// Vector widening saturating doubling multiply accumulate with scalar
|
||||
name = vqdmlal_lane
|
||||
in2-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in2_exp_len-N
|
||||
multi_fn = vqadd-out-noext, a, {vqdmull_lane-in2-::<N>, b, c}
|
||||
a = 1, 2, 3, 4
|
||||
b = 1, 2, 3, 4
|
||||
c = 0, 2, 2, 0, 2, 0, 0, 0
|
||||
n = HFLEN
|
||||
validate 5, 10, 15, 20
|
||||
|
||||
aarch64 = sqdmlal
|
||||
generate int32x4_t:int16x4_t:int16x8_t:int32x4_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
|
||||
|
||||
arm = vqdmlal
|
||||
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
|
||||
|
||||
/// Signed saturating doubling multiply-add long
|
||||
name = vqdmlal_high_lane
|
||||
in2-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in2_exp_len-N
|
||||
multi_fn = vqadd-out-noext, a, {vqdmull_high_lane-in2-::<N>, b, c}
|
||||
a = 1, 2, 3, 4
|
||||
b = 0, 1, 4, 5, 4, 5, 6, 7
|
||||
c = 0, 2, 0, 0, 0, 0, 0, 0
|
||||
n = 1
|
||||
validate 17, 22, 27, 32
|
||||
|
||||
aarch64 = sqdmlal2
|
||||
generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t: int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
|
||||
|
||||
/// Signed saturating doubling multiply-subtract long
|
||||
name = vqdmlsl
|
||||
multi_fn = vqsub-out-noext, a, {vqdmull-self-noext, b, c}
|
||||
a = 3, 7, 11, 15
|
||||
b = 1, 2, 3, 4
|
||||
c = 2, 2, 2, 2
|
||||
validate -1, -1, -1, -1
|
||||
|
||||
aarch64 = sqdmlsl
|
||||
arm = vqdmlsl
|
||||
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
|
||||
|
||||
/// Vector widening saturating doubling multiply subtract with scalar
|
||||
name = vqdmlsl
|
||||
n-suffix
|
||||
multi_fn = vqsub-out-noext, a, {vqdmull_n-self-noext, b, c}
|
||||
a = 3, 7, 11, 15
|
||||
b = 1, 2, 3, 4
|
||||
c = 2
|
||||
validate -1, -1, -1, -1
|
||||
|
||||
aarch64 = sqdmlsl
|
||||
arm = vqdmlsl
|
||||
generate int32x4_t:int16x4_t:i16:int32x4_t, int64x2_t:int32x2_t:i32:int64x2_t
|
||||
|
||||
/// Signed saturating doubling multiply-subtract long
|
||||
name = vqdmlsl_high
|
||||
no-q
|
||||
multi_fn = vqsub-out-noext, a, {vqdmull_high-noqself-noext, b, c}
|
||||
a = 39, 58, 81, 108
|
||||
b = 0, 1, 4, 5, 4, 5, 6, 7
|
||||
c = 1, 2, 5, 6, 5, 6, 7, 8
|
||||
validate -1, -2, -3, -4
|
||||
|
||||
aarch64 = sqdmlsl2
|
||||
generate int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
|
||||
|
||||
/// Signed saturating doubling multiply-subtract long
|
||||
name = vqdmlsl_high_n
|
||||
no-q
|
||||
multi_fn = vqsub-out-noext, a, {vqdmull_high_n-noqself-noext, b, c}
|
||||
a = 31, 38, 45, 52
|
||||
b = 0, 2, 8, 10, 8, 10, 12, 14
|
||||
c = 2
|
||||
validate -1, -2, -3, -4
|
||||
|
||||
aarch64 = sqdmlsl2
|
||||
generate int32x4_t:int16x8_t:i16:int32x4_t, int64x2_t:int32x4_t:i32:int64x2_t
|
||||
|
||||
/// Vector widening saturating doubling multiply subtract with scalar
|
||||
name = vqdmlsl_lane
|
||||
in2-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in2_exp_len-N
|
||||
multi_fn = vqsub-out-noext, a, {vqdmull_lane-in2-::<N>, b, c}
|
||||
a = 3, 6, 9, 12
|
||||
b = 1, 2, 3, 4
|
||||
c = 0, 2, 2, 0, 2, 0, 0, 0
|
||||
n = HFLEN
|
||||
validate -1, -2, -3, -4
|
||||
|
||||
aarch64 = sqdmlsl
|
||||
generate int32x4_t:int16x4_t:int16x8_t:int32x4_t, int64x2_t:int32x2_t:int32x4_t:int64x2_t
|
||||
|
||||
arm = vqdmlsl
|
||||
generate int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
|
||||
|
||||
/// Signed saturating doubling multiply-subtract long
|
||||
name = vqdmlsl_high_lane
|
||||
in2-suffix
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in2_exp_len-N
|
||||
multi_fn = vqsub-out-noext, a, {vqdmull_high_lane-in2-::<N>, b, c}
|
||||
a = 15, 18, 21, 24
|
||||
b = 0, 1, 4, 5, 4, 5, 6, 7
|
||||
c = 0, 2, 0, 0, 0, 0, 0, 0
|
||||
n = 1
|
||||
validate -1, -2, -3, -4
|
||||
|
||||
aarch64 = sqdmlsl2
|
||||
generate int32x4_t:int16x8_t:int16x4_t:int32x4_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t: int32x4_t:int32x2_t:int64x2_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
|
||||
|
||||
/// Signed saturating doubling multiply returning high half
|
||||
name = vqdmulh
|
||||
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2
|
||||
validate 1, 1, 1, 1, 1, 1, 1, 1
|
||||
|
||||
aarch64 = sqdmulh
|
||||
link-aarch64 = sqdmulh._EXT_
|
||||
arm = vqdmulh
|
||||
link-arm = vqdmulh._EXT_
|
||||
generate int16x4_t, int16x8_t, int32x2_t, int32x4_t
|
||||
|
||||
/// Signed saturating doubling multiply returning high half
|
||||
name = vqdmulh
|
||||
multi_fn = vdup_n-in_ntt-noext, a:in_ntt, a
|
||||
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
|
||||
multi_fn = simd_extract, {vqdmulh-in_ntt-noext, a, b}, 0
|
||||
a = 1
|
||||
b = 2
|
||||
validate 0
|
||||
|
||||
aarch64 = sqdmulh
|
||||
generate i16, i32
|
||||
|
||||
/// Vector saturating doubling multiply high with scalar
|
||||
name = vqdmulh_n
|
||||
out-suffix
|
||||
multi_fn = vdup_n-in_ntt-noext, b:in_ntt, b
|
||||
multi_fn = vqdmulh-out-noext, a, b
|
||||
a = MAX, MAX, MAX, MAX
|
||||
b = 2
|
||||
validate 1, 1, 1, 1
|
||||
|
||||
aarch64 = sqdmulh
|
||||
arm = vqdmulh
|
||||
generate int16x4_t:i16:int16x4_t, int32x2_t:i32:int32x2_t
|
||||
|
||||
/// Vector saturating doubling multiply high with scalar
|
||||
name = vqdmulhq_n
|
||||
out-suffix
|
||||
multi_fn = vdupq_n-in_ntt-noext, b:out_t, b
|
||||
multi_fn = vqdmulh-out-noext, a, b
|
||||
a = MAX, MAX, MAX, MAX, MAX, MAX, MAX, MAX
|
||||
b = 2
|
||||
validate 1, 1, 1, 1, 1, 1, 1, 1
|
||||
|
||||
aarch64 = sqdmulh
|
||||
arm = vqdmulh
|
||||
generate int16x8_t:i16:int16x8_t, int32x4_t:i32:int32x4_t
|
||||
|
||||
/// Signed saturating doubling multiply returning high half
|
||||
name = vqdmulhh_lane
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in_exp_len-N
|
||||
multi_fn = simd_extract, b:in_t0, b, N as u32
|
||||
multi_fn = vqdmulhh-out_ntt-noext, a, b
|
||||
a = 2
|
||||
b = 0, 0, MAX, 0, 0, 0, 0, 0
|
||||
n = 2
|
||||
validate 1
|
||||
|
||||
aarch64 = sqdmulh
|
||||
generate i16:int16x4_t:i16, i16:int16x8_t:i16
|
||||
|
||||
/// Signed saturating doubling multiply returning high half
|
||||
name = vqdmulhs_lane
|
||||
constn = N
|
||||
multi_fn = static_assert_imm-in_exp_len-N
|
||||
multi_fn = simd_extract, b:in_t0, b, N as u32
|
||||
multi_fn = vqdmulhs-out_ntt-noext, a, b
|
||||
a = 2
|
||||
b = 0, MAX, 0, 0
|
||||
n = 1
|
||||
validate 1
|
||||
|
||||
aarch64 = sqdmulh
|
||||
generate i32:int32x2_t:i32, i32:int32x4_t:i32
|
||||
|
||||
/// Signed saturating rounding shift left
|
||||
name = vqrshl
|
||||
a = MIN, MAX, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
|
|
|
|||
|
|
@ -320,10 +320,10 @@ fn type_to_noq_double_suffixes<'a>(out_t: &'a str, in_t: &'a str) -> String {
|
|||
|
||||
fn type_to_noq_suffix(t: &str) -> &str {
|
||||
match t {
|
||||
"int8x8_t" | "int8x16_t" => "_s8",
|
||||
"int16x4_t" | "int16x8_t" => "_s16",
|
||||
"int32x2_t" | "int32x4_t" => "_s32",
|
||||
"int64x1_t" | "int64x2_t" => "_s64",
|
||||
"int8x8_t" | "int8x16_t" | "i8" => "_s8",
|
||||
"int16x4_t" | "int16x8_t" | "i16" => "_s16",
|
||||
"int32x2_t" | "int32x4_t" | "i32" => "_s32",
|
||||
"int64x1_t" | "int64x2_t" | "i64" => "_s64",
|
||||
"uint8x8_t" | "uint8x16_t" => "_u8",
|
||||
"uint16x4_t" | "uint16x8_t" => "_u16",
|
||||
"uint32x2_t" | "uint32x4_t" => "_u32",
|
||||
|
|
@ -348,6 +348,7 @@ enum Suffix {
|
|||
NoQNSuffix,
|
||||
OutSuffix,
|
||||
Lane,
|
||||
In2,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
|
|
@ -845,6 +846,7 @@ fn gen_aarch64(
|
|||
NoQNSuffix => format!("{}{}", current_name, type_to_noq_n_suffix(in_t[1])),
|
||||
OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)),
|
||||
Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
|
||||
In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])),
|
||||
};
|
||||
let current_fn = if let Some(current_fn) = current_fn.clone() {
|
||||
if link_aarch64.is_some() {
|
||||
|
|
@ -1218,6 +1220,7 @@ fn gen_arm(
|
|||
NoQNSuffix => format!("{}{}", current_name, type_to_noq_n_suffix(in_t[1])),
|
||||
OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)),
|
||||
Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
|
||||
In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])),
|
||||
};
|
||||
let current_aarch64 = current_aarch64
|
||||
.clone()
|
||||
|
|
@ -1729,6 +1732,7 @@ fn get_call(
|
|||
let start = match &*fn_format[1] {
|
||||
"0" => 0,
|
||||
"n" => n.unwrap(),
|
||||
"out_len" => type_len(out_t) as i32,
|
||||
"halflen" => (type_len(in_t[1]) / 2) as i32,
|
||||
s => s.parse::<i32>().unwrap(),
|
||||
};
|
||||
|
|
@ -1747,6 +1751,7 @@ fn get_call(
|
|||
"out_bits_exp_len" => type_bits_exp_len(out_t),
|
||||
"in_exp_len" => type_exp_len(in_t[1]),
|
||||
"in_bits_exp_len" => type_bits_exp_len(in_t[1]),
|
||||
"in2_exp_len" => type_exp_len(in_t[2]),
|
||||
_ => 0,
|
||||
};
|
||||
if len == 0 {
|
||||
|
|
@ -1922,6 +1927,10 @@ fn get_call(
|
|||
fn_name.push_str(type_to_suffix(in_t[1]));
|
||||
} else if fn_format[1] == "nself" {
|
||||
fn_name.push_str(type_to_n_suffix(in_t[1]));
|
||||
} else if fn_format[1] == "out" {
|
||||
fn_name.push_str(type_to_suffix(out_t));
|
||||
} else if fn_format[1] == "in2" {
|
||||
fn_name.push_str(type_to_suffix(in_t[2]));
|
||||
} else if fn_format[1] == "signed" {
|
||||
fn_name.push_str(type_to_suffix(type_to_signed(in_t[1])));
|
||||
} else if fn_format[1] == "unsigned" {
|
||||
|
|
@ -2132,6 +2141,8 @@ mod test {
|
|||
suffix = OutSuffix;
|
||||
} else if line.starts_with("lane-suffixes") {
|
||||
suffix = Lane;
|
||||
} else if line.starts_with("in2-suffix") {
|
||||
suffix = In2;
|
||||
} else if line.starts_with("a = ") {
|
||||
a = line[4..].split(',').map(|v| v.trim().to_string()).collect();
|
||||
} else if line.starts_with("b = ") {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue