Add vmul_n, vmul_lane, vmulx neon instructions (#1147)
This commit is contained in:
parent
07f1d0cae3
commit
fd29f9602c
5 changed files with 2263 additions and 118 deletions
|
|
@ -3934,6 +3934,106 @@ pub unsafe fn vmulq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
|
|||
simd_mul(a, b)
|
||||
}
|
||||
|
||||
/// Vector multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmul))]
|
||||
pub unsafe fn vmul_n_f64(a: float64x1_t, b: f64) -> float64x1_t {
|
||||
simd_mul(a, vdup_n_f64(b))
|
||||
}
|
||||
|
||||
/// Vector multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmul))]
|
||||
pub unsafe fn vmulq_n_f64(a: float64x2_t, b: f64) -> float64x2_t {
|
||||
simd_mul(a, vdupq_n_f64(b))
|
||||
}
|
||||
|
||||
/// Floating-point multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmul_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64x1_t {
|
||||
static_assert!(LANE : i32 where LANE == 0);
|
||||
simd_mul(a, transmute::<f64, _>(simd_extract(b, LANE as u32)))
|
||||
}
|
||||
|
||||
/// Floating-point multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmul_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x2_t) -> float64x1_t {
|
||||
static_assert_imm1!(LANE);
|
||||
simd_mul(a, transmute::<f64, _>(simd_extract(b, LANE as u32)))
|
||||
}
|
||||
|
||||
/// Floating-point multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
|
||||
static_assert!(LANE : i32 where LANE == 0);
|
||||
simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Floating-point multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
|
||||
static_assert_imm1!(LANE);
|
||||
simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Floating-point multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmuls_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
|
||||
static_assert_imm1!(LANE);
|
||||
let b: f32 = simd_extract(b, LANE as u32);
|
||||
a * b
|
||||
}
|
||||
|
||||
/// Floating-point multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmuls_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
|
||||
static_assert_imm2!(LANE);
|
||||
let b: f32 = simd_extract(b, LANE as u32);
|
||||
a * b
|
||||
}
|
||||
|
||||
/// Floating-point multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmuld_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
|
||||
static_assert!(LANE : i32 where LANE == 0);
|
||||
let b: f64 = simd_extract(b, LANE as u32);
|
||||
a * b
|
||||
}
|
||||
|
||||
/// Floating-point multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmuld_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
|
||||
static_assert_imm1!(LANE);
|
||||
let b: f64 = simd_extract(b, LANE as u32);
|
||||
a * b
|
||||
}
|
||||
|
||||
/// Signed multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -4004,6 +4104,316 @@ pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
|
|||
vmull_p8(a, b)
|
||||
}
|
||||
|
||||
/// Multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smull2))]
|
||||
pub unsafe fn vmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
|
||||
vmull_high_s16(a, vdupq_n_s16(b))
|
||||
}
|
||||
|
||||
/// Multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smull2))]
|
||||
pub unsafe fn vmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
|
||||
vmull_high_s32(a, vdupq_n_s32(b))
|
||||
}
|
||||
|
||||
/// Multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umull2))]
|
||||
pub unsafe fn vmull_high_n_u16(a: uint16x8_t, b: u16) -> uint32x4_t {
|
||||
vmull_high_u16(a, vdupq_n_u16(b))
|
||||
}
|
||||
|
||||
/// Multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umull2))]
|
||||
pub unsafe fn vmull_high_n_u32(a: uint32x4_t, b: u32) -> uint64x2_t {
|
||||
vmull_high_u32(a, vdupq_n_u32(b))
|
||||
}
|
||||
|
||||
/// Multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmull_high_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
|
||||
static_assert_imm2!(LANE);
|
||||
vmull_high_s16(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmull_high_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
|
||||
static_assert_imm3!(LANE);
|
||||
vmull_high_s16(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmull_high_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
|
||||
static_assert_imm1!(LANE);
|
||||
vmull_high_s32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmull_high_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
|
||||
static_assert_imm2!(LANE);
|
||||
vmull_high_s32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmull_high_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t {
|
||||
static_assert_imm2!(LANE);
|
||||
vmull_high_u16(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmull_high_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
|
||||
static_assert_imm3!(LANE);
|
||||
vmull_high_u16(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmull_high_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t {
|
||||
static_assert_imm1!(LANE);
|
||||
vmull_high_u32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmull_high_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
|
||||
static_assert_imm2!(LANE);
|
||||
vmull_high_u32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx))]
|
||||
pub unsafe fn vmulx_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.v2f32")]
|
||||
fn vmulx_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
|
||||
}
|
||||
vmulx_f32_(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx))]
|
||||
pub unsafe fn vmulxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.v4f32")]
|
||||
fn vmulxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
|
||||
}
|
||||
vmulxq_f32_(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx))]
|
||||
pub unsafe fn vmulx_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.v1f64")]
|
||||
fn vmulx_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
|
||||
}
|
||||
vmulx_f64_(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx))]
|
||||
pub unsafe fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.v2f64")]
|
||||
fn vmulxq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
|
||||
}
|
||||
vmulxq_f64_(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulx_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64x1_t {
|
||||
static_assert!(LANE : i32 where LANE == 0);
|
||||
vmulx_f64(a, transmute::<f64, _>(simd_extract(b, LANE as u32)))
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulx_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x2_t) -> float64x1_t {
|
||||
static_assert_imm1!(LANE);
|
||||
vmulx_f64(a, transmute::<f64, _>(simd_extract(b, LANE as u32)))
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulx_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
|
||||
static_assert_imm1!(LANE);
|
||||
vmulx_f32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulx_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
|
||||
static_assert_imm2!(LANE);
|
||||
vmulx_f32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulxq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
|
||||
static_assert_imm1!(LANE);
|
||||
vmulxq_f32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulxq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
|
||||
static_assert_imm2!(LANE);
|
||||
vmulxq_f32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulxq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
|
||||
static_assert!(LANE : i32 where LANE == 0);
|
||||
vmulxq_f64(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulxq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
|
||||
static_assert_imm1!(LANE);
|
||||
vmulxq_f64(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx))]
|
||||
pub unsafe fn vmulxs_f32(a: f32, b: f32) -> f32 {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.f32")]
|
||||
fn vmulxs_f32_(a: f32, b: f32) -> f32;
|
||||
}
|
||||
vmulxs_f32_(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx))]
|
||||
pub unsafe fn vmulxd_f64(a: f64, b: f64) -> f64 {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.f64")]
|
||||
fn vmulxd_f64_(a: f64, b: f64) -> f64;
|
||||
}
|
||||
vmulxd_f64_(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulxs_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
|
||||
static_assert_imm1!(LANE);
|
||||
vmulxs_f32(a, simd_extract(b, LANE as u32))
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulxs_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
|
||||
static_assert_imm2!(LANE);
|
||||
vmulxs_f32(a, simd_extract(b, LANE as u32))
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulxd_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
|
||||
static_assert!(LANE : i32 where LANE == 0);
|
||||
vmulxd_f64(a, simd_extract(b, LANE as u32))
|
||||
}
|
||||
|
||||
/// Floating-point multiply extended
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulxd_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
|
||||
static_assert_imm1!(LANE);
|
||||
vmulxd_f64(a, simd_extract(b, LANE as u32))
|
||||
}
|
||||
|
||||
/// Floating-point fused Multiply-Add to accumulator(vector)
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -10814,6 +11224,96 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_n_f64() {
|
||||
let a: f64 = 1.;
|
||||
let b: f64 = 2.;
|
||||
let e: f64 = 2.;
|
||||
let r: f64 = transmute(vmul_n_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_n_f64() {
|
||||
let a: f64x2 = f64x2::new(1., 2.);
|
||||
let b: f64 = 2.;
|
||||
let e: f64x2 = f64x2::new(2., 4.);
|
||||
let r: f64x2 = transmute(vmulq_n_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_lane_f64() {
|
||||
let a: f64 = 1.;
|
||||
let b: f64 = 2.;
|
||||
let e: f64 = 2.;
|
||||
let r: f64 = transmute(vmul_lane_f64::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_laneq_f64() {
|
||||
let a: f64 = 1.;
|
||||
let b: f64x2 = f64x2::new(2., 0.);
|
||||
let e: f64 = 2.;
|
||||
let r: f64 = transmute(vmul_laneq_f64::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_lane_f64() {
|
||||
let a: f64x2 = f64x2::new(1., 2.);
|
||||
let b: f64 = 2.;
|
||||
let e: f64x2 = f64x2::new(2., 4.);
|
||||
let r: f64x2 = transmute(vmulq_lane_f64::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_laneq_f64() {
|
||||
let a: f64x2 = f64x2::new(1., 2.);
|
||||
let b: f64x2 = f64x2::new(2., 0.);
|
||||
let e: f64x2 = f64x2::new(2., 4.);
|
||||
let r: f64x2 = transmute(vmulq_laneq_f64::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmuls_lane_f32() {
|
||||
let a: f32 = 1.;
|
||||
let b: f32x2 = f32x2::new(2., 0.);
|
||||
let e: f32 = 2.;
|
||||
let r: f32 = transmute(vmuls_lane_f32::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmuls_laneq_f32() {
|
||||
let a: f32 = 1.;
|
||||
let b: f32x4 = f32x4::new(2., 0., 0., 0.);
|
||||
let e: f32 = 2.;
|
||||
let r: f32 = transmute(vmuls_laneq_f32::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmuld_lane_f64() {
|
||||
let a: f64 = 1.;
|
||||
let b: f64 = 2.;
|
||||
let e: f64 = 2.;
|
||||
let r: f64 = transmute(vmuld_lane_f64::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmuld_laneq_f64() {
|
||||
let a: f64 = 1.;
|
||||
let b: f64x2 = f64x2::new(2., 0.);
|
||||
let e: f64 = 2.;
|
||||
let r: f64 = transmute(vmuld_laneq_f64::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_s8() {
|
||||
let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
|
|
@ -10877,6 +11377,276 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_n_s16() {
|
||||
let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
|
||||
let b: i16 = 2;
|
||||
let e: i32x4 = i32x4::new(18, 20, 22, 24);
|
||||
let r: i32x4 = transmute(vmull_high_n_s16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_n_s32() {
|
||||
let a: i32x4 = i32x4::new(1, 2, 9, 10);
|
||||
let b: i32 = 2;
|
||||
let e: i64x2 = i64x2::new(18, 20);
|
||||
let r: i64x2 = transmute(vmull_high_n_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_n_u16() {
|
||||
let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
|
||||
let b: u16 = 2;
|
||||
let e: u32x4 = u32x4::new(18, 20, 22, 24);
|
||||
let r: u32x4 = transmute(vmull_high_n_u16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_n_u32() {
|
||||
let a: u32x4 = u32x4::new(1, 2, 9, 10);
|
||||
let b: u32 = 2;
|
||||
let e: u64x2 = u64x2::new(18, 20);
|
||||
let r: u64x2 = transmute(vmull_high_n_u32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_lane_s16() {
|
||||
let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
|
||||
let b: i16x4 = i16x4::new(0, 2, 0, 0);
|
||||
let e: i32x4 = i32x4::new(18, 20, 22, 24);
|
||||
let r: i32x4 = transmute(vmull_high_lane_s16::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_laneq_s16() {
|
||||
let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
|
||||
let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
|
||||
let e: i32x4 = i32x4::new(18, 20, 22, 24);
|
||||
let r: i32x4 = transmute(vmull_high_laneq_s16::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_lane_s32() {
|
||||
let a: i32x4 = i32x4::new(1, 2, 9, 10);
|
||||
let b: i32x2 = i32x2::new(0, 2);
|
||||
let e: i64x2 = i64x2::new(18, 20);
|
||||
let r: i64x2 = transmute(vmull_high_lane_s32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_laneq_s32() {
|
||||
let a: i32x4 = i32x4::new(1, 2, 9, 10);
|
||||
let b: i32x4 = i32x4::new(0, 2, 0, 0);
|
||||
let e: i64x2 = i64x2::new(18, 20);
|
||||
let r: i64x2 = transmute(vmull_high_laneq_s32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_lane_u16() {
|
||||
let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
|
||||
let b: u16x4 = u16x4::new(0, 2, 0, 0);
|
||||
let e: u32x4 = u32x4::new(18, 20, 22, 24);
|
||||
let r: u32x4 = transmute(vmull_high_lane_u16::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_laneq_u16() {
|
||||
let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
|
||||
let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
|
||||
let e: u32x4 = u32x4::new(18, 20, 22, 24);
|
||||
let r: u32x4 = transmute(vmull_high_laneq_u16::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_lane_u32() {
|
||||
let a: u32x4 = u32x4::new(1, 2, 9, 10);
|
||||
let b: u32x2 = u32x2::new(0, 2);
|
||||
let e: u64x2 = u64x2::new(18, 20);
|
||||
let r: u64x2 = transmute(vmull_high_lane_u32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_laneq_u32() {
|
||||
let a: u32x4 = u32x4::new(1, 2, 9, 10);
|
||||
let b: u32x4 = u32x4::new(0, 2, 0, 0);
|
||||
let e: u64x2 = u64x2::new(18, 20);
|
||||
let r: u64x2 = transmute(vmull_high_laneq_u32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulx_f32() {
|
||||
let a: f32x2 = f32x2::new(1., 2.);
|
||||
let b: f32x2 = f32x2::new(2., 2.);
|
||||
let e: f32x2 = f32x2::new(2., 4.);
|
||||
let r: f32x2 = transmute(vmulx_f32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulxq_f32() {
|
||||
let a: f32x4 = f32x4::new(1., 2., 3., 4.);
|
||||
let b: f32x4 = f32x4::new(2., 2., 2., 2.);
|
||||
let e: f32x4 = f32x4::new(2., 4., 6., 8.);
|
||||
let r: f32x4 = transmute(vmulxq_f32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulx_f64() {
|
||||
let a: f64 = 1.;
|
||||
let b: f64 = 2.;
|
||||
let e: f64 = 2.;
|
||||
let r: f64 = transmute(vmulx_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulxq_f64() {
|
||||
let a: f64x2 = f64x2::new(1., 2.);
|
||||
let b: f64x2 = f64x2::new(2., 2.);
|
||||
let e: f64x2 = f64x2::new(2., 4.);
|
||||
let r: f64x2 = transmute(vmulxq_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulx_lane_f64() {
|
||||
let a: f64 = 1.;
|
||||
let b: f64 = 2.;
|
||||
let e: f64 = 2.;
|
||||
let r: f64 = transmute(vmulx_lane_f64::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulx_laneq_f64() {
|
||||
let a: f64 = 1.;
|
||||
let b: f64x2 = f64x2::new(2., 0.);
|
||||
let e: f64 = 2.;
|
||||
let r: f64 = transmute(vmulx_laneq_f64::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulx_lane_f32() {
|
||||
let a: f32x2 = f32x2::new(1., 2.);
|
||||
let b: f32x2 = f32x2::new(2., 0.);
|
||||
let e: f32x2 = f32x2::new(2., 4.);
|
||||
let r: f32x2 = transmute(vmulx_lane_f32::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulx_laneq_f32() {
|
||||
let a: f32x2 = f32x2::new(1., 2.);
|
||||
let b: f32x4 = f32x4::new(2., 0., 0., 0.);
|
||||
let e: f32x2 = f32x2::new(2., 4.);
|
||||
let r: f32x2 = transmute(vmulx_laneq_f32::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulxq_lane_f32() {
|
||||
let a: f32x4 = f32x4::new(1., 2., 3., 4.);
|
||||
let b: f32x2 = f32x2::new(2., 0.);
|
||||
let e: f32x4 = f32x4::new(2., 4., 6., 8.);
|
||||
let r: f32x4 = transmute(vmulxq_lane_f32::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulxq_laneq_f32() {
|
||||
let a: f32x4 = f32x4::new(1., 2., 3., 4.);
|
||||
let b: f32x4 = f32x4::new(2., 0., 0., 0.);
|
||||
let e: f32x4 = f32x4::new(2., 4., 6., 8.);
|
||||
let r: f32x4 = transmute(vmulxq_laneq_f32::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulxq_lane_f64() {
|
||||
let a: f64x2 = f64x2::new(1., 2.);
|
||||
let b: f64 = 2.;
|
||||
let e: f64x2 = f64x2::new(2., 4.);
|
||||
let r: f64x2 = transmute(vmulxq_lane_f64::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulxq_laneq_f64() {
|
||||
let a: f64x2 = f64x2::new(1., 2.);
|
||||
let b: f64x2 = f64x2::new(2., 0.);
|
||||
let e: f64x2 = f64x2::new(2., 4.);
|
||||
let r: f64x2 = transmute(vmulxq_laneq_f64::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulxs_f32() {
|
||||
let a: f32 = 2.;
|
||||
let b: f32 = 3.;
|
||||
let e: f32 = 6.;
|
||||
let r: f32 = transmute(vmulxs_f32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulxd_f64() {
|
||||
let a: f64 = 2.;
|
||||
let b: f64 = 3.;
|
||||
let e: f64 = 6.;
|
||||
let r: f64 = transmute(vmulxd_f64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulxs_lane_f32() {
|
||||
let a: f32 = 2.;
|
||||
let b: f32x2 = f32x2::new(3., 0.);
|
||||
let e: f32 = 6.;
|
||||
let r: f32 = transmute(vmulxs_lane_f32::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulxs_laneq_f32() {
|
||||
let a: f32 = 2.;
|
||||
let b: f32x4 = f32x4::new(3., 0., 0., 0.);
|
||||
let e: f32 = 6.;
|
||||
let r: f32 = transmute(vmulxs_laneq_f32::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulxd_lane_f64() {
|
||||
let a: f64 = 2.;
|
||||
let b: f64 = 3.;
|
||||
let e: f64 = 6.;
|
||||
let r: f64 = transmute(vmulxd_lane_f64::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulxd_laneq_f64() {
|
||||
let a: f64 = 2.;
|
||||
let b: f64x2 = f64x2::new(3., 0.);
|
||||
let e: f64 = 6.;
|
||||
let r: f64 = transmute(vmulxd_laneq_f64::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vfma_f64() {
|
||||
let a: f64 = 2.0;
|
||||
|
|
|
|||
|
|
@ -108,9 +108,6 @@ extern "C" {
|
|||
#[link_name = "llvm.aarch64.neon.usqadd.v2i64"]
|
||||
fn vsqaddq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
|
||||
|
||||
#[link_name = "llvm.aarch64.neon.pmull64"]
|
||||
fn vmull_p64_(a: i64, b: i64) -> int8x16_t;
|
||||
|
||||
#[link_name = "llvm.aarch64.neon.addp.v8i16"]
|
||||
fn vpaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
|
||||
#[link_name = "llvm.aarch64.neon.addp.v4i32"]
|
||||
|
|
@ -1150,14 +1147,6 @@ pub unsafe fn vaddlvq_u8(a: uint8x16_t) -> u16 {
|
|||
vaddlvq_u8_(a) as u16
|
||||
}
|
||||
|
||||
/// Polynomial multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(pmull))]
|
||||
pub unsafe fn vmull_p64(a: p64, b: p64) -> p128 {
|
||||
transmute(vmull_p64_(transmute(a), transmute(b)))
|
||||
}
|
||||
|
||||
/// Vector add.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -3260,36 +3249,6 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_p64() {
|
||||
// FIXME: I've a hard time writing a test for this as the documentation
|
||||
// from arm is a bit thin as to waht exactly it does
|
||||
let a: i64 = 8;
|
||||
let b: i64 = 7;
|
||||
let e: i128 = 56;
|
||||
let r: i128 = transmute(vmull_p64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
/*
|
||||
let a: i64 = 5;
|
||||
let b: i64 = 5;
|
||||
let e: i128 = 25;
|
||||
let r: i128 = transmute(vmull_p64(a, b));
|
||||
|
||||
assert_eq!(r, e);
|
||||
let a: i64 = 6;
|
||||
let b: i64 = 6;
|
||||
let e: i128 = 36;
|
||||
let r: i128 = transmute(vmull_p64(a, b));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: i64 = 7;
|
||||
let b: i64 = 6;
|
||||
let e: i128 = 42;
|
||||
let r: i128 = transmute(vmull_p64(a, b));
|
||||
assert_eq!(r, e);
|
||||
*/
|
||||
}
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vadd_f64() {
|
||||
let a = 1.;
|
||||
|
|
|
|||
|
|
@ -5558,6 +5558,38 @@ pub unsafe fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
|
|||
simd_mul(a, b)
|
||||
}
|
||||
|
||||
/// Polynomial multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmul))]
|
||||
pub unsafe fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmul.v8i8")]
|
||||
fn vmul_p8_(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t;
|
||||
}
|
||||
vmul_p8_(a, b)
|
||||
}
|
||||
|
||||
/// Polynomial multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmul))]
|
||||
pub unsafe fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmul.v16i8")]
|
||||
fn vmulq_p8_(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t;
|
||||
}
|
||||
vmulq_p8_(a, b)
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -5578,6 +5610,346 @@ pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
|
|||
simd_mul(a, b)
|
||||
}
|
||||
|
||||
/// Vector multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
|
||||
pub unsafe fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
|
||||
simd_mul(a, vdup_n_s16(b))
|
||||
}
|
||||
|
||||
/// Vector multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
|
||||
pub unsafe fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
|
||||
simd_mul(a, vdupq_n_s16(b))
|
||||
}
|
||||
|
||||
/// Vector multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
|
||||
pub unsafe fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
|
||||
simd_mul(a, vdup_n_s32(b))
|
||||
}
|
||||
|
||||
/// Vector multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
|
||||
pub unsafe fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
|
||||
simd_mul(a, vdupq_n_s32(b))
|
||||
}
|
||||
|
||||
/// Vector multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
|
||||
pub unsafe fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t {
|
||||
simd_mul(a, vdup_n_u16(b))
|
||||
}
|
||||
|
||||
/// Vector multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
|
||||
pub unsafe fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t {
|
||||
simd_mul(a, vdupq_n_u16(b))
|
||||
}
|
||||
|
||||
/// Vector multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
|
||||
pub unsafe fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t {
|
||||
simd_mul(a, vdup_n_u32(b))
|
||||
}
|
||||
|
||||
/// Vector multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
|
||||
pub unsafe fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t {
|
||||
simd_mul(a, vdupq_n_u32(b))
|
||||
}
|
||||
|
||||
/// Vector multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
|
||||
pub unsafe fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t {
|
||||
simd_mul(a, vdup_n_f32(b))
|
||||
}
|
||||
|
||||
/// Vector multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
|
||||
pub unsafe fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t {
|
||||
simd_mul(a, vdupq_n_f32(b))
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
|
||||
static_assert_imm2!(LANE);
|
||||
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
|
||||
static_assert_imm3!(LANE);
|
||||
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
|
||||
static_assert_imm2!(LANE);
|
||||
simd_mul(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
|
||||
static_assert_imm3!(LANE);
|
||||
simd_mul(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
|
||||
static_assert_imm1!(LANE);
|
||||
simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
|
||||
static_assert_imm2!(LANE);
|
||||
simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
|
||||
static_assert_imm1!(LANE);
|
||||
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
|
||||
static_assert_imm2!(LANE);
|
||||
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
|
||||
static_assert_imm2!(LANE);
|
||||
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t {
|
||||
static_assert_imm3!(LANE);
|
||||
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t {
|
||||
static_assert_imm2!(LANE);
|
||||
simd_mul(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
|
||||
static_assert_imm3!(LANE);
|
||||
simd_mul(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmul_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
|
||||
static_assert_imm1!(LANE);
|
||||
simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t {
|
||||
static_assert_imm2!(LANE);
|
||||
simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t {
|
||||
static_assert_imm1!(LANE);
|
||||
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
|
||||
static_assert_imm2!(LANE);
|
||||
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Floating-point multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmul_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
|
||||
static_assert_imm1!(LANE);
|
||||
simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Floating-point multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmul_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
|
||||
static_assert_imm2!(LANE);
|
||||
simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Floating-point multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
|
||||
static_assert_imm1!(LANE);
|
||||
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Floating-point multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
|
||||
static_assert_imm2!(LANE);
|
||||
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Signed multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -5690,6 +6062,142 @@ pub unsafe fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t {
|
|||
vmull_p8_(a, b)
|
||||
}
|
||||
|
||||
/// Vector long multiply with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
|
||||
pub unsafe fn vmullh_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
|
||||
vmull_s16(a, vdup_n_s16(b))
|
||||
}
|
||||
|
||||
/// Vector long multiply with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
|
||||
pub unsafe fn vmulls_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
|
||||
vmull_s32(a, vdup_n_s32(b))
|
||||
}
|
||||
|
||||
/// Vector long multiply with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
|
||||
pub unsafe fn vmullh_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t {
|
||||
vmull_u16(a, vdup_n_u16(b))
|
||||
}
|
||||
|
||||
/// Vector long multiply with scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
|
||||
pub unsafe fn vmulls_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t {
|
||||
vmull_u32(a, vdup_n_u32(b))
|
||||
}
|
||||
|
||||
/// Vector long multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
|
||||
static_assert_imm2!(LANE);
|
||||
vmull_s16(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Vector long multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
|
||||
static_assert_imm3!(LANE);
|
||||
vmull_s16(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Vector long multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
|
||||
static_assert_imm1!(LANE);
|
||||
vmull_s32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Vector long multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
|
||||
static_assert_imm2!(LANE);
|
||||
vmull_s32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Vector long multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
|
||||
static_assert_imm2!(LANE);
|
||||
vmull_u16(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Vector long multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t {
|
||||
static_assert_imm3!(LANE);
|
||||
vmull_u16(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Vector long multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
|
||||
static_assert_imm1!(LANE);
|
||||
vmull_u32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Vector long multiply by scalar
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))]
|
||||
#[rustc_legacy_const_generics(2)]
|
||||
pub unsafe fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t {
|
||||
static_assert_imm2!(LANE);
|
||||
vmull_u32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
|
||||
}
|
||||
|
||||
/// Floating-point fused Multiply-Add to accumulator(vector)
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -17013,6 +17521,24 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_p8() {
|
||||
let a: i8x8 = i8x8::new(1, 3, 1, 3, 1, 3, 1, 3);
|
||||
let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let e: i8x8 = i8x8::new(1, 6, 3, 12, 5, 10, 7, 24);
|
||||
let r: i8x8 = transmute(vmul_p8(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_p8() {
|
||||
let a: i8x16 = i8x16::new(1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3);
|
||||
let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let e: i8x16 = i8x16::new(1, 6, 3, 12, 5, 10, 7, 24, 9, 30, 11, 20, 13, 18, 15, 48);
|
||||
let r: i8x16 = transmute(vmulq_p8(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_f32() {
|
||||
let a: f32x2 = f32x2::new(1.0, 2.0);
|
||||
|
|
@ -17031,6 +17557,276 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_n_s16() {
|
||||
let a: i16x4 = i16x4::new(1, 2, 3, 4);
|
||||
let b: i16 = 2;
|
||||
let e: i16x4 = i16x4::new(2, 4, 6, 8);
|
||||
let r: i16x4 = transmute(vmul_n_s16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_n_s16() {
|
||||
let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b: i16 = 2;
|
||||
let e: i16x8 = i16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
|
||||
let r: i16x8 = transmute(vmulq_n_s16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_n_s32() {
|
||||
let a: i32x2 = i32x2::new(1, 2);
|
||||
let b: i32 = 2;
|
||||
let e: i32x2 = i32x2::new(2, 4);
|
||||
let r: i32x2 = transmute(vmul_n_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_n_s32() {
|
||||
let a: i32x4 = i32x4::new(1, 2, 3, 4);
|
||||
let b: i32 = 2;
|
||||
let e: i32x4 = i32x4::new(2, 4, 6, 8);
|
||||
let r: i32x4 = transmute(vmulq_n_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_n_u16() {
|
||||
let a: u16x4 = u16x4::new(1, 2, 3, 4);
|
||||
let b: u16 = 2;
|
||||
let e: u16x4 = u16x4::new(2, 4, 6, 8);
|
||||
let r: u16x4 = transmute(vmul_n_u16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_n_u16() {
|
||||
let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b: u16 = 2;
|
||||
let e: u16x8 = u16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
|
||||
let r: u16x8 = transmute(vmulq_n_u16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_n_u32() {
|
||||
let a: u32x2 = u32x2::new(1, 2);
|
||||
let b: u32 = 2;
|
||||
let e: u32x2 = u32x2::new(2, 4);
|
||||
let r: u32x2 = transmute(vmul_n_u32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_n_u32() {
|
||||
let a: u32x4 = u32x4::new(1, 2, 3, 4);
|
||||
let b: u32 = 2;
|
||||
let e: u32x4 = u32x4::new(2, 4, 6, 8);
|
||||
let r: u32x4 = transmute(vmulq_n_u32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_n_f32() {
|
||||
let a: f32x2 = f32x2::new(1., 2.);
|
||||
let b: f32 = 2.;
|
||||
let e: f32x2 = f32x2::new(2., 4.);
|
||||
let r: f32x2 = transmute(vmul_n_f32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_n_f32() {
|
||||
let a: f32x4 = f32x4::new(1., 2., 3., 4.);
|
||||
let b: f32 = 2.;
|
||||
let e: f32x4 = f32x4::new(2., 4., 6., 8.);
|
||||
let r: f32x4 = transmute(vmulq_n_f32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_lane_s16() {
|
||||
let a: i16x4 = i16x4::new(1, 2, 3, 4);
|
||||
let b: i16x4 = i16x4::new(0, 2, 0, 0);
|
||||
let e: i16x4 = i16x4::new(2, 4, 6, 8);
|
||||
let r: i16x4 = transmute(vmul_lane_s16::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_laneq_s16() {
|
||||
let a: i16x4 = i16x4::new(1, 2, 3, 4);
|
||||
let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
|
||||
let e: i16x4 = i16x4::new(2, 4, 6, 8);
|
||||
let r: i16x4 = transmute(vmul_laneq_s16::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_lane_s16() {
|
||||
let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b: i16x4 = i16x4::new(0, 2, 0, 0);
|
||||
let e: i16x8 = i16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
|
||||
let r: i16x8 = transmute(vmulq_lane_s16::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_laneq_s16() {
|
||||
let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
|
||||
let e: i16x8 = i16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
|
||||
let r: i16x8 = transmute(vmulq_laneq_s16::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_lane_s32() {
|
||||
let a: i32x2 = i32x2::new(1, 2);
|
||||
let b: i32x2 = i32x2::new(0, 2);
|
||||
let e: i32x2 = i32x2::new(2, 4);
|
||||
let r: i32x2 = transmute(vmul_lane_s32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_laneq_s32() {
|
||||
let a: i32x2 = i32x2::new(1, 2);
|
||||
let b: i32x4 = i32x4::new(0, 2, 0, 0);
|
||||
let e: i32x2 = i32x2::new(2, 4);
|
||||
let r: i32x2 = transmute(vmul_laneq_s32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_lane_s32() {
|
||||
let a: i32x4 = i32x4::new(1, 2, 3, 4);
|
||||
let b: i32x2 = i32x2::new(0, 2);
|
||||
let e: i32x4 = i32x4::new(2, 4, 6, 8);
|
||||
let r: i32x4 = transmute(vmulq_lane_s32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_laneq_s32() {
|
||||
let a: i32x4 = i32x4::new(1, 2, 3, 4);
|
||||
let b: i32x4 = i32x4::new(0, 2, 0, 0);
|
||||
let e: i32x4 = i32x4::new(2, 4, 6, 8);
|
||||
let r: i32x4 = transmute(vmulq_laneq_s32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_lane_u16() {
|
||||
let a: u16x4 = u16x4::new(1, 2, 3, 4);
|
||||
let b: u16x4 = u16x4::new(0, 2, 0, 0);
|
||||
let e: u16x4 = u16x4::new(2, 4, 6, 8);
|
||||
let r: u16x4 = transmute(vmul_lane_u16::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_laneq_u16() {
|
||||
let a: u16x4 = u16x4::new(1, 2, 3, 4);
|
||||
let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
|
||||
let e: u16x4 = u16x4::new(2, 4, 6, 8);
|
||||
let r: u16x4 = transmute(vmul_laneq_u16::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_lane_u16() {
|
||||
let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b: u16x4 = u16x4::new(0, 2, 0, 0);
|
||||
let e: u16x8 = u16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
|
||||
let r: u16x8 = transmute(vmulq_lane_u16::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_laneq_u16() {
|
||||
let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
|
||||
let e: u16x8 = u16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
|
||||
let r: u16x8 = transmute(vmulq_laneq_u16::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_lane_u32() {
|
||||
let a: u32x2 = u32x2::new(1, 2);
|
||||
let b: u32x2 = u32x2::new(0, 2);
|
||||
let e: u32x2 = u32x2::new(2, 4);
|
||||
let r: u32x2 = transmute(vmul_lane_u32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_laneq_u32() {
|
||||
let a: u32x2 = u32x2::new(1, 2);
|
||||
let b: u32x4 = u32x4::new(0, 2, 0, 0);
|
||||
let e: u32x2 = u32x2::new(2, 4);
|
||||
let r: u32x2 = transmute(vmul_laneq_u32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_lane_u32() {
|
||||
let a: u32x4 = u32x4::new(1, 2, 3, 4);
|
||||
let b: u32x2 = u32x2::new(0, 2);
|
||||
let e: u32x4 = u32x4::new(2, 4, 6, 8);
|
||||
let r: u32x4 = transmute(vmulq_lane_u32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_laneq_u32() {
|
||||
let a: u32x4 = u32x4::new(1, 2, 3, 4);
|
||||
let b: u32x4 = u32x4::new(0, 2, 0, 0);
|
||||
let e: u32x4 = u32x4::new(2, 4, 6, 8);
|
||||
let r: u32x4 = transmute(vmulq_laneq_u32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_lane_f32() {
|
||||
let a: f32x2 = f32x2::new(1., 2.);
|
||||
let b: f32x2 = f32x2::new(2., 0.);
|
||||
let e: f32x2 = f32x2::new(2., 4.);
|
||||
let r: f32x2 = transmute(vmul_lane_f32::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_laneq_f32() {
|
||||
let a: f32x2 = f32x2::new(1., 2.);
|
||||
let b: f32x4 = f32x4::new(2., 0., 0., 0.);
|
||||
let e: f32x2 = f32x2::new(2., 4.);
|
||||
let r: f32x2 = transmute(vmul_laneq_f32::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_lane_f32() {
|
||||
let a: f32x4 = f32x4::new(1., 2., 3., 4.);
|
||||
let b: f32x2 = f32x2::new(2., 0.);
|
||||
let e: f32x4 = f32x4::new(2., 4., 6., 8.);
|
||||
let r: f32x4 = transmute(vmulq_lane_f32::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulq_laneq_f32() {
|
||||
let a: f32x4 = f32x4::new(1., 2., 3., 4.);
|
||||
let b: f32x4 = f32x4::new(2., 0., 0., 0.);
|
||||
let e: f32x4 = f32x4::new(2., 4., 6., 8.);
|
||||
let r: f32x4 = transmute(vmulq_laneq_f32::<0>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_s8() {
|
||||
let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
|
|
@ -17094,6 +17890,114 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmullh_n_s16() {
|
||||
let a: i16x4 = i16x4::new(1, 2, 3, 4);
|
||||
let b: i16 = 2;
|
||||
let e: i32x4 = i32x4::new(2, 4, 6, 8);
|
||||
let r: i32x4 = transmute(vmullh_n_s16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulls_n_s32() {
|
||||
let a: i32x2 = i32x2::new(1, 2);
|
||||
let b: i32 = 2;
|
||||
let e: i64x2 = i64x2::new(2, 4);
|
||||
let r: i64x2 = transmute(vmulls_n_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmullh_n_u16() {
|
||||
let a: u16x4 = u16x4::new(1, 2, 3, 4);
|
||||
let b: u16 = 2;
|
||||
let e: u32x4 = u32x4::new(2, 4, 6, 8);
|
||||
let r: u32x4 = transmute(vmullh_n_u16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmulls_n_u32() {
|
||||
let a: u32x2 = u32x2::new(1, 2);
|
||||
let b: u32 = 2;
|
||||
let e: u64x2 = u64x2::new(2, 4);
|
||||
let r: u64x2 = transmute(vmulls_n_u32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_lane_s16() {
|
||||
let a: i16x4 = i16x4::new(1, 2, 3, 4);
|
||||
let b: i16x4 = i16x4::new(0, 2, 0, 0);
|
||||
let e: i32x4 = i32x4::new(2, 4, 6, 8);
|
||||
let r: i32x4 = transmute(vmull_lane_s16::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_laneq_s16() {
|
||||
let a: i16x4 = i16x4::new(1, 2, 3, 4);
|
||||
let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
|
||||
let e: i32x4 = i32x4::new(2, 4, 6, 8);
|
||||
let r: i32x4 = transmute(vmull_laneq_s16::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_lane_s32() {
|
||||
let a: i32x2 = i32x2::new(1, 2);
|
||||
let b: i32x2 = i32x2::new(0, 2);
|
||||
let e: i64x2 = i64x2::new(2, 4);
|
||||
let r: i64x2 = transmute(vmull_lane_s32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_laneq_s32() {
|
||||
let a: i32x2 = i32x2::new(1, 2);
|
||||
let b: i32x4 = i32x4::new(0, 2, 0, 0);
|
||||
let e: i64x2 = i64x2::new(2, 4);
|
||||
let r: i64x2 = transmute(vmull_laneq_s32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_lane_u16() {
|
||||
let a: u16x4 = u16x4::new(1, 2, 3, 4);
|
||||
let b: u16x4 = u16x4::new(0, 2, 0, 0);
|
||||
let e: u32x4 = u32x4::new(2, 4, 6, 8);
|
||||
let r: u32x4 = transmute(vmull_lane_u16::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_laneq_u16() {
|
||||
let a: u16x4 = u16x4::new(1, 2, 3, 4);
|
||||
let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
|
||||
let e: u32x4 = u32x4::new(2, 4, 6, 8);
|
||||
let r: u32x4 = transmute(vmull_laneq_u16::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_lane_u32() {
|
||||
let a: u32x2 = u32x2::new(1, 2);
|
||||
let b: u32x2 = u32x2::new(0, 2);
|
||||
let e: u64x2 = u64x2::new(2, 4);
|
||||
let r: u64x2 = transmute(vmull_lane_u32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_laneq_u32() {
|
||||
let a: u32x2 = u32x2::new(1, 2);
|
||||
let b: u32x4 = u32x4::new(0, 2, 0, 0);
|
||||
let e: u64x2 = u64x2::new(2, 4);
|
||||
let r: u64x2 = transmute(vmull_laneq_u32::<1>(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vfma_f32() {
|
||||
let a: f32x2 = f32x2::new(2.0, 3.0);
|
||||
|
|
|
|||
|
|
@ -338,7 +338,7 @@ generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t,
|
|||
|
||||
/// Signed compare bitwise Test bits nonzero
|
||||
name = vtst
|
||||
multi_fn = simd_and, c:in_t
|
||||
multi_fn = simd_and, c:in_t, a, b
|
||||
multi_fn = fixed, d:in_t
|
||||
multi_fn = simd_ne, c, transmute(d)
|
||||
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
|
||||
|
|
@ -354,7 +354,7 @@ generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8
|
|||
|
||||
/// Unsigned compare bitwise Test bits nonzero
|
||||
name = vtst
|
||||
multi_fn = simd_and, c:in_t
|
||||
multi_fn = simd_and, c:in_t, a, b
|
||||
multi_fn = fixed, d:in_t
|
||||
multi_fn = simd_ne, c, transmute(d)
|
||||
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
|
||||
|
|
@ -1864,6 +1864,18 @@ aarch64 = mul
|
|||
fn = simd_mul
|
||||
generate int*_t, uint*_t
|
||||
|
||||
/// Polynomial multiply
|
||||
name = vmul
|
||||
a = 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3
|
||||
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
validate 1, 6, 3, 12, 5, 10, 7, 24, 9, 30, 11, 20, 13, 18, 15, 48
|
||||
|
||||
aarch64 = pmul
|
||||
link-aarch64 = pmul._EXT_
|
||||
arm = vmul
|
||||
link-arm = vmulp._EXT_
|
||||
generate poly8x8_t, poly8x16_t
|
||||
|
||||
/// Multiply
|
||||
name = vmul
|
||||
fn = simd_mul
|
||||
|
|
@ -1877,6 +1889,108 @@ generate float64x*_t
|
|||
arm = vmul.
|
||||
generate float*_t
|
||||
|
||||
/// Vector multiply by scalar
|
||||
name = vmul
|
||||
out-n-suffix
|
||||
multi_fn = simd_mul, a, {vdup-nout-noext, b}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 2
|
||||
validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32
|
||||
|
||||
arm = vmul
|
||||
aarch64 = mul
|
||||
generate int16x4_t:i16:int16x4_t, int16x8_t:i16:int16x8_t, int32x2_t:i32:int32x2_t, int32x4_t:i32:int32x4_t
|
||||
generate uint16x4_t:u16:uint16x4_t, uint16x8_t:u16:uint16x8_t, uint32x2_t:u32:uint32x2_t, uint32x4_t:u32:uint32x4_t
|
||||
|
||||
/// Vector multiply by scalar
|
||||
name = vmul
|
||||
out-n-suffix
|
||||
multi_fn = simd_mul, a, {vdup-nout-noext, b}
|
||||
a = 1., 2., 3., 4.
|
||||
b = 2.
|
||||
validate 2., 4., 6., 8.
|
||||
|
||||
aarch64 = fmul
|
||||
generate float64x1_t:f64:float64x1_t, float64x2_t:f64:float64x2_t
|
||||
|
||||
arm = vmul
|
||||
generate float32x2_t:f32:float32x2_t, float32x4_t:f32:float32x4_t
|
||||
|
||||
/// Multiply
|
||||
name = vmul
|
||||
lane-suffixes
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = simd_mul, a, {simd_shuffle-out_len-noext, b, b, {dup-out_len-LANE as u32}}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
n = 1
|
||||
validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32
|
||||
|
||||
aarch64 = mul
|
||||
arm = vmul
|
||||
generate int16x4_t, int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x4_t:int16x8_t, int16x8_t
|
||||
generate int32x2_t, int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x2_t:int32x4_t, int32x4_t
|
||||
generate uint16x4_t, uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
|
||||
generate uint32x2_t, uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t
|
||||
|
||||
/// Floating-point multiply
|
||||
name = vmul
|
||||
lane-suffixes
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = simd_mul, a, {transmute--<element_t _>, {simd_extract, b, LANE as u32}}
|
||||
a = 1., 2., 3., 4.
|
||||
b = 2., 0., 0., 0.
|
||||
n = 0
|
||||
validate 2., 4., 6., 8.
|
||||
|
||||
aarch64 = fmul
|
||||
generate float64x1_t, float64x1_t:float64x2_t:float64x1_t
|
||||
|
||||
/// Floating-point multiply
|
||||
name = vmul
|
||||
lane-suffixes
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = simd_mul, a, {simd_shuffle-out_len-noext, b, b, {dup-out_len-LANE as u32}}
|
||||
a = 1., 2., 3., 4.
|
||||
b = 2., 0., 0., 0.
|
||||
n = 0
|
||||
validate 2., 4., 6., 8.
|
||||
|
||||
aarch64 = fmul
|
||||
generate float64x2_t:float64x1_t:float64x2_t, float64x2_t
|
||||
|
||||
arm = vmul
|
||||
generate float32x2_t, float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x2_t:float32x4_t, float32x4_t
|
||||
|
||||
/// Floating-point multiply
|
||||
name = vmuls_lane
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = simd_extract, b:f32, b, LANE as u32
|
||||
multi_fn = a * b
|
||||
a = 1.
|
||||
b = 2., 0., 0., 0.
|
||||
n = 0
|
||||
validate 2.
|
||||
aarch64 = fmul
|
||||
generate f32:float32x2_t:f32, f32:float32x4_t:f32
|
||||
|
||||
/// Floating-point multiply
|
||||
name = vmuld_lane
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = simd_extract, b:f64, b, LANE as u32
|
||||
multi_fn = a * b
|
||||
a = 1.
|
||||
b = 2., 0.
|
||||
n = 0
|
||||
validate 2.
|
||||
aarch64 = fmul
|
||||
generate f64:float64x1_t:f64, f64:float64x2_t:f64
|
||||
|
||||
/// Signed multiply long
|
||||
name = vmull
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
|
|
@ -1941,6 +2055,21 @@ link-arm = vmullp._EXT_
|
|||
link-aarch64 = pmull._EXT_
|
||||
generate poly8x8_t:poly8x8_t:poly16x8_t
|
||||
|
||||
/// Polynomial multiply long
|
||||
name = vmull
|
||||
no-q
|
||||
a = 15
|
||||
b = 3
|
||||
validate 17
|
||||
target = crypto
|
||||
|
||||
aarch64 = pmull
|
||||
link-aarch64 = pmull64:p64:p64:p64:int8x16_t
|
||||
arm = vmull
|
||||
link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
|
||||
//generate p64:p64:p128
|
||||
|
||||
|
||||
/// Polynomial multiply long
|
||||
name = vmull_high
|
||||
no-q
|
||||
|
|
@ -1955,6 +2084,144 @@ validate 9, 30, 11, 20, 13, 18, 15, 48
|
|||
aarch64 = pmull
|
||||
generate poly8x16_t:poly8x16_t:poly16x8_t
|
||||
|
||||
/// Polynomial multiply long
|
||||
name = vmull_high
|
||||
no-q
|
||||
multi_fn = vmull-noqself-noext, {simd_extract, a, 1}, {simd_extract, b, 1}
|
||||
a = 1, 15
|
||||
b = 1, 3
|
||||
validate 17
|
||||
target = crypto
|
||||
|
||||
aarch64 = pmull2
|
||||
//generate poly64x2_t:poly64x2_t:p128
|
||||
|
||||
/// Vector long multiply with scalar
|
||||
name = vmull
|
||||
n-suffix
|
||||
multi_fn = vmull-in0-noext, a, {vdup-nin0-noext, b}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8
|
||||
b = 2
|
||||
validate 2, 4, 6, 8, 10, 12, 14, 16
|
||||
|
||||
arm = vmull
|
||||
aarch64 = smull
|
||||
generate int16x4_t:i16:int32x4_t, int32x2_t:i32:int64x2_t
|
||||
aarch64 = umull
|
||||
generate uint16x4_t:u16:uint32x4_t, uint32x2_t:u32:uint64x2_t
|
||||
|
||||
/// Vector long multiply by scalar
|
||||
name = vmull_lane
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = vmull-in0-noext, a, {simd_shuffle-in0_len-noext, b, b, {dup-in0_len-LANE as u32}}
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
n = 1
|
||||
validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32
|
||||
|
||||
arm = vmull
|
||||
aarch64 = smull
|
||||
generate int16x4_t:int16x4_t:int32x4_t, int16x4_t:int16x8_t:int32x4_t
|
||||
generate int32x2_t:int32x2_t:int64x2_t, int32x2_t:int32x4_t:int64x2_t
|
||||
aarch64 = umull
|
||||
generate uint16x4_t:uint16x4_t:uint32x4_t, uint16x4_t:uint16x8_t:uint32x4_t
|
||||
generate uint32x2_t:uint32x2_t:uint64x2_t, uint32x2_t:uint32x4_t:uint64x2_t
|
||||
|
||||
/// Multiply long
|
||||
name = vmull_high_n
|
||||
no-q
|
||||
multi_fn = vmull_high-noqself-noext, a, {vdup-nin0-noext, b}
|
||||
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 2
|
||||
validate 18, 20, 22, 24, 26, 28, 30, 32
|
||||
|
||||
aarch64 = smull2
|
||||
generate int16x8_t:i16:int32x4_t, int32x4_t:i32:int64x2_t
|
||||
aarch64 = umull2
|
||||
generate uint16x8_t:u16:uint32x4_t, uint32x4_t:u32:uint64x2_t
|
||||
|
||||
/// Multiply long
|
||||
name = vmull_high_lane
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = vmull_high-noqself-noext, a, {simd_shuffle-in0_len-noext, b, b, {dup-in0_len-LANE as u32}}
|
||||
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
n = 1
|
||||
validate 18, 20, 22, 24, 26, 28, 30, 32
|
||||
|
||||
aarch64 = smull2
|
||||
generate int16x8_t:int16x4_t:int32x4_t, int16x8_t:int16x8_t:int32x4_t
|
||||
generate int32x4_t:int32x2_t:int64x2_t, int32x4_t:int32x4_t:int64x2_t
|
||||
aarch64 = umull2
|
||||
generate uint16x8_t:uint16x4_t:uint32x4_t, uint16x8_t:uint16x8_t:uint32x4_t
|
||||
generate uint32x4_t:uint32x2_t:uint64x2_t, uint32x4_t:uint32x4_t:uint64x2_t
|
||||
|
||||
/// Floating-point multiply extended
|
||||
name = vmulx
|
||||
a = 1., 2., 3., 4.
|
||||
b = 2., 2., 2., 2.
|
||||
validate 2., 4., 6., 8.
|
||||
|
||||
aarch64 = fmulx
|
||||
link-aarch64 = fmulx._EXT_
|
||||
generate float*_t, float64x*_t
|
||||
|
||||
/// Floating-point multiply extended
|
||||
name = vmulx
|
||||
lane-suffixes
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = vmulx-in0-noext, a, {transmute--<element_t _>, {simd_extract, b, LANE as u32}}
|
||||
a = 1.
|
||||
b = 2., 0.
|
||||
n = 0
|
||||
validate 2.
|
||||
|
||||
aarch64 = fmulx
|
||||
generate float64x1_t, float64x1_t:float64x2_t:float64x1_t
|
||||
|
||||
/// Floating-point multiply extended
|
||||
name = vmulx
|
||||
lane-suffixes
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = vmulx-in0-noext, a, {simd_shuffle-in0_len-noext, b, b, {dup-in0_len-LANE as u32}}
|
||||
a = 1., 2., 3., 4.
|
||||
b = 2., 0., 0., 0.
|
||||
n = 0
|
||||
validate 2., 4., 6., 8.
|
||||
|
||||
aarch64 = fmulx
|
||||
generate float32x2_t, float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x2_t:float32x4_t, float32x4_t
|
||||
generate float64x2_t:float64x1_t:float64x2_t, float64x2_t
|
||||
|
||||
/// Floating-point multiply extended
|
||||
name = vmulx
|
||||
a = 2.
|
||||
b = 3.
|
||||
validate 6.
|
||||
|
||||
aarch64 = fmulx
|
||||
link-aarch64 = fmulx._EXT_
|
||||
generate f32, f64
|
||||
|
||||
/// Floating-point multiply extended
|
||||
name = vmulx
|
||||
lane-suffixes
|
||||
constn = LANE
|
||||
multi_fn = static_assert_imm-in_exp_len-LANE
|
||||
multi_fn = vmulx-out-noext, a, {simd_extract, b, LANE as u32}
|
||||
|
||||
a = 2.
|
||||
b = 3., 0., 0., 0.
|
||||
n = 0
|
||||
validate 6.
|
||||
|
||||
aarch64 = fmulx
|
||||
generate f32:float32x2_t:f32, f32:float32x4_t:f32, f64:float64x1_t:f64, f64:float64x2_t:f64
|
||||
|
||||
/// Floating-point fused Multiply-Add to accumulator(vector)
|
||||
name = vfma
|
||||
a = 2.0, 3.0, 4.0, 5.0
|
||||
|
|
@ -2142,7 +2409,7 @@ generate uint32x4_t:u64
|
|||
name = vsubhn
|
||||
no-q
|
||||
multi_fn = fixed, c:in_t
|
||||
multi_fn = simd_cast, {simd_shr, {simd_sub}, transmute(c)}
|
||||
multi_fn = simd_cast, {simd_shr, {simd_sub, a, b}, transmute(c)}
|
||||
a = MAX, MIN, 1, 1, MAX, MIN, 1, 1
|
||||
b = 1, 0, 0, 0, 1, 0, 0, 0
|
||||
fixed = HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ fn type_len(t: &str) -> usize {
|
|||
"poly64x1_t" => 1,
|
||||
"poly64x2_t" => 2,
|
||||
"i8" | "i16" | "i32" | "i64" | "u8" | "u16" | "u32" | "u64" | "f32" | "f64" | "p8"
|
||||
| "p16" => 1,
|
||||
| "p16" | "p64" | "p128" => 1,
|
||||
_ => panic!("unknown type: {}", t),
|
||||
}
|
||||
}
|
||||
|
|
@ -324,16 +324,16 @@ fn type_to_noq_suffix(t: &str) -> &str {
|
|||
"int16x4_t" | "int16x8_t" | "i16" => "_s16",
|
||||
"int32x2_t" | "int32x4_t" | "i32" => "_s32",
|
||||
"int64x1_t" | "int64x2_t" | "i64" => "_s64",
|
||||
"uint8x8_t" | "uint8x16_t" => "_u8",
|
||||
"uint16x4_t" | "uint16x8_t" => "_u16",
|
||||
"uint32x2_t" | "uint32x4_t" => "_u32",
|
||||
"uint64x1_t" | "uint64x2_t" => "_u64",
|
||||
"uint8x8_t" | "uint8x16_t" | "u8" => "_u8",
|
||||
"uint16x4_t" | "uint16x8_t" | "u16" => "_u16",
|
||||
"uint32x2_t" | "uint32x4_t" | "u32" => "_u32",
|
||||
"uint64x1_t" | "uint64x2_t" | "u64" => "_u64",
|
||||
"float16x4_t" | "float16x8_t" => "_f16",
|
||||
"float32x2_t" | "float32x4_t" => "_f32",
|
||||
"float64x1_t" | "float64x2_t" => "_f64",
|
||||
"poly8x8_t" | "poly8x16_t" => "_p8",
|
||||
"poly16x4_t" | "poly16x8_t" => "_p16",
|
||||
"poly64x1_t" | "poly64x2_t" => "_p64",
|
||||
"poly64x1_t" | "poly64x2_t" | "p64" => "_p64",
|
||||
_ => panic!("unknown type: {}", t),
|
||||
}
|
||||
}
|
||||
|
|
@ -347,6 +347,7 @@ enum Suffix {
|
|||
NSuffix,
|
||||
NoQNSuffix,
|
||||
OutSuffix,
|
||||
OutNSuffix,
|
||||
Lane,
|
||||
In2,
|
||||
In2Lane,
|
||||
|
|
@ -354,8 +355,10 @@ enum Suffix {
|
|||
|
||||
/// Selects the feature string the generator writes into
/// `#[target_feature(enable = "...")]` for an emitted intrinsic.
/// `gen_aarch64` and `gen_arm` each translate a variant with their own
/// `match target` table.
#[derive(Clone, Copy)]
|
||||
enum TargetFeature {
|
||||
// gen_aarch64 maps this to "neon"; gen_arm uses "neon" on aarch64 and "v7" on arm.
Default,
|
||||
// gen_aarch64 maps this to "v7"; gen_arm uses "neon" on aarch64 and "v7" on arm.
ArmV7,
|
||||
// gen_aarch64 maps this to "fp-armv8,v8"; gen_arm uses "neon" on aarch64 and
// "fp-armv8,v8" on arm.
FPArmV8,
|
||||
// gen_aarch64 maps this to "neon,crypto"; gen_arm uses "neon,crypto" on aarch64
// and "crypto,v8" on arm.
Crypto,
|
||||
}
|
||||
|
||||
fn type_to_global_type(t: &str) -> &str {
|
||||
|
|
@ -400,6 +403,8 @@ fn type_to_global_type(t: &str) -> &str {
|
|||
"f64" => "f64",
|
||||
"p8" => "p8",
|
||||
"p16" => "p16",
|
||||
"p64" => "p64",
|
||||
"p128" => "p128",
|
||||
_ => panic!("unknown type: {}", t),
|
||||
}
|
||||
}
|
||||
|
|
@ -492,6 +497,10 @@ fn type_to_ext(t: &str) -> &str {
|
|||
"u16" => "v4i16",
|
||||
"u32" => "v2i32",
|
||||
"u64" => "v1i64",
|
||||
"f32" => "f32",
|
||||
"f64" => "f64",
|
||||
"p64" => "p64",
|
||||
"p128" => "p128",
|
||||
/*
|
||||
"poly64x1_t" => "i64x1",
|
||||
"poly64x2_t" => "i64x2",
|
||||
|
|
@ -825,6 +834,7 @@ fn gen_aarch64(
|
|||
)],
|
||||
suffix: Suffix,
|
||||
para_num: i32,
|
||||
target: TargetFeature,
|
||||
fixed: &Vec<String>,
|
||||
multi_fn: &Vec<String>,
|
||||
) -> (String, String) {
|
||||
|
|
@ -846,16 +856,20 @@ fn gen_aarch64(
|
|||
NSuffix => format!("{}{}", current_name, type_to_n_suffix(in_t[1])),
|
||||
NoQNSuffix => format!("{}{}", current_name, type_to_noq_n_suffix(in_t[1])),
|
||||
OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)),
|
||||
OutNSuffix => format!("{}{}", current_name, type_to_n_suffix(out_t)),
|
||||
Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
|
||||
In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])),
|
||||
In2Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[2])),
|
||||
};
|
||||
let current_target = match target {
|
||||
Default => "neon",
|
||||
ArmV7 => "v7",
|
||||
FPArmV8 => "fp-armv8,v8",
|
||||
Crypto => "neon,crypto",
|
||||
};
|
||||
let current_fn = if let Some(current_fn) = current_fn.clone() {
|
||||
if link_aarch64.is_some() {
|
||||
panic!(
|
||||
"[{}] Can't specify link and (multi) fn at the same time.",
|
||||
name
|
||||
)
|
||||
panic!("[{}] Can't specify link and fn at the same time.", name)
|
||||
}
|
||||
current_fn
|
||||
} else if link_aarch64.is_some() {
|
||||
|
|
@ -872,7 +886,24 @@ fn gen_aarch64(
|
|||
let current_aarch64 = current_aarch64.clone().unwrap();
|
||||
let mut ext_c = String::new();
|
||||
let mut ext_c_const = String::new();
|
||||
if let Some(link_aarch64) = link_aarch64.clone() {
|
||||
let mut link_t: Vec<String> = vec![
|
||||
in_t[0].to_string(),
|
||||
in_t[1].to_string(),
|
||||
in_t[2].to_string(),
|
||||
out_t.to_string(),
|
||||
];
|
||||
if let Some(mut link_aarch64) = link_aarch64.clone() {
|
||||
if link_aarch64.contains(":") {
|
||||
let links: Vec<_> = link_aarch64.split(':').map(|v| v.to_string()).collect();
|
||||
assert_eq!(links.len(), 5);
|
||||
link_aarch64 = links[0].to_string();
|
||||
link_t = vec![
|
||||
links[1].clone(),
|
||||
links[2].clone(),
|
||||
links[3].clone(),
|
||||
links[4].clone(),
|
||||
];
|
||||
}
|
||||
let ext = type_to_ext(in_t[0]);
|
||||
let ext2 = type_to_ext(out_t);
|
||||
let link_aarch64 = if link_aarch64.starts_with("llvm") {
|
||||
|
|
@ -893,17 +924,17 @@ fn gen_aarch64(
|
|||
current_fn,
|
||||
match para_num {
|
||||
1 => {
|
||||
format!("a: {}", in_t[0])
|
||||
format!("a: {}", link_t[0])
|
||||
}
|
||||
2 => {
|
||||
format!("a: {}, b: {}", in_t[0], in_t[1])
|
||||
format!("a: {}, b: {}", link_t[0], link_t[1])
|
||||
}
|
||||
3 => {
|
||||
format!("a: {}, b: {}, c: {}", in_t[0], in_t[1], in_t[2])
|
||||
format!("a: {}, b: {}, c: {}", link_t[0], link_t[1], link_t[2])
|
||||
}
|
||||
_ => unimplemented!("unknown para_num"),
|
||||
},
|
||||
out_t
|
||||
link_t[3]
|
||||
);
|
||||
if const_aarch64.is_some() {
|
||||
ext_c_const = format!(
|
||||
|
|
@ -998,6 +1029,11 @@ fn gen_aarch64(
|
|||
} else {
|
||||
String::new()
|
||||
};
|
||||
let trans: [&str; 2] = if link_t[3] != out_t {
|
||||
["transmute(", ")"]
|
||||
} else {
|
||||
["", ""]
|
||||
};
|
||||
let call = if let Some(const_aarch64) = const_aarch64 {
|
||||
match para_num {
|
||||
1 => format!(
|
||||
|
|
@ -1033,16 +1069,16 @@ fn gen_aarch64(
|
|||
match (multi_calls.len(), para_num, fixed.len()) {
|
||||
(0, 1, 0) => format!(
|
||||
r#"pub unsafe fn {}{}(a: {}) -> {} {{
|
||||
{}{}(a)
|
||||
{}{}{}(a){}
|
||||
}}"#,
|
||||
name, const_declare, in_t[0], out_t, ext_c, current_fn,
|
||||
name, const_declare, in_t[0], out_t, ext_c, trans[0], current_fn, trans[1]
|
||||
),
|
||||
(0, 1, _) => {
|
||||
let fixed: Vec<String> = fixed.iter().take(type_len(in_t[0])).cloned().collect();
|
||||
format!(
|
||||
r#"pub unsafe fn {}{}(a: {}) -> {} {{
|
||||
let b{};
|
||||
{}{}(a, transmute(b))
|
||||
{}{}{}(a, transmute(b)){}
|
||||
}}"#,
|
||||
name,
|
||||
const_declare,
|
||||
|
|
@ -1050,14 +1086,16 @@ fn gen_aarch64(
|
|||
out_t,
|
||||
values(in_t[0], &fixed),
|
||||
ext_c,
|
||||
trans[0],
|
||||
current_fn,
|
||||
trans[1],
|
||||
)
|
||||
}
|
||||
(0, 2, _) => format!(
|
||||
r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{
|
||||
{}{}(a, b)
|
||||
{}{}{}(a, b){}
|
||||
}}"#,
|
||||
name, const_declare, in_t[0], in_t[1], out_t, ext_c, current_fn,
|
||||
name, const_declare, in_t[0], in_t[1], out_t, ext_c, trans[0], current_fn, trans[1],
|
||||
),
|
||||
(0, 3, _) => format!(
|
||||
r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{
|
||||
|
|
@ -1090,11 +1128,11 @@ fn gen_aarch64(
|
|||
r#"
|
||||
{}
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[target_feature(enable = "{}")]
|
||||
#[cfg_attr(test, assert_instr({}{}))]{}
|
||||
{}
|
||||
"#,
|
||||
current_comment, current_aarch64, const_assert, const_legacy, call
|
||||
current_comment, current_target, current_aarch64, const_assert, const_legacy, call
|
||||
);
|
||||
|
||||
let test = gen_test(
|
||||
|
|
@ -1259,6 +1297,7 @@ fn gen_arm(
|
|||
NSuffix => format!("{}{}", current_name, type_to_n_suffix(in_t[1])),
|
||||
NoQNSuffix => format!("{}{}", current_name, type_to_noq_n_suffix(in_t[1])),
|
||||
OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)),
|
||||
OutNSuffix => format!("{}{}", current_name, type_to_n_suffix(out_t)),
|
||||
Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
|
||||
In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])),
|
||||
In2Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[2])),
|
||||
|
|
@ -1266,10 +1305,17 @@ fn gen_arm(
|
|||
let current_aarch64 = current_aarch64
|
||||
.clone()
|
||||
.unwrap_or_else(|| current_arm.to_string());
|
||||
|
||||
let current_target = match target {
|
||||
let current_target_aarch64 = match target {
|
||||
Default => "neon",
|
||||
ArmV7 => "neon",
|
||||
FPArmV8 => "neon",
|
||||
Crypto => "neon,crypto",
|
||||
};
|
||||
let current_target_arm = match target {
|
||||
Default => "v7",
|
||||
ArmV7 => "v7",
|
||||
FPArmV8 => "fp-armv8,v8",
|
||||
Crypto => "crypto,v8",
|
||||
};
|
||||
|
||||
let current_fn = if let Some(current_fn) = current_fn.clone() {
|
||||
|
|
@ -1292,9 +1338,57 @@ fn gen_arm(
|
|||
String::new()
|
||||
};
|
||||
let mut ext_c = String::new();
|
||||
let mut ext_c_const_arm = String::new();
|
||||
let mut ext_c_const_aarch64 = String::new();
|
||||
if let (Some(link_arm), Some(link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) {
|
||||
let mut ext_c_arm = if multi_fn.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
String::from(
|
||||
r#"
|
||||
"#,
|
||||
)
|
||||
};
|
||||
let mut ext_c_aarch64 = if multi_fn.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
String::from(
|
||||
r#"
|
||||
"#,
|
||||
)
|
||||
};
|
||||
let mut link_arm_t: Vec<String> = vec![
|
||||
in_t[0].to_string(),
|
||||
in_t[1].to_string(),
|
||||
in_t[2].to_string(),
|
||||
out_t.to_string(),
|
||||
];
|
||||
let mut link_aarch64_t: Vec<String> = vec![
|
||||
in_t[0].to_string(),
|
||||
in_t[1].to_string(),
|
||||
in_t[2].to_string(),
|
||||
out_t.to_string(),
|
||||
];
|
||||
if let (Some(mut link_arm), Some(mut link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) {
|
||||
if link_arm.contains(":") {
|
||||
let links: Vec<_> = link_arm.split(':').map(|v| v.to_string()).collect();
|
||||
assert_eq!(links.len(), 5);
|
||||
link_arm = links[0].to_string();
|
||||
link_arm_t = vec![
|
||||
links[1].clone(),
|
||||
links[2].clone(),
|
||||
links[3].clone(),
|
||||
links[4].clone(),
|
||||
];
|
||||
}
|
||||
if link_aarch64.contains(":") {
|
||||
let links: Vec<_> = link_aarch64.split(':').map(|v| v.to_string()).collect();
|
||||
assert_eq!(links.len(), 5);
|
||||
link_aarch64 = links[0].to_string();
|
||||
link_aarch64_t = vec![
|
||||
links[1].clone(),
|
||||
links[2].clone(),
|
||||
links[3].clone(),
|
||||
links[4].clone(),
|
||||
];
|
||||
}
|
||||
let ext = type_to_ext(in_t[0]);
|
||||
let ext2 = type_to_ext(out_t);
|
||||
let link_arm = if link_arm.starts_with("llvm") {
|
||||
|
|
@ -1311,35 +1405,36 @@ fn gen_arm(
|
|||
link.push_str(&link_aarch64);
|
||||
link.replace("_EXT_", ext).replace("_EXT2_", ext2)
|
||||
};
|
||||
ext_c = format!(
|
||||
r#"#[allow(improper_ctypes)]
|
||||
if out_t == link_arm_t[3] && out_t == link_aarch64_t[3] {
|
||||
ext_c = format!(
|
||||
r#"#[allow(improper_ctypes)]
|
||||
extern "C" {{
|
||||
#[cfg_attr(target_arch = "arm", link_name = "{}")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
|
||||
fn {}({}) -> {};
|
||||
}}
|
||||
"#,
|
||||
link_arm,
|
||||
link_aarch64,
|
||||
current_fn,
|
||||
match para_num {
|
||||
1 => {
|
||||
format!("a: {}", in_t[0])
|
||||
}
|
||||
2 => {
|
||||
format!("a: {}, b: {}", in_t[0], in_t[1])
|
||||
}
|
||||
3 => {
|
||||
format!("a: {}, b: {}, c: {}", in_t[0], in_t[1], in_t[2])
|
||||
}
|
||||
_ => unimplemented!("unknown para_num"),
|
||||
},
|
||||
out_t
|
||||
);
|
||||
link_arm,
|
||||
link_aarch64,
|
||||
current_fn,
|
||||
match para_num {
|
||||
1 => {
|
||||
format!("a: {}", in_t[0])
|
||||
}
|
||||
2 => {
|
||||
format!("a: {}, b: {}", in_t[0], in_t[1])
|
||||
}
|
||||
3 => {
|
||||
format!("a: {}, b: {}, c: {}", in_t[0], in_t[1], in_t[2])
|
||||
}
|
||||
_ => unimplemented!("unknown para_num"),
|
||||
},
|
||||
out_t
|
||||
);
|
||||
};
|
||||
if const_arm.is_some() {
|
||||
ext_c_const_arm = format!(
|
||||
r#"
|
||||
#[allow(improper_ctypes)]
|
||||
ext_c_arm.push_str(&format!(
|
||||
r#"#[allow(improper_ctypes)]
|
||||
extern "C" {{
|
||||
#[cfg_attr(target_arch = "arm", link_name = "{}")]
|
||||
fn {}({}) -> {};
|
||||
|
|
@ -1363,12 +1458,39 @@ fn gen_arm(
|
|||
_ => unimplemented!("unknown para_num"),
|
||||
},
|
||||
out_t
|
||||
);
|
||||
));
|
||||
};
|
||||
if out_t != link_arm_t[3] {
|
||||
ext_c_arm.push_str(&format!(
|
||||
r#"#[allow(improper_ctypes)]
|
||||
extern "C" {{
|
||||
#[cfg_attr(target_arch = "arm", link_name = "{}")]
|
||||
fn {}({}) -> {};
|
||||
}}
|
||||
"#,
|
||||
link_arm,
|
||||
current_fn,
|
||||
match para_num {
|
||||
1 => {
|
||||
format!("a: {}", link_arm_t[0])
|
||||
}
|
||||
2 => {
|
||||
format!("a: {}, b: {}", link_arm_t[0], link_arm_t[1])
|
||||
}
|
||||
3 => {
|
||||
format!(
|
||||
"a: {}, b: {}, c: {}",
|
||||
link_arm_t[0], link_arm_t[1], link_arm_t[2]
|
||||
)
|
||||
}
|
||||
_ => unimplemented!("unknown para_num"),
|
||||
},
|
||||
link_arm_t[3]
|
||||
));
|
||||
}
|
||||
if const_aarch64.is_some() {
|
||||
ext_c_const_aarch64 = format!(
|
||||
r#"
|
||||
#[allow(improper_ctypes)]
|
||||
ext_c_aarch64.push_str(&format!(
|
||||
r#"#[allow(improper_ctypes)]
|
||||
extern "C" {{
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
|
||||
fn {}({}) -> {};
|
||||
|
|
@ -1389,7 +1511,35 @@ fn gen_arm(
|
|||
_ => unimplemented!("unknown para_num"),
|
||||
},
|
||||
out_t
|
||||
);
|
||||
));
|
||||
}
|
||||
if out_t != link_aarch64_t[3] {
|
||||
ext_c_aarch64.push_str(&format!(
|
||||
r#"#[allow(improper_ctypes)]
|
||||
extern "C" {{
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
|
||||
fn {}({}) -> {};
|
||||
}}
|
||||
"#,
|
||||
link_aarch64,
|
||||
current_fn,
|
||||
match para_num {
|
||||
1 => {
|
||||
format!("a: {}", link_aarch64_t[0])
|
||||
}
|
||||
2 => {
|
||||
format!("a: {}, b: {}", link_aarch64_t[0], link_aarch64_t[1])
|
||||
}
|
||||
3 => {
|
||||
format!(
|
||||
"a: {}, b: {}, c: {}",
|
||||
link_aarch64_t[0], link_aarch64_t[1], link_aarch64_t[2]
|
||||
)
|
||||
}
|
||||
_ => unimplemented!("unknown para_num"),
|
||||
},
|
||||
link_aarch64_t[3]
|
||||
));
|
||||
}
|
||||
};
|
||||
let multi_calls = if !multi_fn.is_empty() {
|
||||
|
|
@ -1430,6 +1580,11 @@ fn gen_arm(
|
|||
} else {
|
||||
String::new()
|
||||
};
|
||||
let trans: [&str; 2] = if out_t == link_arm_t[3] && out_t == link_aarch64_t[3] {
|
||||
["", ""]
|
||||
} else {
|
||||
["transmute(", ")"]
|
||||
};
|
||||
let call = match (multi_calls.len(), para_num, fixed.len()) {
|
||||
(0, 1, 0) => format!(
|
||||
r#"pub unsafe fn {}{}(a: {}) -> {} {{
|
||||
|
|
@ -1485,7 +1640,7 @@ fn gen_arm(
|
|||
),
|
||||
(_, _, _) => String::new(),
|
||||
};
|
||||
let call_const_arm = if let Some(const_arm) = const_arm {
|
||||
let call_arm = if let Some(const_arm) = const_arm {
|
||||
let const_arm = const_arm.replace("ttn", type_to_native_type(in_t[1]));
|
||||
let mut cnt = String::from(in_t[1]);
|
||||
cnt.push_str("(");
|
||||
|
|
@ -1501,20 +1656,61 @@ fn gen_arm(
|
|||
r#"pub unsafe fn {}{}(a: {}) -> {} {{
|
||||
{}{}{}(a, {})
|
||||
}}"#,
|
||||
name, const_declare, in_t[0], out_t, multi_calls, ext_c_const_arm, current_fn, cnt
|
||||
name, const_declare, in_t[0], out_t, multi_calls, ext_c_arm, current_fn, cnt
|
||||
),
|
||||
2 => format!(
|
||||
r#"pub unsafe fn {}{}(a: {}) -> {} {{
|
||||
r#"pub unsafe fn {}{}(a: {}, b:{}) -> {} {{
|
||||
{}{}{}(a, b, {})
|
||||
}}"#,
|
||||
name, const_declare, in_t[0], out_t, multi_calls, ext_c_const_arm, current_fn, cnt
|
||||
name,
|
||||
const_declare,
|
||||
in_t[0],
|
||||
in_t[1],
|
||||
out_t,
|
||||
multi_calls,
|
||||
ext_c_arm,
|
||||
current_fn,
|
||||
cnt
|
||||
),
|
||||
_ => String::new(),
|
||||
}
|
||||
} else if out_t != link_arm_t[3] {
|
||||
match para_num {
|
||||
1 => format!(
|
||||
r#"pub unsafe fn {}{}(a: {}) -> {} {{
|
||||
{}{}{}{}(a){}
|
||||
}}"#,
|
||||
name,
|
||||
const_declare,
|
||||
in_t[0],
|
||||
out_t,
|
||||
multi_calls,
|
||||
ext_c_arm,
|
||||
trans[0],
|
||||
current_fn,
|
||||
trans[1]
|
||||
),
|
||||
2 => format!(
|
||||
r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{
|
||||
{}{}{}{}(transmute(a), transmute(b)){}
|
||||
}}"#,
|
||||
name,
|
||||
const_declare,
|
||||
in_t[0],
|
||||
in_t[1],
|
||||
out_t,
|
||||
multi_calls,
|
||||
ext_c_arm,
|
||||
trans[0],
|
||||
current_fn,
|
||||
trans[1],
|
||||
),
|
||||
_ => String::new(),
|
||||
}
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let call_const_aarch64 = if let Some(const_aarch64) = const_aarch64 {
|
||||
let call_aarch64 = if let Some(const_aarch64) = const_aarch64 {
|
||||
match para_num {
|
||||
1 => format!(
|
||||
r#"pub unsafe fn {}{}(a: {}) -> {} {{
|
||||
|
|
@ -1525,55 +1721,94 @@ fn gen_arm(
|
|||
in_t[0],
|
||||
out_t,
|
||||
multi_calls,
|
||||
ext_c_const_aarch64,
|
||||
ext_c_aarch64,
|
||||
current_fn,
|
||||
const_aarch64
|
||||
),
|
||||
2 => format!(
|
||||
r#"pub unsafe fn {}{}(a: {}) -> {} {{
|
||||
r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{
|
||||
{}{}{}(a, b, {})
|
||||
}}"#,
|
||||
name,
|
||||
const_declare,
|
||||
in_t[0],
|
||||
in_t[1],
|
||||
out_t,
|
||||
multi_calls,
|
||||
ext_c_aarch64,
|
||||
current_fn,
|
||||
const_aarch64
|
||||
),
|
||||
_ => String::new(),
|
||||
}
|
||||
} else if out_t != link_aarch64_t[3] {
|
||||
match para_num {
|
||||
1 => format!(
|
||||
r#"pub unsafe fn {}{}(a: {}) -> {} {{
|
||||
{}{}{}{}(a){}
|
||||
}}"#,
|
||||
name,
|
||||
const_declare,
|
||||
in_t[0],
|
||||
out_t,
|
||||
multi_calls,
|
||||
ext_c_const_aarch64,
|
||||
ext_c_aarch64,
|
||||
trans[0],
|
||||
current_fn,
|
||||
const_aarch64
|
||||
trans[1],
|
||||
),
|
||||
2 => format!(
|
||||
r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{
|
||||
{}{}{}{}(a, b){}
|
||||
}}"#,
|
||||
name,
|
||||
const_declare,
|
||||
in_t[0],
|
||||
in_t[1],
|
||||
out_t,
|
||||
multi_calls,
|
||||
ext_c_aarch64,
|
||||
trans[0],
|
||||
current_fn,
|
||||
trans[1],
|
||||
),
|
||||
_ => String::new(),
|
||||
}
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let function = if const_arm.is_some() && const_aarch64.is_some() {
|
||||
let function = if (const_arm.is_some() && const_aarch64.is_some())
|
||||
|| out_t != link_arm_t[3]
|
||||
|| out_t != link_aarch64_t[3]
|
||||
{
|
||||
format!(
|
||||
r#"
|
||||
{}
|
||||
#[inline]
|
||||
#[cfg(target_arch = "arm")]
|
||||
#[target_feature(enable = "neon,v7")]
|
||||
#[target_feature(enable = "neon,{}")]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr({}{}))]{}
|
||||
{}
|
||||
|
||||
{}
|
||||
#[inline]
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[target_feature(enable = "{}")]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr({}{}))]{}
|
||||
{}
|
||||
"#,
|
||||
current_comment,
|
||||
current_target_arm,
|
||||
expand_intrinsic(¤t_arm, in_t[1]),
|
||||
const_assert,
|
||||
const_legacy,
|
||||
call_const_arm,
|
||||
call_arm,
|
||||
current_comment,
|
||||
current_target_aarch64,
|
||||
expand_intrinsic(¤t_aarch64, in_t[1]),
|
||||
const_assert,
|
||||
const_legacy,
|
||||
call_const_aarch64,
|
||||
call_aarch64,
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
|
|
@ -1587,7 +1822,7 @@ fn gen_arm(
|
|||
{}
|
||||
"#,
|
||||
current_comment,
|
||||
current_target,
|
||||
current_target_arm,
|
||||
expand_intrinsic(¤t_arm, in_t[1]),
|
||||
const_assert,
|
||||
expand_intrinsic(¤t_aarch64, in_t[1]),
|
||||
|
|
@ -1755,6 +1990,7 @@ fn get_call(
|
|||
let len = match &*fn_format[1] {
|
||||
"out_len" => type_len(out_t),
|
||||
"in_len" => type_len(in_t[1]),
|
||||
"in0_len" => type_len(in_t[0]),
|
||||
"halflen" => type_len(in_t[1]) / 2,
|
||||
_ => 0,
|
||||
};
|
||||
|
|
@ -2003,6 +2239,8 @@ fn get_call(
|
|||
fn_name.push_str(type_to_n_suffix(in_t[1]));
|
||||
} else if fn_format[1] == "out" {
|
||||
fn_name.push_str(type_to_suffix(out_t));
|
||||
} else if fn_format[1] == "in0" {
|
||||
fn_name.push_str(type_to_suffix(in_t[0]));
|
||||
} else if fn_format[1] == "in2" {
|
||||
fn_name.push_str(type_to_suffix(in_t[2]));
|
||||
} else if fn_format[1] == "signed" {
|
||||
|
|
@ -2028,6 +2266,8 @@ fn get_call(
|
|||
fn_name.push_str(&(type_len(in_t[1]) / 2).to_string());
|
||||
} else if fn_format[1] == "nout" {
|
||||
fn_name.push_str(type_to_n_suffix(out_t));
|
||||
} else if fn_format[1] == "nin0" {
|
||||
fn_name.push_str(type_to_n_suffix(in_t[0]));
|
||||
} else if fn_format[1] == "nsigned" {
|
||||
fn_name.push_str(type_to_n_suffix(type_to_signed(in_t[1])));
|
||||
} else if fn_format[1] == "in_ntt" {
|
||||
|
|
@ -2063,7 +2303,7 @@ fn get_call(
|
|||
}
|
||||
}
|
||||
if param_str.is_empty() {
|
||||
param_str.push_str("a, b");
|
||||
return fn_name;
|
||||
}
|
||||
let fn_str = if let Some((re_name, re_type)) = re.clone() {
|
||||
format!(
|
||||
|
|
@ -2108,7 +2348,7 @@ fn main() -> io::Result<()> {
|
|||
Vec<String>,
|
||||
)> = Vec::new();
|
||||
let mut multi_fn: Vec<String> = Vec::new();
|
||||
let mut target: TargetFeature = ArmV7;
|
||||
let mut target: TargetFeature = Default;
|
||||
|
||||
//
|
||||
// THIS FILE IS GENERATED FROM neon.spec DO NOT CHANGE IT MANUALLY
|
||||
|
|
@ -2189,7 +2429,7 @@ mod test {
|
|||
fixed = Vec::new();
|
||||
n = None;
|
||||
multi_fn = Vec::new();
|
||||
target = ArmV7;
|
||||
target = Default;
|
||||
} else if line.starts_with("//") {
|
||||
} else if line.starts_with("name = ") {
|
||||
current_name = Some(String::from(&line[7..]));
|
||||
|
|
@ -2211,6 +2451,8 @@ mod test {
|
|||
suffix = NoQDouble;
|
||||
} else if line.starts_with("n-suffix") {
|
||||
suffix = NSuffix;
|
||||
} else if line.starts_with("out-n-suffix") {
|
||||
suffix = OutNSuffix;
|
||||
} else if line.starts_with("noq-n-suffix") {
|
||||
suffix = NoQNSuffix;
|
||||
} else if line.starts_with("out-suffix") {
|
||||
|
|
@ -2245,10 +2487,12 @@ mod test {
|
|||
} else if line.starts_with("target = ") {
|
||||
target = match Some(String::from(&line[9..])) {
|
||||
Some(input) => match input.as_str() {
|
||||
"v7" => ArmV7,
|
||||
"fp-armv8" => FPArmV8,
|
||||
_ => ArmV7,
|
||||
"crypto" => Crypto,
|
||||
_ => Default,
|
||||
},
|
||||
_ => ArmV7,
|
||||
_ => Default,
|
||||
}
|
||||
} else if line.starts_with("generate ") {
|
||||
let line = &line[9..];
|
||||
|
|
@ -2328,6 +2572,7 @@ mod test {
|
|||
¤t_tests,
|
||||
suffix,
|
||||
para_num,
|
||||
target,
|
||||
&fixed,
|
||||
&multi_fn,
|
||||
);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue