Add vmul_n, vmul_lane, vmulx neon instructions (#1147)

This commit is contained in:
Sparrow Li 2021-05-01 04:09:41 +08:00 committed by GitHub
parent 07f1d0cae3
commit fd29f9602c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 2263 additions and 118 deletions

View file

@ -3934,6 +3934,106 @@ pub unsafe fn vmulq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
simd_mul(a, b)
}
/// Vector multiply by scalar
///
/// Multiplies each lane of `a` by `b`, which is first broadcast with `vdup_n_f64`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmul))]
pub unsafe fn vmul_n_f64(a: float64x1_t, b: f64) -> float64x1_t {
simd_mul(a, vdup_n_f64(b))
}
/// Vector multiply by scalar
///
/// Multiplies each lane of `a` by `b`, which is first broadcast with `vdupq_n_f64`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmul))]
pub unsafe fn vmulq_n_f64(a: float64x2_t, b: f64) -> float64x2_t {
simd_mul(a, vdupq_n_f64(b))
}
/// Floating-point multiply
///
/// Multiplies `a` by lane `LANE` of `b`; `LANE` must be 0 since `b` has a single lane.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmul_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64x1_t {
static_assert!(LANE : i32 where LANE == 0);
// The extracted f64 is transmuted back into a one-lane vector for simd_mul.
simd_mul(a, transmute::<f64, _>(simd_extract(b, LANE as u32)))
}
/// Floating-point multiply
///
/// Multiplies `a` by lane `LANE` (0 or 1) of the two-lane vector `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmul_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x2_t) -> float64x1_t {
static_assert_imm1!(LANE);
simd_mul(a, transmute::<f64, _>(simd_extract(b, LANE as u32)))
}
/// Floating-point multiply
///
/// Multiplies each lane of `a` by lane `LANE` (must be 0) of `b`, broadcast to both lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
static_assert!(LANE : i32 where LANE == 0);
simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
}
/// Floating-point multiply
///
/// Multiplies each lane of `a` by lane `LANE` (0 or 1) of `b`, broadcast to both lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
static_assert_imm1!(LANE);
simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
}
/// Floating-point multiply
///
/// Scalar multiply of `a` by lane `LANE` (0 or 1) extracted from `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmuls_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
static_assert_imm1!(LANE);
let b: f32 = simd_extract(b, LANE as u32);
a * b
}
/// Floating-point multiply
///
/// Scalar multiply of `a` by lane `LANE` (0..=3) extracted from `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmuls_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
static_assert_imm2!(LANE);
let b: f32 = simd_extract(b, LANE as u32);
a * b
}
/// Floating-point multiply
///
/// Scalar multiply of `a` by lane `LANE` (must be 0) extracted from `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmuld_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
static_assert!(LANE : i32 where LANE == 0);
let b: f64 = simd_extract(b, LANE as u32);
a * b
}
/// Floating-point multiply
///
/// Scalar multiply of `a` by lane `LANE` (0 or 1) extracted from `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmul, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmuld_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
static_assert_imm1!(LANE);
let b: f64 = simd_extract(b, LANE as u32);
a * b
}
/// Signed multiply long
#[inline]
#[target_feature(enable = "neon")]
@ -4004,6 +4104,316 @@ pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
vmull_p8(a, b)
}
/// Multiply long
///
/// Delegates to `vmull_high_s16` with `b` broadcast across all eight lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smull2))]
pub unsafe fn vmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
vmull_high_s16(a, vdupq_n_s16(b))
}
/// Multiply long
///
/// Delegates to `vmull_high_s32` with `b` broadcast across all four lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smull2))]
pub unsafe fn vmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
vmull_high_s32(a, vdupq_n_s32(b))
}
/// Multiply long
///
/// Delegates to `vmull_high_u16` with `b` broadcast across all eight lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umull2))]
pub unsafe fn vmull_high_n_u16(a: uint16x8_t, b: u16) -> uint32x4_t {
vmull_high_u16(a, vdupq_n_u16(b))
}
/// Multiply long
///
/// Delegates to `vmull_high_u32` with `b` broadcast across all four lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umull2))]
pub unsafe fn vmull_high_n_u32(a: uint32x4_t, b: u32) -> uint64x2_t {
vmull_high_u32(a, vdupq_n_u32(b))
}
/// Multiply long
///
/// Delegates to `vmull_high_s16` with lane `LANE` (0..=3) of `b` broadcast to all lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmull_high_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
static_assert_imm2!(LANE);
vmull_high_s16(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply long
///
/// Delegates to `vmull_high_s16` with lane `LANE` (0..=7) of `b` broadcast to all lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmull_high_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
static_assert_imm3!(LANE);
vmull_high_s16(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply long
///
/// Delegates to `vmull_high_s32` with lane `LANE` (0 or 1) of `b` broadcast to all lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmull_high_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
static_assert_imm1!(LANE);
vmull_high_s32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply long
///
/// Delegates to `vmull_high_s32` with lane `LANE` (0..=3) of `b` broadcast to all lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(smull2, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmull_high_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
static_assert_imm2!(LANE);
vmull_high_s32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply long
///
/// Delegates to `vmull_high_u16` with lane `LANE` (0..=3) of `b` broadcast to all lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmull_high_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t {
static_assert_imm2!(LANE);
vmull_high_u16(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply long
///
/// Delegates to `vmull_high_u16` with lane `LANE` (0..=7) of `b` broadcast to all lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmull_high_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
static_assert_imm3!(LANE);
vmull_high_u16(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply long
///
/// Delegates to `vmull_high_u32` with lane `LANE` (0 or 1) of `b` broadcast to all lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmull_high_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t {
static_assert_imm1!(LANE);
vmull_high_u32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply long
///
/// Delegates to `vmull_high_u32` with lane `LANE` (0..=3) of `b` broadcast to all lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(umull2, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmull_high_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
static_assert_imm2!(LANE);
vmull_high_u32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Floating-point multiply extended
///
/// Thin wrapper over the `llvm.aarch64.neon.fmulx.v2f32` intrinsic.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx))]
pub unsafe fn vmulx_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.v2f32")]
fn vmulx_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
}
vmulx_f32_(a, b)
}
/// Floating-point multiply extended
///
/// Thin wrapper over the `llvm.aarch64.neon.fmulx.v4f32` intrinsic.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx))]
pub unsafe fn vmulxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.v4f32")]
fn vmulxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
}
vmulxq_f32_(a, b)
}
/// Floating-point multiply extended
///
/// Thin wrapper over the `llvm.aarch64.neon.fmulx.v1f64` intrinsic.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx))]
pub unsafe fn vmulx_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.v1f64")]
fn vmulx_f64_(a: float64x1_t, b: float64x1_t) -> float64x1_t;
}
vmulx_f64_(a, b)
}
/// Floating-point multiply extended
///
/// Thin wrapper over the `llvm.aarch64.neon.fmulx.v2f64` intrinsic.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx))]
pub unsafe fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.v2f64")]
fn vmulxq_f64_(a: float64x2_t, b: float64x2_t) -> float64x2_t;
}
vmulxq_f64_(a, b)
}
/// Floating-point multiply extended
///
/// Delegates to `vmulx_f64` using lane `LANE` (must be 0) of `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulx_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64x1_t {
static_assert!(LANE : i32 where LANE == 0);
vmulx_f64(a, transmute::<f64, _>(simd_extract(b, LANE as u32)))
}
/// Floating-point multiply extended
///
/// Delegates to `vmulx_f64` using lane `LANE` (0 or 1) of `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulx_laneq_f64<const LANE: i32>(a: float64x1_t, b: float64x2_t) -> float64x1_t {
static_assert_imm1!(LANE);
vmulx_f64(a, transmute::<f64, _>(simd_extract(b, LANE as u32)))
}
/// Floating-point multiply extended
///
/// Delegates to `vmulx_f32` with lane `LANE` (0 or 1) of `b` broadcast to both lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulx_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
static_assert_imm1!(LANE);
vmulx_f32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
}
/// Floating-point multiply extended
///
/// Delegates to `vmulx_f32` with lane `LANE` (0..=3) of `b` broadcast to both lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulx_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
static_assert_imm2!(LANE);
vmulx_f32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
}
/// Floating-point multiply extended
///
/// Delegates to `vmulxq_f32` with lane `LANE` (0 or 1) of `b` broadcast to all four lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulxq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
static_assert_imm1!(LANE);
vmulxq_f32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Floating-point multiply extended
///
/// Delegates to `vmulxq_f32` with lane `LANE` (0..=3) of `b` broadcast to all four lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulxq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
static_assert_imm2!(LANE);
vmulxq_f32(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Floating-point multiply extended
///
/// Delegates to `vmulxq_f64` with lane `LANE` (must be 0) of `b` broadcast to both lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulxq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
static_assert!(LANE : i32 where LANE == 0);
vmulxq_f64(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
}
/// Floating-point multiply extended
///
/// Delegates to `vmulxq_f64` with lane `LANE` (0 or 1) of `b` broadcast to both lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulxq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
static_assert_imm1!(LANE);
vmulxq_f64(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
}
/// Floating-point multiply extended
///
/// Scalar form; thin wrapper over the `llvm.aarch64.neon.fmulx.f32` intrinsic.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx))]
pub unsafe fn vmulxs_f32(a: f32, b: f32) -> f32 {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.f32")]
fn vmulxs_f32_(a: f32, b: f32) -> f32;
}
vmulxs_f32_(a, b)
}
/// Floating-point multiply extended
///
/// Scalar form; thin wrapper over the `llvm.aarch64.neon.fmulx.f64` intrinsic.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx))]
pub unsafe fn vmulxd_f64(a: f64, b: f64) -> f64 {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmulx.f64")]
fn vmulxd_f64_(a: f64, b: f64) -> f64;
}
vmulxd_f64_(a, b)
}
/// Floating-point multiply extended
///
/// Delegates to `vmulxs_f32` using lane `LANE` (0 or 1) extracted from `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulxs_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> f32 {
static_assert_imm1!(LANE);
vmulxs_f32(a, simd_extract(b, LANE as u32))
}
/// Floating-point multiply extended
///
/// Delegates to `vmulxs_f32` using lane `LANE` (0..=3) extracted from `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulxs_laneq_f32<const LANE: i32>(a: f32, b: float32x4_t) -> f32 {
static_assert_imm2!(LANE);
vmulxs_f32(a, simd_extract(b, LANE as u32))
}
/// Floating-point multiply extended
///
/// Delegates to `vmulxd_f64` using lane `LANE` (must be 0) extracted from `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulxd_lane_f64<const LANE: i32>(a: f64, b: float64x1_t) -> f64 {
static_assert!(LANE : i32 where LANE == 0);
vmulxd_f64(a, simd_extract(b, LANE as u32))
}
/// Floating-point multiply extended
///
/// Delegates to `vmulxd_f64` using lane `LANE` (0 or 1) extracted from `b`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fmulx, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulxd_laneq_f64<const LANE: i32>(a: f64, b: float64x2_t) -> f64 {
static_assert_imm1!(LANE);
vmulxd_f64(a, simd_extract(b, LANE as u32))
}
/// Floating-point fused Multiply-Add to accumulator(vector)
#[inline]
#[target_feature(enable = "neon")]
@ -10814,6 +11224,96 @@ mod test {
assert_eq!(r, e);
}
// Generated tests for the vmul_n / vmul_lane family: each builds lane inputs via
// `transmute` from the portable simd helper types (f64x2, f32x2, ...) and checks
// the exact product against a precomputed expected vector.
#[simd_test(enable = "neon")]
unsafe fn test_vmul_n_f64() {
let a: f64 = 1.;
let b: f64 = 2.;
let e: f64 = 2.;
let r: f64 = transmute(vmul_n_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_n_f64() {
let a: f64x2 = f64x2::new(1., 2.);
let b: f64 = 2.;
let e: f64x2 = f64x2::new(2., 4.);
let r: f64x2 = transmute(vmulq_n_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_lane_f64() {
let a: f64 = 1.;
let b: f64 = 2.;
let e: f64 = 2.;
let r: f64 = transmute(vmul_lane_f64::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_laneq_f64() {
let a: f64 = 1.;
let b: f64x2 = f64x2::new(2., 0.);
let e: f64 = 2.;
let r: f64 = transmute(vmul_laneq_f64::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_lane_f64() {
let a: f64x2 = f64x2::new(1., 2.);
let b: f64 = 2.;
let e: f64x2 = f64x2::new(2., 4.);
let r: f64x2 = transmute(vmulq_lane_f64::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_laneq_f64() {
let a: f64x2 = f64x2::new(1., 2.);
let b: f64x2 = f64x2::new(2., 0.);
let e: f64x2 = f64x2::new(2., 4.);
let r: f64x2 = transmute(vmulq_laneq_f64::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmuls_lane_f32() {
let a: f32 = 1.;
let b: f32x2 = f32x2::new(2., 0.);
let e: f32 = 2.;
let r: f32 = transmute(vmuls_lane_f32::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmuls_laneq_f32() {
let a: f32 = 1.;
let b: f32x4 = f32x4::new(2., 0., 0., 0.);
let e: f32 = 2.;
let r: f32 = transmute(vmuls_laneq_f32::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmuld_lane_f64() {
let a: f64 = 1.;
let b: f64 = 2.;
let e: f64 = 2.;
let r: f64 = transmute(vmuld_lane_f64::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmuld_laneq_f64() {
let a: f64 = 1.;
let b: f64x2 = f64x2::new(2., 0.);
let e: f64 = 2.;
let r: f64 = transmute(vmuld_laneq_f64::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_s8() {
let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
@ -10877,6 +11377,276 @@ mod test {
assert_eq!(r, e);
}
// Generated tests for the vmull_high_* and vmulx* families. The vmull_high tests
// place the scalar/lane multiplier so that only the high half of `a` contributes
// (expected values 18, 20, ... come from the high lanes 9, 10, 11, 12 times 2).
// The vmulx tests use finite operands, where fmulx behaves like an ordinary
// floating-point multiply.
#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_n_s16() {
let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
let b: i16 = 2;
let e: i32x4 = i32x4::new(18, 20, 22, 24);
let r: i32x4 = transmute(vmull_high_n_s16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_n_s32() {
let a: i32x4 = i32x4::new(1, 2, 9, 10);
let b: i32 = 2;
let e: i64x2 = i64x2::new(18, 20);
let r: i64x2 = transmute(vmull_high_n_s32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_n_u16() {
let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
let b: u16 = 2;
let e: u32x4 = u32x4::new(18, 20, 22, 24);
let r: u32x4 = transmute(vmull_high_n_u16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_n_u32() {
let a: u32x4 = u32x4::new(1, 2, 9, 10);
let b: u32 = 2;
let e: u64x2 = u64x2::new(18, 20);
let r: u64x2 = transmute(vmull_high_n_u32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_lane_s16() {
let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
let b: i16x4 = i16x4::new(0, 2, 0, 0);
let e: i32x4 = i32x4::new(18, 20, 22, 24);
let r: i32x4 = transmute(vmull_high_lane_s16::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_laneq_s16() {
let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
let e: i32x4 = i32x4::new(18, 20, 22, 24);
let r: i32x4 = transmute(vmull_high_laneq_s16::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_lane_s32() {
let a: i32x4 = i32x4::new(1, 2, 9, 10);
let b: i32x2 = i32x2::new(0, 2);
let e: i64x2 = i64x2::new(18, 20);
let r: i64x2 = transmute(vmull_high_lane_s32::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_laneq_s32() {
let a: i32x4 = i32x4::new(1, 2, 9, 10);
let b: i32x4 = i32x4::new(0, 2, 0, 0);
let e: i64x2 = i64x2::new(18, 20);
let r: i64x2 = transmute(vmull_high_laneq_s32::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_lane_u16() {
let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
let b: u16x4 = u16x4::new(0, 2, 0, 0);
let e: u32x4 = u32x4::new(18, 20, 22, 24);
let r: u32x4 = transmute(vmull_high_lane_u16::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_laneq_u16() {
let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
let e: u32x4 = u32x4::new(18, 20, 22, 24);
let r: u32x4 = transmute(vmull_high_laneq_u16::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_lane_u32() {
let a: u32x4 = u32x4::new(1, 2, 9, 10);
let b: u32x2 = u32x2::new(0, 2);
let e: u64x2 = u64x2::new(18, 20);
let r: u64x2 = transmute(vmull_high_lane_u32::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_laneq_u32() {
let a: u32x4 = u32x4::new(1, 2, 9, 10);
let b: u32x4 = u32x4::new(0, 2, 0, 0);
let e: u64x2 = u64x2::new(18, 20);
let r: u64x2 = transmute(vmull_high_laneq_u32::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulx_f32() {
let a: f32x2 = f32x2::new(1., 2.);
let b: f32x2 = f32x2::new(2., 2.);
let e: f32x2 = f32x2::new(2., 4.);
let r: f32x2 = transmute(vmulx_f32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulxq_f32() {
let a: f32x4 = f32x4::new(1., 2., 3., 4.);
let b: f32x4 = f32x4::new(2., 2., 2., 2.);
let e: f32x4 = f32x4::new(2., 4., 6., 8.);
let r: f32x4 = transmute(vmulxq_f32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulx_f64() {
let a: f64 = 1.;
let b: f64 = 2.;
let e: f64 = 2.;
let r: f64 = transmute(vmulx_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulxq_f64() {
let a: f64x2 = f64x2::new(1., 2.);
let b: f64x2 = f64x2::new(2., 2.);
let e: f64x2 = f64x2::new(2., 4.);
let r: f64x2 = transmute(vmulxq_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulx_lane_f64() {
let a: f64 = 1.;
let b: f64 = 2.;
let e: f64 = 2.;
let r: f64 = transmute(vmulx_lane_f64::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulx_laneq_f64() {
let a: f64 = 1.;
let b: f64x2 = f64x2::new(2., 0.);
let e: f64 = 2.;
let r: f64 = transmute(vmulx_laneq_f64::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulx_lane_f32() {
let a: f32x2 = f32x2::new(1., 2.);
let b: f32x2 = f32x2::new(2., 0.);
let e: f32x2 = f32x2::new(2., 4.);
let r: f32x2 = transmute(vmulx_lane_f32::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulx_laneq_f32() {
let a: f32x2 = f32x2::new(1., 2.);
let b: f32x4 = f32x4::new(2., 0., 0., 0.);
let e: f32x2 = f32x2::new(2., 4.);
let r: f32x2 = transmute(vmulx_laneq_f32::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulxq_lane_f32() {
let a: f32x4 = f32x4::new(1., 2., 3., 4.);
let b: f32x2 = f32x2::new(2., 0.);
let e: f32x4 = f32x4::new(2., 4., 6., 8.);
let r: f32x4 = transmute(vmulxq_lane_f32::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulxq_laneq_f32() {
let a: f32x4 = f32x4::new(1., 2., 3., 4.);
let b: f32x4 = f32x4::new(2., 0., 0., 0.);
let e: f32x4 = f32x4::new(2., 4., 6., 8.);
let r: f32x4 = transmute(vmulxq_laneq_f32::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulxq_lane_f64() {
let a: f64x2 = f64x2::new(1., 2.);
let b: f64 = 2.;
let e: f64x2 = f64x2::new(2., 4.);
let r: f64x2 = transmute(vmulxq_lane_f64::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulxq_laneq_f64() {
let a: f64x2 = f64x2::new(1., 2.);
let b: f64x2 = f64x2::new(2., 0.);
let e: f64x2 = f64x2::new(2., 4.);
let r: f64x2 = transmute(vmulxq_laneq_f64::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulxs_f32() {
let a: f32 = 2.;
let b: f32 = 3.;
let e: f32 = 6.;
let r: f32 = transmute(vmulxs_f32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulxd_f64() {
let a: f64 = 2.;
let b: f64 = 3.;
let e: f64 = 6.;
let r: f64 = transmute(vmulxd_f64(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulxs_lane_f32() {
let a: f32 = 2.;
let b: f32x2 = f32x2::new(3., 0.);
let e: f32 = 6.;
let r: f32 = transmute(vmulxs_lane_f32::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulxs_laneq_f32() {
let a: f32 = 2.;
let b: f32x4 = f32x4::new(3., 0., 0., 0.);
let e: f32 = 6.;
let r: f32 = transmute(vmulxs_laneq_f32::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulxd_lane_f64() {
let a: f64 = 2.;
let b: f64 = 3.;
let e: f64 = 6.;
let r: f64 = transmute(vmulxd_lane_f64::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulxd_laneq_f64() {
let a: f64 = 2.;
let b: f64x2 = f64x2::new(3., 0.);
let e: f64 = 6.;
let r: f64 = transmute(vmulxd_laneq_f64::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vfma_f64() {
let a: f64 = 2.0;

View file

@ -108,9 +108,6 @@ extern "C" {
#[link_name = "llvm.aarch64.neon.usqadd.v2i64"]
fn vsqaddq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
#[link_name = "llvm.aarch64.neon.pmull64"]
fn vmull_p64_(a: i64, b: i64) -> int8x16_t;
#[link_name = "llvm.aarch64.neon.addp.v8i16"]
fn vpaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
#[link_name = "llvm.aarch64.neon.addp.v4i32"]
@ -1150,14 +1147,6 @@ pub unsafe fn vaddlvq_u8(a: uint8x16_t) -> u16 {
vaddlvq_u8_(a) as u16
}
/// Polynomial multiply long
///
/// Wraps the `llvm.aarch64.neon.pmull64` intrinsic (declared elsewhere in this
/// file as `vmull_p64_`); the i64/int8x16_t result is transmuted to `p128`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(pmull))]
pub unsafe fn vmull_p64(a: p64, b: p64) -> p128 {
transmute(vmull_p64_(transmute(a), transmute(b)))
}
/// Vector add.
#[inline]
#[target_feature(enable = "neon")]
@ -3260,36 +3249,6 @@ mod tests {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_p64() {
// FIXME: I've a hard time writing a test for this as the documentation
// from arm is a bit thin as to waht exactly it does
let a: i64 = 8;
let b: i64 = 7;
let e: i128 = 56;
let r: i128 = transmute(vmull_p64(transmute(a), transmute(b)));
assert_eq!(r, e);
/*
let a: i64 = 5;
let b: i64 = 5;
let e: i128 = 25;
let r: i128 = transmute(vmull_p64(a, b));
assert_eq!(r, e);
let a: i64 = 6;
let b: i64 = 6;
let e: i128 = 36;
let r: i128 = transmute(vmull_p64(a, b));
assert_eq!(r, e);
let a: i64 = 7;
let b: i64 = 6;
let e: i128 = 42;
let r: i128 = transmute(vmull_p64(a, b));
assert_eq!(r, e);
*/
}
#[simd_test(enable = "neon")]
unsafe fn test_vadd_f64() {
let a = 1.;

View file

@ -5558,6 +5558,38 @@ pub unsafe fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
simd_mul(a, b)
}
/// Polynomial multiply
///
/// Thin wrapper over the per-target polynomial-multiply intrinsic
/// (`llvm.arm.neon.vmulp.v8i8` on ARM, `llvm.aarch64.neon.pmul.v8i8` on AArch64).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmul))]
pub unsafe fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmul.v8i8")]
fn vmul_p8_(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t;
}
vmul_p8_(a, b)
}
/// Polynomial multiply
///
/// 128-bit variant of [`vmul_p8`]; wraps the `v16i8` form of the same intrinsic.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmul))]
pub unsafe fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmul.v16i8")]
fn vmulq_p8_(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t;
}
vmulq_p8_(a, b)
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
@ -5578,6 +5610,346 @@ pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
simd_mul(a, b)
}
/// Vector multiply by scalar
///
/// Multiplies each lane of `a` by `b`, which is first broadcast with `vdup_n_s16`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
pub unsafe fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
simd_mul(a, vdup_n_s16(b))
}
/// Vector multiply by scalar
///
/// Multiplies each lane of `a` by `b`, which is first broadcast with `vdupq_n_s16`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
pub unsafe fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
simd_mul(a, vdupq_n_s16(b))
}
/// Vector multiply by scalar
///
/// Multiplies each lane of `a` by `b`, which is first broadcast with `vdup_n_s32`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
pub unsafe fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
simd_mul(a, vdup_n_s32(b))
}
/// Vector multiply by scalar
///
/// Multiplies each lane of `a` by `b`, which is first broadcast with `vdupq_n_s32`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
pub unsafe fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
simd_mul(a, vdupq_n_s32(b))
}
/// Vector multiply by scalar
///
/// Multiplies each lane of `a` by `b`, which is first broadcast with `vdup_n_u16`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
pub unsafe fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t {
simd_mul(a, vdup_n_u16(b))
}
/// Vector multiply by scalar
///
/// Multiplies each lane of `a` by `b`, which is first broadcast with `vdupq_n_u16`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
pub unsafe fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t {
simd_mul(a, vdupq_n_u16(b))
}
/// Vector multiply by scalar
///
/// Multiplies each lane of `a` by `b`, which is first broadcast with `vdup_n_u32`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
pub unsafe fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t {
simd_mul(a, vdup_n_u32(b))
}
/// Vector multiply by scalar
///
/// Multiplies each lane of `a` by `b`, which is first broadcast with `vdupq_n_u32`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))]
pub unsafe fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t {
simd_mul(a, vdupq_n_u32(b))
}
/// Vector multiply by scalar
///
/// Multiplies each lane of `a` by `b`, which is first broadcast with `vdup_n_f32`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
pub unsafe fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t {
simd_mul(a, vdup_n_f32(b))
}
/// Vector multiply by scalar
///
/// Multiplies each lane of `a` by `b`, which is first broadcast with `vdupq_n_f32`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
pub unsafe fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t {
simd_mul(a, vdupq_n_f32(b))
}
/// Multiply
///
/// Multiplies each lane of `a` by lane `LANE` (0..=3) of `b`, broadcast to all lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
static_assert_imm2!(LANE);
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply
///
/// Multiplies each lane of `a` by lane `LANE` (0..=7) of the 8-lane `b`, broadcast to all lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
static_assert_imm3!(LANE);
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply
///
/// Multiplies each lane of `a` by lane `LANE` (0..=3) of the 4-lane `b`, broadcast to all eight lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
static_assert_imm2!(LANE);
simd_mul(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply
///
/// Multiplies each lane of `a` by lane `LANE` (0..=7) of `b`, broadcast to all eight lanes.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
static_assert_imm3!(LANE);
simd_mul(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
static_assert_imm1!(LANE);
simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
static_assert_imm2!(LANE);
simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
static_assert_imm1!(LANE);
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
static_assert_imm2!(LANE);
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
// --- vmul_lane / vmul_laneq, unsigned: identical shape to the signed family
// above — splat lane `LANE` of `b` via simd_shuffle, then lane-wise
// simd_mul; the static_assert_immN! keeps the const lane index in range for
// the width of `b`. Generated by stdarch-gen.
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
static_assert_imm2!(LANE);
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t {
static_assert_imm3!(LANE);
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t {
static_assert_imm2!(LANE);
simd_mul(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
static_assert_imm3!(LANE);
simd_mul(a, simd_shuffle8(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmul_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
static_assert_imm1!(LANE);
simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t {
static_assert_imm2!(LANE);
simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t {
static_assert_imm1!(LANE);
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
static_assert_imm2!(LANE);
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
// --- vmul_lane / vmul_laneq, f32: same splat-then-multiply shape as the
// integer families; the AArch64 assert_instr expects `fmul` and the test
// harness instantiates LANE = 0. Generated by stdarch-gen.
/// Floating-point multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmul_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
static_assert_imm1!(LANE);
simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
}
/// Floating-point multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmul_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
static_assert_imm2!(LANE);
simd_mul(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
}
/// Floating-point multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
static_assert_imm1!(LANE);
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Floating-point multiply
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
static_assert_imm2!(LANE);
simd_mul(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Signed multiply long
#[inline]
#[target_feature(enable = "neon")]
@ -5690,6 +6062,142 @@ pub unsafe fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t {
vmull_p8_(a, b)
}
// --- vmull_n family: widening (long) multiply of a vector by a broadcast
// scalar — the scalar is splatted with vdup and fed to the existing vmull
// intrinsic, which widens 16->32 / 32->64 bits per lane. ---
// NOTE(review): the `h`/`s` infixes in `vmullh_n_s16`, `vmulls_n_s32`,
// `vmullh_n_u16`, `vmulls_n_u32` do not match the ACLE spellings of these
// intrinsics (`vmull_n_s16`, `vmull_n_s32`, `vmull_n_u16`, `vmull_n_u32`);
// those infixes are used for scalar-operand intrinsics elsewhere in this
// crate. The sibling stdarch-gen spec entry (`name = vmull`, `n-suffix`)
// also suggests the plain `vmull_n_*` names. Renaming would break the
// tests referencing these symbols, so this is flagged rather than fixed
// here — confirm against the generator output before release.
/// Vector long multiply with scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
pub unsafe fn vmullh_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
vmull_s16(a, vdup_n_s16(b))
}
/// Vector long multiply with scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
pub unsafe fn vmulls_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
vmull_s32(a, vdup_n_s32(b))
}
/// Vector long multiply with scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
// Unsigned variants check for `umull` on AArch64 instead of `smull`.
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
pub unsafe fn vmullh_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t {
vmull_u16(a, vdup_n_u16(b))
}
/// Vector long multiply with scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
pub unsafe fn vmulls_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t {
vmull_u32(a, vdup_n_u32(b))
}
// --- vmull_lane / vmull_laneq: widening multiply by a single lane of `b` —
// splat lane `LANE` of `b` with simd_shuffle (shuffle width matches the
// *input* vector, since vmull widens afterwards), then delegate to the
// non-lane vmull intrinsic. static_assert_immN! bounds LANE to `b`'s width.
// Generated by stdarch-gen.
/// Vector long multiply by scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
static_assert_imm2!(LANE);
vmull_s16(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Vector long multiply by scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
static_assert_imm3!(LANE);
vmull_s16(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Vector long multiply by scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
static_assert_imm1!(LANE);
vmull_s32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
}
/// Vector long multiply by scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
static_assert_imm2!(LANE);
vmull_s32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
}
/// Vector long multiply by scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmull_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
static_assert_imm2!(LANE);
vmull_u16(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Vector long multiply by scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmull_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t {
static_assert_imm3!(LANE);
vmull_u16(a, simd_shuffle4(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
}
/// Vector long multiply by scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmull_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
static_assert_imm1!(LANE);
vmull_u32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
}
/// Vector long multiply by scalar
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn vmull_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t {
static_assert_imm2!(LANE);
vmull_u32(a, simd_shuffle2(b, b, [LANE as u32, LANE as u32]))
}
/// Floating-point fused Multiply-Add to accumulator(vector)
#[inline]
#[target_feature(enable = "neon")]
@ -17013,6 +17521,24 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_p8() {
let a: i8x8 = i8x8::new(1, 3, 1, 3, 1, 3, 1, 3);
let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let e: i8x8 = i8x8::new(1, 6, 3, 12, 5, 10, 7, 24);
let r: i8x8 = transmute(vmul_p8(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_p8() {
let a: i8x16 = i8x16::new(1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3);
let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let e: i8x16 = i8x16::new(1, 6, 3, 12, 5, 10, 7, 24, 9, 30, 11, 20, 13, 18, 15, 48);
let r: i8x16 = transmute(vmulq_p8(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_f32() {
let a: f32x2 = f32x2::new(1.0, 2.0);
@ -17031,6 +17557,276 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_n_s16() {
let a: i16x4 = i16x4::new(1, 2, 3, 4);
let b: i16 = 2;
let e: i16x4 = i16x4::new(2, 4, 6, 8);
let r: i16x4 = transmute(vmul_n_s16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_n_s16() {
let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let b: i16 = 2;
let e: i16x8 = i16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
let r: i16x8 = transmute(vmulq_n_s16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_n_s32() {
let a: i32x2 = i32x2::new(1, 2);
let b: i32 = 2;
let e: i32x2 = i32x2::new(2, 4);
let r: i32x2 = transmute(vmul_n_s32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_n_s32() {
let a: i32x4 = i32x4::new(1, 2, 3, 4);
let b: i32 = 2;
let e: i32x4 = i32x4::new(2, 4, 6, 8);
let r: i32x4 = transmute(vmulq_n_s32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_n_u16() {
let a: u16x4 = u16x4::new(1, 2, 3, 4);
let b: u16 = 2;
let e: u16x4 = u16x4::new(2, 4, 6, 8);
let r: u16x4 = transmute(vmul_n_u16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_n_u16() {
let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let b: u16 = 2;
let e: u16x8 = u16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
let r: u16x8 = transmute(vmulq_n_u16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_n_u32() {
let a: u32x2 = u32x2::new(1, 2);
let b: u32 = 2;
let e: u32x2 = u32x2::new(2, 4);
let r: u32x2 = transmute(vmul_n_u32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_n_u32() {
let a: u32x4 = u32x4::new(1, 2, 3, 4);
let b: u32 = 2;
let e: u32x4 = u32x4::new(2, 4, 6, 8);
let r: u32x4 = transmute(vmulq_n_u32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_n_f32() {
let a: f32x2 = f32x2::new(1., 2.);
let b: f32 = 2.;
let e: f32x2 = f32x2::new(2., 4.);
let r: f32x2 = transmute(vmul_n_f32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_n_f32() {
let a: f32x4 = f32x4::new(1., 2., 3., 4.);
let b: f32 = 2.;
let e: f32x4 = f32x4::new(2., 4., 6., 8.);
let r: f32x4 = transmute(vmulq_n_f32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_lane_s16() {
let a: i16x4 = i16x4::new(1, 2, 3, 4);
let b: i16x4 = i16x4::new(0, 2, 0, 0);
let e: i16x4 = i16x4::new(2, 4, 6, 8);
let r: i16x4 = transmute(vmul_lane_s16::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_laneq_s16() {
let a: i16x4 = i16x4::new(1, 2, 3, 4);
let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
let e: i16x4 = i16x4::new(2, 4, 6, 8);
let r: i16x4 = transmute(vmul_laneq_s16::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_lane_s16() {
let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let b: i16x4 = i16x4::new(0, 2, 0, 0);
let e: i16x8 = i16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
let r: i16x8 = transmute(vmulq_lane_s16::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_laneq_s16() {
let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
let e: i16x8 = i16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
let r: i16x8 = transmute(vmulq_laneq_s16::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_lane_s32() {
let a: i32x2 = i32x2::new(1, 2);
let b: i32x2 = i32x2::new(0, 2);
let e: i32x2 = i32x2::new(2, 4);
let r: i32x2 = transmute(vmul_lane_s32::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_laneq_s32() {
let a: i32x2 = i32x2::new(1, 2);
let b: i32x4 = i32x4::new(0, 2, 0, 0);
let e: i32x2 = i32x2::new(2, 4);
let r: i32x2 = transmute(vmul_laneq_s32::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_lane_s32() {
let a: i32x4 = i32x4::new(1, 2, 3, 4);
let b: i32x2 = i32x2::new(0, 2);
let e: i32x4 = i32x4::new(2, 4, 6, 8);
let r: i32x4 = transmute(vmulq_lane_s32::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_laneq_s32() {
let a: i32x4 = i32x4::new(1, 2, 3, 4);
let b: i32x4 = i32x4::new(0, 2, 0, 0);
let e: i32x4 = i32x4::new(2, 4, 6, 8);
let r: i32x4 = transmute(vmulq_laneq_s32::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_lane_u16() {
let a: u16x4 = u16x4::new(1, 2, 3, 4);
let b: u16x4 = u16x4::new(0, 2, 0, 0);
let e: u16x4 = u16x4::new(2, 4, 6, 8);
let r: u16x4 = transmute(vmul_lane_u16::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_laneq_u16() {
let a: u16x4 = u16x4::new(1, 2, 3, 4);
let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
let e: u16x4 = u16x4::new(2, 4, 6, 8);
let r: u16x4 = transmute(vmul_laneq_u16::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_lane_u16() {
let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let b: u16x4 = u16x4::new(0, 2, 0, 0);
let e: u16x8 = u16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
let r: u16x8 = transmute(vmulq_lane_u16::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_laneq_u16() {
let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
let e: u16x8 = u16x8::new(2, 4, 6, 8, 10, 12, 14, 16);
let r: u16x8 = transmute(vmulq_laneq_u16::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_lane_u32() {
let a: u32x2 = u32x2::new(1, 2);
let b: u32x2 = u32x2::new(0, 2);
let e: u32x2 = u32x2::new(2, 4);
let r: u32x2 = transmute(vmul_lane_u32::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_laneq_u32() {
let a: u32x2 = u32x2::new(1, 2);
let b: u32x4 = u32x4::new(0, 2, 0, 0);
let e: u32x2 = u32x2::new(2, 4);
let r: u32x2 = transmute(vmul_laneq_u32::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_lane_u32() {
let a: u32x4 = u32x4::new(1, 2, 3, 4);
let b: u32x2 = u32x2::new(0, 2);
let e: u32x4 = u32x4::new(2, 4, 6, 8);
let r: u32x4 = transmute(vmulq_lane_u32::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_laneq_u32() {
let a: u32x4 = u32x4::new(1, 2, 3, 4);
let b: u32x4 = u32x4::new(0, 2, 0, 0);
let e: u32x4 = u32x4::new(2, 4, 6, 8);
let r: u32x4 = transmute(vmulq_laneq_u32::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_lane_f32() {
let a: f32x2 = f32x2::new(1., 2.);
let b: f32x2 = f32x2::new(2., 0.);
let e: f32x2 = f32x2::new(2., 4.);
let r: f32x2 = transmute(vmul_lane_f32::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_laneq_f32() {
let a: f32x2 = f32x2::new(1., 2.);
let b: f32x4 = f32x4::new(2., 0., 0., 0.);
let e: f32x2 = f32x2::new(2., 4.);
let r: f32x2 = transmute(vmul_laneq_f32::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_lane_f32() {
let a: f32x4 = f32x4::new(1., 2., 3., 4.);
let b: f32x2 = f32x2::new(2., 0.);
let e: f32x4 = f32x4::new(2., 4., 6., 8.);
let r: f32x4 = transmute(vmulq_lane_f32::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulq_laneq_f32() {
let a: f32x4 = f32x4::new(1., 2., 3., 4.);
let b: f32x4 = f32x4::new(2., 0., 0., 0.);
let e: f32x4 = f32x4::new(2., 4., 6., 8.);
let r: f32x4 = transmute(vmulq_laneq_f32::<0>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_s8() {
let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
@ -17094,6 +17890,114 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmullh_n_s16() {
let a: i16x4 = i16x4::new(1, 2, 3, 4);
let b: i16 = 2;
let e: i32x4 = i32x4::new(2, 4, 6, 8);
let r: i32x4 = transmute(vmullh_n_s16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulls_n_s32() {
let a: i32x2 = i32x2::new(1, 2);
let b: i32 = 2;
let e: i64x2 = i64x2::new(2, 4);
let r: i64x2 = transmute(vmulls_n_s32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmullh_n_u16() {
let a: u16x4 = u16x4::new(1, 2, 3, 4);
let b: u16 = 2;
let e: u32x4 = u32x4::new(2, 4, 6, 8);
let r: u32x4 = transmute(vmullh_n_u16(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmulls_n_u32() {
let a: u32x2 = u32x2::new(1, 2);
let b: u32 = 2;
let e: u64x2 = u64x2::new(2, 4);
let r: u64x2 = transmute(vmulls_n_u32(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_lane_s16() {
let a: i16x4 = i16x4::new(1, 2, 3, 4);
let b: i16x4 = i16x4::new(0, 2, 0, 0);
let e: i32x4 = i32x4::new(2, 4, 6, 8);
let r: i32x4 = transmute(vmull_lane_s16::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_laneq_s16() {
let a: i16x4 = i16x4::new(1, 2, 3, 4);
let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
let e: i32x4 = i32x4::new(2, 4, 6, 8);
let r: i32x4 = transmute(vmull_laneq_s16::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_lane_s32() {
let a: i32x2 = i32x2::new(1, 2);
let b: i32x2 = i32x2::new(0, 2);
let e: i64x2 = i64x2::new(2, 4);
let r: i64x2 = transmute(vmull_lane_s32::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_laneq_s32() {
let a: i32x2 = i32x2::new(1, 2);
let b: i32x4 = i32x4::new(0, 2, 0, 0);
let e: i64x2 = i64x2::new(2, 4);
let r: i64x2 = transmute(vmull_laneq_s32::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_lane_u16() {
let a: u16x4 = u16x4::new(1, 2, 3, 4);
let b: u16x4 = u16x4::new(0, 2, 0, 0);
let e: u32x4 = u32x4::new(2, 4, 6, 8);
let r: u32x4 = transmute(vmull_lane_u16::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_laneq_u16() {
let a: u16x4 = u16x4::new(1, 2, 3, 4);
let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
let e: u32x4 = u32x4::new(2, 4, 6, 8);
let r: u32x4 = transmute(vmull_laneq_u16::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_lane_u32() {
let a: u32x2 = u32x2::new(1, 2);
let b: u32x2 = u32x2::new(0, 2);
let e: u64x2 = u64x2::new(2, 4);
let r: u64x2 = transmute(vmull_lane_u32::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_laneq_u32() {
let a: u32x2 = u32x2::new(1, 2);
let b: u32x4 = u32x4::new(0, 2, 0, 0);
let e: u64x2 = u64x2::new(2, 4);
let r: u64x2 = transmute(vmull_laneq_u32::<1>(transmute(a), transmute(b)));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vfma_f32() {
let a: f32x2 = f32x2::new(2.0, 3.0);

View file

@ -338,7 +338,7 @@ generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t,
/// Signed compare bitwise Test bits nonzero
name = vtst
multi_fn = simd_and, c:in_t
multi_fn = simd_and, c:in_t, a, b
multi_fn = fixed, d:in_t
multi_fn = simd_ne, c, transmute(d)
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
@ -354,7 +354,7 @@ generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8
/// Unsigned compare bitwise Test bits nonzero
name = vtst
multi_fn = simd_and, c:in_t
multi_fn = simd_and, c:in_t, a, b
multi_fn = fixed, d:in_t
multi_fn = simd_ne, c, transmute(d)
a = MIN, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, MAX
@ -1864,6 +1864,18 @@ aarch64 = mul
fn = simd_mul
generate int*_t, uint*_t
/// Polynomial multiply
name = vmul
a = 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3
b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
validate 1, 6, 3, 12, 5, 10, 7, 24, 9, 30, 11, 20, 13, 18, 15, 48
aarch64 = pmul
link-aarch64 = pmul._EXT_
arm = vmul
link-arm = vmulp._EXT_
generate poly8x8_t, poly8x16_t
/// Multiply
name = vmul
fn = simd_mul
@ -1877,6 +1889,108 @@ generate float64x*_t
arm = vmul.
generate float*_t
/// Vector multiply by scalar
name = vmul
out-n-suffix
multi_fn = simd_mul, a, {vdup-nout-noext, b}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 2
validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32
arm = vmul
aarch64 = mul
generate int16x4_t:i16:int16x4_t, int16x8_t:i16:int16x8_t, int32x2_t:i32:int32x2_t, int32x4_t:i32:int32x4_t
generate uint16x4_t:u16:uint16x4_t, uint16x8_t:u16:uint16x8_t, uint32x2_t:u32:uint32x2_t, uint32x4_t:u32:uint32x4_t
/// Vector multiply by scalar
name = vmul
out-n-suffix
multi_fn = simd_mul, a, {vdup-nout-noext, b}
a = 1., 2., 3., 4.
b = 2.
validate 2., 4., 6., 8.
aarch64 = fmul
generate float64x1_t:f64:float64x1_t, float64x2_t:f64:float64x2_t
arm = vmul
generate float32x2_t:f32:float32x2_t, float32x4_t:f32:float32x4_t
/// Multiply
name = vmul
lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_mul, a, {simd_shuffle-out_len-noext, b, b, {dup-out_len-LANE as u32}}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32
aarch64 = mul
arm = vmul
generate int16x4_t, int16x4_t:int16x8_t:int16x4_t, int16x8_t:int16x4_t:int16x8_t, int16x8_t
generate int32x2_t, int32x2_t:int32x4_t:int32x2_t, int32x4_t:int32x2_t:int32x4_t, int32x4_t
generate uint16x4_t, uint16x4_t:uint16x8_t:uint16x4_t, uint16x8_t:uint16x4_t:uint16x8_t, uint16x8_t
generate uint32x2_t, uint32x2_t:uint32x4_t:uint32x2_t, uint32x4_t:uint32x2_t:uint32x4_t, uint32x4_t
/// Floating-point multiply
name = vmul
lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_mul, a, {transmute--<element_t _>, {simd_extract, b, LANE as u32}}
a = 1., 2., 3., 4.
b = 2., 0., 0., 0.
n = 0
validate 2., 4., 6., 8.
aarch64 = fmul
generate float64x1_t, float64x1_t:float64x2_t:float64x1_t
/// Floating-point multiply
name = vmul
lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_mul, a, {simd_shuffle-out_len-noext, b, b, {dup-out_len-LANE as u32}}
a = 1., 2., 3., 4.
b = 2., 0., 0., 0.
n = 0
validate 2., 4., 6., 8.
aarch64 = fmul
generate float64x2_t:float64x1_t:float64x2_t, float64x2_t
arm = vmul
generate float32x2_t, float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x2_t:float32x4_t, float32x4_t
/// Floating-point multiply
name = vmuls_lane
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_extract, b:f32, b, LANE as u32
multi_fn = a * b
a = 1.
b = 2., 0., 0., 0.
n = 0
validate 2.
aarch64 = fmul
generate f32:float32x2_t:f32, f32:float32x4_t:f32
/// Floating-point multiply
name = vmuld_lane
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = simd_extract, b:f64, b, LANE as u32
multi_fn = a * b
a = 1.
b = 2., 0.
n = 0
validate 2.
aarch64 = fmul
generate f64:float64x1_t:f64, f64:float64x2_t:f64
/// Signed multiply long
name = vmull
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
@ -1941,6 +2055,21 @@ link-arm = vmullp._EXT_
link-aarch64 = pmull._EXT_
generate poly8x8_t:poly8x8_t:poly16x8_t
/// Polynomial multiply long
name = vmull
no-q
a = 15
b = 3
validate 17
target = crypto
aarch64 = pmull
link-aarch64 = pmull64:p64:p64:p64:int8x16_t
arm = vmull
link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
//generate p64:p64:p128
/// Polynomial multiply long
name = vmull_high
no-q
@ -1955,6 +2084,144 @@ validate 9, 30, 11, 20, 13, 18, 15, 48
aarch64 = pmull
generate poly8x16_t:poly8x16_t:poly16x8_t
/// Polynomial multiply long
name = vmull_high
no-q
multi_fn = vmull-noqself-noext, {simd_extract, a, 1}, {simd_extract, b, 1}
a = 1, 15
b = 1, 3
validate 17
target = crypto
aarch64 = pmull2
//generate poly64x2_t:poly64x2_t:p128
/// Vector long multiply with scalar
name = vmull
n-suffix
multi_fn = vmull-in0-noext, a, {vdup-nin0-noext, b}
a = 1, 2, 3, 4, 5, 6, 7, 8
b = 2
validate 2, 4, 6, 8, 10, 12, 14, 16
arm = vmull
aarch64 = smull
generate int16x4_t:i16:int32x4_t, int32x2_t:i32:int64x2_t
aarch64 = umull
generate uint16x4_t:u16:uint32x4_t, uint32x2_t:u32:uint64x2_t
/// Vector long multiply by scalar
name = vmull_lane
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vmull-in0-noext, a, {simd_shuffle-in0_len-noext, b, b, {dup-in0_len-LANE as u32}}
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32
arm = vmull
aarch64 = smull
generate int16x4_t:int16x4_t:int32x4_t, int16x4_t:int16x8_t:int32x4_t
generate int32x2_t:int32x2_t:int64x2_t, int32x2_t:int32x4_t:int64x2_t
aarch64 = umull
generate uint16x4_t:uint16x4_t:uint32x4_t, uint16x4_t:uint16x8_t:uint32x4_t
generate uint32x2_t:uint32x2_t:uint64x2_t, uint32x2_t:uint32x4_t:uint64x2_t
/// Multiply long
name = vmull_high_n
no-q
multi_fn = vmull_high-noqself-noext, a, {vdup-nin0-noext, b}
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
b = 2
validate 18, 20, 22, 24, 26, 28, 30, 32
aarch64 = smull2
generate int16x8_t:i16:int32x4_t, int32x4_t:i32:int64x2_t
aarch64 = umull2
generate uint16x8_t:u16:uint32x4_t, uint32x4_t:u32:uint64x2_t
/// Multiply long
name = vmull_high_lane
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vmull_high-noqself-noext, a, {simd_shuffle-in0_len-noext, b, b, {dup-in0_len-LANE as u32}}
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
b = 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
n = 1
validate 18, 20, 22, 24, 26, 28, 30, 32
aarch64 = smull2
generate int16x8_t:int16x4_t:int32x4_t, int16x8_t:int16x8_t:int32x4_t
generate int32x4_t:int32x2_t:int64x2_t, int32x4_t:int32x4_t:int64x2_t
aarch64 = umull2
generate uint16x8_t:uint16x4_t:uint32x4_t, uint16x8_t:uint16x8_t:uint32x4_t
generate uint32x4_t:uint32x2_t:uint64x2_t, uint32x4_t:uint32x4_t:uint64x2_t
/// Floating-point multiply extended
name = vmulx
a = 1., 2., 3., 4.
b = 2., 2., 2., 2.
validate 2., 4., 6., 8.
aarch64 = fmulx
link-aarch64 = fmulx._EXT_
generate float*_t, float64x*_t
/// Floating-point multiply extended
name = vmulx
lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vmulx-in0-noext, a, {transmute--<element_t _>, {simd_extract, b, LANE as u32}}
a = 1.
b = 2., 0.
n = 0
validate 2.
aarch64 = fmulx
generate float64x1_t, float64x1_t:float64x2_t:float64x1_t
/// Floating-point multiply extended
name = vmulx
lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vmulx-in0-noext, a, {simd_shuffle-in0_len-noext, b, b, {dup-in0_len-LANE as u32}}
a = 1., 2., 3., 4.
b = 2., 0., 0., 0.
n = 0
validate 2., 4., 6., 8.
aarch64 = fmulx
generate float32x2_t, float32x2_t:float32x4_t:float32x2_t, float32x4_t:float32x2_t:float32x4_t, float32x4_t
generate float64x2_t:float64x1_t:float64x2_t, float64x2_t
/// Floating-point multiply extended
name = vmulx
a = 2.
b = 3.
validate 6.
aarch64 = fmulx
link-aarch64 = fmulx._EXT_
generate f32, f64
/// Floating-point multiply extended
name = vmulx
lane-suffixes
constn = LANE
multi_fn = static_assert_imm-in_exp_len-LANE
multi_fn = vmulx-out-noext, a, {simd_extract, b, LANE as u32}
a = 2.
b = 3., 0., 0., 0.
n = 0
validate 6.
aarch64 = fmulx
generate f32:float32x2_t:f32, f32:float32x4_t:f32, f64:float64x1_t:f64, f64:float64x2_t:f64
/// Floating-point fused Multiply-Add to accumulator(vector)
name = vfma
a = 2.0, 3.0, 4.0, 5.0
@ -2142,7 +2409,7 @@ generate uint32x4_t:u64
name = vsubhn
no-q
multi_fn = fixed, c:in_t
multi_fn = simd_cast, {simd_shr, {simd_sub}, transmute(c)}
multi_fn = simd_cast, {simd_shr, {simd_sub, a, b}, transmute(c)}
a = MAX, MIN, 1, 1, MAX, MIN, 1, 1
b = 1, 0, 0, 0, 1, 0, 0, 0
fixed = HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS, HFBITS

View file

@ -81,7 +81,7 @@ fn type_len(t: &str) -> usize {
"poly64x1_t" => 1,
"poly64x2_t" => 2,
"i8" | "i16" | "i32" | "i64" | "u8" | "u16" | "u32" | "u64" | "f32" | "f64" | "p8"
| "p16" => 1,
| "p16" | "p64" | "p128" => 1,
_ => panic!("unknown type: {}", t),
}
}
@ -324,16 +324,16 @@ fn type_to_noq_suffix(t: &str) -> &str {
"int16x4_t" | "int16x8_t" | "i16" => "_s16",
"int32x2_t" | "int32x4_t" | "i32" => "_s32",
"int64x1_t" | "int64x2_t" | "i64" => "_s64",
"uint8x8_t" | "uint8x16_t" => "_u8",
"uint16x4_t" | "uint16x8_t" => "_u16",
"uint32x2_t" | "uint32x4_t" => "_u32",
"uint64x1_t" | "uint64x2_t" => "_u64",
"uint8x8_t" | "uint8x16_t" | "u8" => "_u8",
"uint16x4_t" | "uint16x8_t" | "u16" => "_u16",
"uint32x2_t" | "uint32x4_t" | "u32" => "_u32",
"uint64x1_t" | "uint64x2_t" | "u64" => "_u64",
"float16x4_t" | "float16x8_t" => "_f16",
"float32x2_t" | "float32x4_t" => "_f32",
"float64x1_t" | "float64x2_t" => "_f64",
"poly8x8_t" | "poly8x16_t" => "_p8",
"poly16x4_t" | "poly16x8_t" => "_p16",
"poly64x1_t" | "poly64x2_t" => "_p64",
"poly64x1_t" | "poly64x2_t" | "p64" => "_p64",
_ => panic!("unknown type: {}", t),
}
}
@ -347,6 +347,7 @@ enum Suffix {
NSuffix,
NoQNSuffix,
OutSuffix,
OutNSuffix,
Lane,
In2,
In2Lane,
@ -354,8 +355,10 @@ enum Suffix {
#[derive(Clone, Copy)]
enum TargetFeature {
Default,
ArmV7,
FPArmV8,
Crypto,
}
fn type_to_global_type(t: &str) -> &str {
@ -400,6 +403,8 @@ fn type_to_global_type(t: &str) -> &str {
"f64" => "f64",
"p8" => "p8",
"p16" => "p16",
"p64" => "p64",
"p128" => "p128",
_ => panic!("unknown type: {}", t),
}
}
@ -492,6 +497,10 @@ fn type_to_ext(t: &str) -> &str {
"u16" => "v4i16",
"u32" => "v2i32",
"u64" => "v1i64",
"f32" => "f32",
"f64" => "f64",
"p64" => "p64",
"p128" => "p128",
/*
"poly64x1_t" => "i64x1",
"poly64x2_t" => "i64x2",
@ -825,6 +834,7 @@ fn gen_aarch64(
)],
suffix: Suffix,
para_num: i32,
target: TargetFeature,
fixed: &Vec<String>,
multi_fn: &Vec<String>,
) -> (String, String) {
@ -846,16 +856,20 @@ fn gen_aarch64(
NSuffix => format!("{}{}", current_name, type_to_n_suffix(in_t[1])),
NoQNSuffix => format!("{}{}", current_name, type_to_noq_n_suffix(in_t[1])),
OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)),
OutNSuffix => format!("{}{}", current_name, type_to_n_suffix(out_t)),
Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])),
In2Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[2])),
};
let current_target = match target {
Default => "neon",
ArmV7 => "v7",
FPArmV8 => "fp-armv8,v8",
Crypto => "neon,crypto",
};
let current_fn = if let Some(current_fn) = current_fn.clone() {
if link_aarch64.is_some() {
panic!(
"[{}] Can't specify link and (multi) fn at the same time.",
name
)
panic!("[{}] Can't specify link and fn at the same time.", name)
}
current_fn
} else if link_aarch64.is_some() {
@ -872,7 +886,24 @@ fn gen_aarch64(
let current_aarch64 = current_aarch64.clone().unwrap();
let mut ext_c = String::new();
let mut ext_c_const = String::new();
if let Some(link_aarch64) = link_aarch64.clone() {
let mut link_t: Vec<String> = vec![
in_t[0].to_string(),
in_t[1].to_string(),
in_t[2].to_string(),
out_t.to_string(),
];
if let Some(mut link_aarch64) = link_aarch64.clone() {
if link_aarch64.contains(":") {
let links: Vec<_> = link_aarch64.split(':').map(|v| v.to_string()).collect();
assert_eq!(links.len(), 5);
link_aarch64 = links[0].to_string();
link_t = vec![
links[1].clone(),
links[2].clone(),
links[3].clone(),
links[4].clone(),
];
}
let ext = type_to_ext(in_t[0]);
let ext2 = type_to_ext(out_t);
let link_aarch64 = if link_aarch64.starts_with("llvm") {
@ -893,17 +924,17 @@ fn gen_aarch64(
current_fn,
match para_num {
1 => {
format!("a: {}", in_t[0])
format!("a: {}", link_t[0])
}
2 => {
format!("a: {}, b: {}", in_t[0], in_t[1])
format!("a: {}, b: {}", link_t[0], link_t[1])
}
3 => {
format!("a: {}, b: {}, c: {}", in_t[0], in_t[1], in_t[2])
format!("a: {}, b: {}, c: {}", link_t[0], link_t[1], link_t[2])
}
_ => unimplemented!("unknown para_num"),
},
out_t
link_t[3]
);
if const_aarch64.is_some() {
ext_c_const = format!(
@ -998,6 +1029,11 @@ fn gen_aarch64(
} else {
String::new()
};
let trans: [&str; 2] = if link_t[3] != out_t {
["transmute(", ")"]
} else {
["", ""]
};
let call = if let Some(const_aarch64) = const_aarch64 {
match para_num {
1 => format!(
@ -1033,16 +1069,16 @@ fn gen_aarch64(
match (multi_calls.len(), para_num, fixed.len()) {
(0, 1, 0) => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
{}{}(a)
{}{}{}(a){}
}}"#,
name, const_declare, in_t[0], out_t, ext_c, current_fn,
name, const_declare, in_t[0], out_t, ext_c, trans[0], current_fn, trans[1]
),
(0, 1, _) => {
let fixed: Vec<String> = fixed.iter().take(type_len(in_t[0])).cloned().collect();
format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
let b{};
{}{}(a, transmute(b))
{}{}{}(a, transmute(b)){}
}}"#,
name,
const_declare,
@ -1050,14 +1086,16 @@ fn gen_aarch64(
out_t,
values(in_t[0], &fixed),
ext_c,
trans[0],
current_fn,
trans[1],
)
}
(0, 2, _) => format!(
r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{
{}{}(a, b)
{}{}{}(a, b){}
}}"#,
name, const_declare, in_t[0], in_t[1], out_t, ext_c, current_fn,
name, const_declare, in_t[0], in_t[1], out_t, ext_c, trans[0], current_fn, trans[1],
),
(0, 3, _) => format!(
r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{
@ -1090,11 +1128,11 @@ fn gen_aarch64(
r#"
{}
#[inline]
#[target_feature(enable = "neon")]
#[target_feature(enable = "{}")]
#[cfg_attr(test, assert_instr({}{}))]{}
{}
"#,
current_comment, current_aarch64, const_assert, const_legacy, call
current_comment, current_target, current_aarch64, const_assert, const_legacy, call
);
let test = gen_test(
@ -1259,6 +1297,7 @@ fn gen_arm(
NSuffix => format!("{}{}", current_name, type_to_n_suffix(in_t[1])),
NoQNSuffix => format!("{}{}", current_name, type_to_noq_n_suffix(in_t[1])),
OutSuffix => format!("{}{}", current_name, type_to_suffix(out_t)),
OutNSuffix => format!("{}{}", current_name, type_to_n_suffix(out_t)),
Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[1])),
In2 => format!("{}{}", current_name, type_to_suffix(in_t[2])),
In2Lane => format!("{}{}", current_name, type_to_lane_suffixes(out_t, in_t[2])),
@ -1266,10 +1305,17 @@ fn gen_arm(
let current_aarch64 = current_aarch64
.clone()
.unwrap_or_else(|| current_arm.to_string());
let current_target = match target {
let current_target_aarch64 = match target {
Default => "neon",
ArmV7 => "neon",
FPArmV8 => "neon",
Crypto => "neon,crypto",
};
let current_target_arm = match target {
Default => "v7",
ArmV7 => "v7",
FPArmV8 => "fp-armv8,v8",
Crypto => "crypto,v8",
};
let current_fn = if let Some(current_fn) = current_fn.clone() {
@ -1292,9 +1338,57 @@ fn gen_arm(
String::new()
};
let mut ext_c = String::new();
let mut ext_c_const_arm = String::new();
let mut ext_c_const_aarch64 = String::new();
if let (Some(link_arm), Some(link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) {
let mut ext_c_arm = if multi_fn.is_empty() {
String::new()
} else {
String::from(
r#"
"#,
)
};
let mut ext_c_aarch64 = if multi_fn.is_empty() {
String::new()
} else {
String::from(
r#"
"#,
)
};
let mut link_arm_t: Vec<String> = vec![
in_t[0].to_string(),
in_t[1].to_string(),
in_t[2].to_string(),
out_t.to_string(),
];
let mut link_aarch64_t: Vec<String> = vec![
in_t[0].to_string(),
in_t[1].to_string(),
in_t[2].to_string(),
out_t.to_string(),
];
if let (Some(mut link_arm), Some(mut link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) {
if link_arm.contains(":") {
let links: Vec<_> = link_arm.split(':').map(|v| v.to_string()).collect();
assert_eq!(links.len(), 5);
link_arm = links[0].to_string();
link_arm_t = vec![
links[1].clone(),
links[2].clone(),
links[3].clone(),
links[4].clone(),
];
}
if link_aarch64.contains(":") {
let links: Vec<_> = link_aarch64.split(':').map(|v| v.to_string()).collect();
assert_eq!(links.len(), 5);
link_aarch64 = links[0].to_string();
link_aarch64_t = vec![
links[1].clone(),
links[2].clone(),
links[3].clone(),
links[4].clone(),
];
}
let ext = type_to_ext(in_t[0]);
let ext2 = type_to_ext(out_t);
let link_arm = if link_arm.starts_with("llvm") {
@ -1311,35 +1405,36 @@ fn gen_arm(
link.push_str(&link_aarch64);
link.replace("_EXT_", ext).replace("_EXT2_", ext2)
};
ext_c = format!(
r#"#[allow(improper_ctypes)]
if out_t == link_arm_t[3] && out_t == link_aarch64_t[3] {
ext_c = format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
#[cfg_attr(target_arch = "arm", link_name = "{}")]
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
fn {}({}) -> {};
}}
"#,
link_arm,
link_aarch64,
current_fn,
match para_num {
1 => {
format!("a: {}", in_t[0])
}
2 => {
format!("a: {}, b: {}", in_t[0], in_t[1])
}
3 => {
format!("a: {}, b: {}, c: {}", in_t[0], in_t[1], in_t[2])
}
_ => unimplemented!("unknown para_num"),
},
out_t
);
link_arm,
link_aarch64,
current_fn,
match para_num {
1 => {
format!("a: {}", in_t[0])
}
2 => {
format!("a: {}, b: {}", in_t[0], in_t[1])
}
3 => {
format!("a: {}, b: {}, c: {}", in_t[0], in_t[1], in_t[2])
}
_ => unimplemented!("unknown para_num"),
},
out_t
);
};
if const_arm.is_some() {
ext_c_const_arm = format!(
r#"
#[allow(improper_ctypes)]
ext_c_arm.push_str(&format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
#[cfg_attr(target_arch = "arm", link_name = "{}")]
fn {}({}) -> {};
@ -1363,12 +1458,39 @@ fn gen_arm(
_ => unimplemented!("unknown para_num"),
},
out_t
);
));
};
if out_t != link_arm_t[3] {
ext_c_arm.push_str(&format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
#[cfg_attr(target_arch = "arm", link_name = "{}")]
fn {}({}) -> {};
}}
"#,
link_arm,
current_fn,
match para_num {
1 => {
format!("a: {}", link_arm_t[0])
}
2 => {
format!("a: {}, b: {}", link_arm_t[0], link_arm_t[1])
}
3 => {
format!(
"a: {}, b: {}, c: {}",
link_arm_t[0], link_arm_t[1], link_arm_t[2]
)
}
_ => unimplemented!("unknown para_num"),
},
link_arm_t[3]
));
}
if const_aarch64.is_some() {
ext_c_const_aarch64 = format!(
r#"
#[allow(improper_ctypes)]
ext_c_aarch64.push_str(&format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
fn {}({}) -> {};
@ -1389,7 +1511,35 @@ fn gen_arm(
_ => unimplemented!("unknown para_num"),
},
out_t
);
));
}
if out_t != link_aarch64_t[3] {
ext_c_aarch64.push_str(&format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
fn {}({}) -> {};
}}
"#,
link_aarch64,
current_fn,
match para_num {
1 => {
format!("a: {}", link_aarch64_t[0])
}
2 => {
format!("a: {}, b: {}", link_aarch64_t[0], link_aarch64_t[1])
}
3 => {
format!(
"a: {}, b: {}, c: {}",
link_aarch64_t[0], link_aarch64_t[1], link_aarch64_t[2]
)
}
_ => unimplemented!("unknown para_num"),
},
link_aarch64_t[3]
));
}
};
let multi_calls = if !multi_fn.is_empty() {
@ -1430,6 +1580,11 @@ fn gen_arm(
} else {
String::new()
};
let trans: [&str; 2] = if out_t == link_arm_t[3] && out_t == link_aarch64_t[3] {
["", ""]
} else {
["transmute(", ")"]
};
let call = match (multi_calls.len(), para_num, fixed.len()) {
(0, 1, 0) => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
@ -1485,7 +1640,7 @@ fn gen_arm(
),
(_, _, _) => String::new(),
};
let call_const_arm = if let Some(const_arm) = const_arm {
let call_arm = if let Some(const_arm) = const_arm {
let const_arm = const_arm.replace("ttn", type_to_native_type(in_t[1]));
let mut cnt = String::from(in_t[1]);
cnt.push_str("(");
@ -1501,20 +1656,61 @@ fn gen_arm(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
{}{}{}(a, {})
}}"#,
name, const_declare, in_t[0], out_t, multi_calls, ext_c_const_arm, current_fn, cnt
name, const_declare, in_t[0], out_t, multi_calls, ext_c_arm, current_fn, cnt
),
2 => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
r#"pub unsafe fn {}{}(a: {}, b:{}) -> {} {{
{}{}{}(a, b, {})
}}"#,
name, const_declare, in_t[0], out_t, multi_calls, ext_c_const_arm, current_fn, cnt
name,
const_declare,
in_t[0],
in_t[1],
out_t,
multi_calls,
ext_c_arm,
current_fn,
cnt
),
_ => String::new(),
}
} else if out_t != link_arm_t[3] {
match para_num {
1 => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
{}{}{}{}(a){}
}}"#,
name,
const_declare,
in_t[0],
out_t,
multi_calls,
ext_c_arm,
trans[0],
current_fn,
trans[1]
),
2 => format!(
r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{
{}{}{}{}(transmute(a), transmute(b)){}
}}"#,
name,
const_declare,
in_t[0],
in_t[1],
out_t,
multi_calls,
ext_c_arm,
trans[0],
current_fn,
trans[1],
),
_ => String::new(),
}
} else {
String::new()
};
let call_const_aarch64 = if let Some(const_aarch64) = const_aarch64 {
let call_aarch64 = if let Some(const_aarch64) = const_aarch64 {
match para_num {
1 => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
@ -1525,55 +1721,94 @@ fn gen_arm(
in_t[0],
out_t,
multi_calls,
ext_c_const_aarch64,
ext_c_aarch64,
current_fn,
const_aarch64
),
2 => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{
{}{}{}(a, b, {})
}}"#,
name,
const_declare,
in_t[0],
in_t[1],
out_t,
multi_calls,
ext_c_aarch64,
current_fn,
const_aarch64
),
_ => String::new(),
}
} else if out_t != link_aarch64_t[3] {
match para_num {
1 => format!(
r#"pub unsafe fn {}{}(a: {}) -> {} {{
{}{}{}{}(a){}
}}"#,
name,
const_declare,
in_t[0],
out_t,
multi_calls,
ext_c_const_aarch64,
ext_c_aarch64,
trans[0],
current_fn,
const_aarch64
trans[1],
),
2 => format!(
r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{
{}{}{}{}(a, b){}
}}"#,
name,
const_declare,
in_t[0],
in_t[1],
out_t,
multi_calls,
ext_c_aarch64,
trans[0],
current_fn,
trans[1],
),
_ => String::new(),
}
} else {
String::new()
};
let function = if const_arm.is_some() && const_aarch64.is_some() {
let function = if (const_arm.is_some() && const_aarch64.is_some())
|| out_t != link_arm_t[3]
|| out_t != link_aarch64_t[3]
{
format!(
r#"
{}
#[inline]
#[cfg(target_arch = "arm")]
#[target_feature(enable = "neon,v7")]
#[target_feature(enable = "neon,{}")]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr({}{}))]{}
{}
{}
#[inline]
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
#[target_feature(enable = "{}")]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr({}{}))]{}
{}
"#,
current_comment,
current_target_arm,
expand_intrinsic(&current_arm, in_t[1]),
const_assert,
const_legacy,
call_const_arm,
call_arm,
current_comment,
current_target_aarch64,
expand_intrinsic(&current_aarch64, in_t[1]),
const_assert,
const_legacy,
call_const_aarch64,
call_aarch64,
)
} else {
format!(
@ -1587,7 +1822,7 @@ fn gen_arm(
{}
"#,
current_comment,
current_target,
current_target_arm,
expand_intrinsic(&current_arm, in_t[1]),
const_assert,
expand_intrinsic(&current_aarch64, in_t[1]),
@ -1755,6 +1990,7 @@ fn get_call(
let len = match &*fn_format[1] {
"out_len" => type_len(out_t),
"in_len" => type_len(in_t[1]),
"in0_len" => type_len(in_t[0]),
"halflen" => type_len(in_t[1]) / 2,
_ => 0,
};
@ -2003,6 +2239,8 @@ fn get_call(
fn_name.push_str(type_to_n_suffix(in_t[1]));
} else if fn_format[1] == "out" {
fn_name.push_str(type_to_suffix(out_t));
} else if fn_format[1] == "in0" {
fn_name.push_str(type_to_suffix(in_t[0]));
} else if fn_format[1] == "in2" {
fn_name.push_str(type_to_suffix(in_t[2]));
} else if fn_format[1] == "signed" {
@ -2028,6 +2266,8 @@ fn get_call(
fn_name.push_str(&(type_len(in_t[1]) / 2).to_string());
} else if fn_format[1] == "nout" {
fn_name.push_str(type_to_n_suffix(out_t));
} else if fn_format[1] == "nin0" {
fn_name.push_str(type_to_n_suffix(in_t[0]));
} else if fn_format[1] == "nsigned" {
fn_name.push_str(type_to_n_suffix(type_to_signed(in_t[1])));
} else if fn_format[1] == "in_ntt" {
@ -2063,7 +2303,7 @@ fn get_call(
}
}
if param_str.is_empty() {
param_str.push_str("a, b");
return fn_name;
}
let fn_str = if let Some((re_name, re_type)) = re.clone() {
format!(
@ -2108,7 +2348,7 @@ fn main() -> io::Result<()> {
Vec<String>,
)> = Vec::new();
let mut multi_fn: Vec<String> = Vec::new();
let mut target: TargetFeature = ArmV7;
let mut target: TargetFeature = Default;
//
// THIS FILE IS GENERATED FORM neon.spec DO NOT CHANGE IT MANUALLY
@ -2189,7 +2429,7 @@ mod test {
fixed = Vec::new();
n = None;
multi_fn = Vec::new();
target = ArmV7;
target = Default;
} else if line.starts_with("//") {
} else if line.starts_with("name = ") {
current_name = Some(String::from(&line[7..]));
@ -2211,6 +2451,8 @@ mod test {
suffix = NoQDouble;
} else if line.starts_with("n-suffix") {
suffix = NSuffix;
} else if line.starts_with("out-n-suffix") {
suffix = OutNSuffix;
} else if line.starts_with("noq-n-suffix") {
suffix = NoQNSuffix;
} else if line.starts_with("out-suffix") {
@ -2245,10 +2487,12 @@ mod test {
} else if line.starts_with("target = ") {
target = match Some(String::from(&line[9..])) {
Some(input) => match input.as_str() {
"v7" => ArmV7,
"fp-armv8" => FPArmV8,
_ => ArmV7,
"crypto" => Crypto,
_ => Default,
},
_ => ArmV7,
_ => Default,
}
} else if line.starts_with("generate ") {
let line = &line[9..];
@ -2328,6 +2572,7 @@ mod test {
&current_tests,
suffix,
para_num,
target,
&fixed,
&multi_fn,
);