Add vmull, vmull_high, vmlal, vmlal_high, vmlsl, vmlsl_high neon instructions (#1091)
This commit is contained in:
parent
ce1027d7d5
commit
63facc4b68
4 changed files with 1029 additions and 6 deletions
|
|
@ -1617,6 +1617,66 @@ pub unsafe fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float
|
|||
simd_add(a, simd_mul(b, c))
|
||||
}
|
||||
|
||||
/// Signed multiply-add long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smlal2))]
|
||||
pub unsafe fn vmlal_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
|
||||
let b: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let c: int8x8_t = simd_shuffle8(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
vmlal_s8(a, b, c)
|
||||
}
|
||||
|
||||
/// Signed multiply-add long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smlal2))]
|
||||
pub unsafe fn vmlal_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
|
||||
let b: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
|
||||
let c: int16x4_t = simd_shuffle4(c, c, [4, 5, 6, 7]);
|
||||
vmlal_s16(a, b, c)
|
||||
}
|
||||
|
||||
/// Signed multiply-add long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smlal2))]
|
||||
pub unsafe fn vmlal_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
|
||||
let b: int32x2_t = simd_shuffle2(b, b, [2, 3]);
|
||||
let c: int32x2_t = simd_shuffle2(c, c, [2, 3]);
|
||||
vmlal_s32(a, b, c)
|
||||
}
|
||||
|
||||
/// Unsigned multiply-add long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umlal2))]
|
||||
pub unsafe fn vmlal_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
|
||||
let b: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let c: uint8x8_t = simd_shuffle8(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
vmlal_u8(a, b, c)
|
||||
}
|
||||
|
||||
/// Unsigned multiply-add long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umlal2))]
|
||||
pub unsafe fn vmlal_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
|
||||
let b: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
|
||||
let c: uint16x4_t = simd_shuffle4(c, c, [4, 5, 6, 7]);
|
||||
vmlal_u16(a, b, c)
|
||||
}
|
||||
|
||||
/// Unsigned multiply-add long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umlal2))]
|
||||
pub unsafe fn vmlal_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
|
||||
let b: uint32x2_t = simd_shuffle2(b, b, [2, 3]);
|
||||
let c: uint32x2_t = simd_shuffle2(c, c, [2, 3]);
|
||||
vmlal_u32(a, b, c)
|
||||
}
|
||||
|
||||
/// Floating-point multiply-subtract from accumulator
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -1633,6 +1693,66 @@ pub unsafe fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float
|
|||
simd_sub(a, simd_mul(b, c))
|
||||
}
|
||||
|
||||
/// Signed multiply-subtract long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smlsl2))]
|
||||
pub unsafe fn vmlsl_high_s8(a: int16x8_t, b: int8x16_t, c: int8x16_t) -> int16x8_t {
|
||||
let b: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let c: int8x8_t = simd_shuffle8(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
vmlsl_s8(a, b, c)
|
||||
}
|
||||
|
||||
/// Signed multiply-subtract long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smlsl2))]
|
||||
pub unsafe fn vmlsl_high_s16(a: int32x4_t, b: int16x8_t, c: int16x8_t) -> int32x4_t {
|
||||
let b: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
|
||||
let c: int16x4_t = simd_shuffle4(c, c, [4, 5, 6, 7]);
|
||||
vmlsl_s16(a, b, c)
|
||||
}
|
||||
|
||||
/// Signed multiply-subtract long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smlsl2))]
|
||||
pub unsafe fn vmlsl_high_s32(a: int64x2_t, b: int32x4_t, c: int32x4_t) -> int64x2_t {
|
||||
let b: int32x2_t = simd_shuffle2(b, b, [2, 3]);
|
||||
let c: int32x2_t = simd_shuffle2(c, c, [2, 3]);
|
||||
vmlsl_s32(a, b, c)
|
||||
}
|
||||
|
||||
/// Unsigned multiply-subtract long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umlsl2))]
|
||||
pub unsafe fn vmlsl_high_u8(a: uint16x8_t, b: uint8x16_t, c: uint8x16_t) -> uint16x8_t {
|
||||
let b: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let c: uint8x8_t = simd_shuffle8(c, c, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
vmlsl_u8(a, b, c)
|
||||
}
|
||||
|
||||
/// Unsigned multiply-subtract long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umlsl2))]
|
||||
pub unsafe fn vmlsl_high_u16(a: uint32x4_t, b: uint16x8_t, c: uint16x8_t) -> uint32x4_t {
|
||||
let b: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
|
||||
let c: uint16x4_t = simd_shuffle4(c, c, [4, 5, 6, 7]);
|
||||
vmlsl_u16(a, b, c)
|
||||
}
|
||||
|
||||
/// Unsigned multiply-subtract long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umlsl2))]
|
||||
pub unsafe fn vmlsl_high_u32(a: uint64x2_t, b: uint32x4_t, c: uint32x4_t) -> uint64x2_t {
|
||||
let b: uint32x2_t = simd_shuffle2(b, b, [2, 3]);
|
||||
let c: uint32x2_t = simd_shuffle2(c, c, [2, 3]);
|
||||
vmlsl_u32(a, b, c)
|
||||
}
|
||||
|
||||
/// Multiply
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -1649,6 +1769,76 @@ pub unsafe fn vmulq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
|
|||
simd_mul(a, b)
|
||||
}
|
||||
|
||||
/// Signed multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smull2))]
|
||||
pub unsafe fn vmull_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t {
|
||||
let a: int8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let b: int8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
vmull_s8(a, b)
|
||||
}
|
||||
|
||||
/// Signed multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smull2))]
|
||||
pub unsafe fn vmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
|
||||
let a: int16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]);
|
||||
let b: int16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
|
||||
vmull_s16(a, b)
|
||||
}
|
||||
|
||||
/// Signed multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smull2))]
|
||||
pub unsafe fn vmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
|
||||
let a: int32x2_t = simd_shuffle2(a, a, [2, 3]);
|
||||
let b: int32x2_t = simd_shuffle2(b, b, [2, 3]);
|
||||
vmull_s32(a, b)
|
||||
}
|
||||
|
||||
/// Unsigned multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umull2))]
|
||||
pub unsafe fn vmull_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t {
|
||||
let a: uint8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let b: uint8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
vmull_u8(a, b)
|
||||
}
|
||||
|
||||
/// Unsigned multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umull2))]
|
||||
pub unsafe fn vmull_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t {
|
||||
let a: uint16x4_t = simd_shuffle4(a, a, [4, 5, 6, 7]);
|
||||
let b: uint16x4_t = simd_shuffle4(b, b, [4, 5, 6, 7]);
|
||||
vmull_u16(a, b)
|
||||
}
|
||||
|
||||
/// Unsigned multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umull2))]
|
||||
pub unsafe fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
|
||||
let a: uint32x2_t = simd_shuffle2(a, a, [2, 3]);
|
||||
let b: uint32x2_t = simd_shuffle2(b, b, [2, 3]);
|
||||
vmull_u32(a, b)
|
||||
}
|
||||
|
||||
/// Polynomial multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(pmull))]
|
||||
pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
|
||||
let a: poly8x8_t = simd_shuffle8(a, a, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
let b: poly8x8_t = simd_shuffle8(b, b, [8, 9, 10, 11, 12, 13, 14, 15]);
|
||||
vmull_p8(a, b)
|
||||
}
|
||||
|
||||
/// Divide
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -3258,6 +3448,66 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlal_high_s8() {
|
||||
let a: i16x8 = i16x8::new(8, 7, 6, 5, 4, 3, 2, 1);
|
||||
let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let c: i8x16 = i8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let e: i16x8 = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let r: i16x8 = transmute(vmlal_high_s8(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlal_high_s16() {
|
||||
let a: i32x4 = i32x4::new(8, 7, 6, 5);
|
||||
let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let c: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
|
||||
let e: i32x4 = i32x4::new(8, 9, 10, 11);
|
||||
let r: i32x4 = transmute(vmlal_high_s16(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlal_high_s32() {
|
||||
let a: i64x2 = i64x2::new(8, 7);
|
||||
let b: i32x4 = i32x4::new(2, 2, 2, 2);
|
||||
let c: i32x4 = i32x4::new(3, 3, 0, 1);
|
||||
let e: i64x2 = i64x2::new(8, 9);
|
||||
let r: i64x2 = transmute(vmlal_high_s32(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlal_high_u8() {
|
||||
let a: u16x8 = u16x8::new(8, 7, 6, 5, 4, 3, 2, 1);
|
||||
let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let c: u8x16 = u8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let e: u16x8 = u16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
|
||||
let r: u16x8 = transmute(vmlal_high_u8(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlal_high_u16() {
|
||||
let a: u32x4 = u32x4::new(8, 7, 6, 5);
|
||||
let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let c: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
|
||||
let e: u32x4 = u32x4::new(8, 9, 10, 11);
|
||||
let r: u32x4 = transmute(vmlal_high_u16(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlal_high_u32() {
|
||||
let a: u64x2 = u64x2::new(8, 7);
|
||||
let b: u32x4 = u32x4::new(2, 2, 2, 2);
|
||||
let c: u32x4 = u32x4::new(3, 3, 0, 1);
|
||||
let e: u64x2 = u64x2::new(8, 9);
|
||||
let r: u64x2 = transmute(vmlal_high_u32(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmls_f64() {
|
||||
let a: f64 = 6.;
|
||||
|
|
@ -3278,6 +3528,66 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlsl_high_s8() {
|
||||
let a: i16x8 = i16x8::new(14, 15, 16, 17, 18, 19, 20, 21);
|
||||
let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let c: i8x16 = i8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let e: i16x8 = i16x8::new(14, 13, 12, 11, 10, 9, 8, 7);
|
||||
let r: i16x8 = transmute(vmlsl_high_s8(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlsl_high_s16() {
|
||||
let a: i32x4 = i32x4::new(14, 15, 16, 17);
|
||||
let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let c: i16x8 = i16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
|
||||
let e: i32x4 = i32x4::new(14, 13, 12, 11);
|
||||
let r: i32x4 = transmute(vmlsl_high_s16(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlsl_high_s32() {
|
||||
let a: i64x2 = i64x2::new(14, 15);
|
||||
let b: i32x4 = i32x4::new(2, 2, 2, 2);
|
||||
let c: i32x4 = i32x4::new(3, 3, 0, 1);
|
||||
let e: i64x2 = i64x2::new(14, 13);
|
||||
let r: i64x2 = transmute(vmlsl_high_s32(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlsl_high_u8() {
|
||||
let a: u16x8 = u16x8::new(14, 15, 16, 17, 18, 19, 20, 21);
|
||||
let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let c: u8x16 = u8x16::new(3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let e: u16x8 = u16x8::new(14, 13, 12, 11, 10, 9, 8, 7);
|
||||
let r: u16x8 = transmute(vmlsl_high_u8(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlsl_high_u16() {
|
||||
let a: u32x4 = u32x4::new(14, 15, 16, 17);
|
||||
let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let c: u16x8 = u16x8::new(3, 3, 0, 1, 0, 1, 2, 3);
|
||||
let e: u32x4 = u32x4::new(14, 13, 12, 11);
|
||||
let r: u32x4 = transmute(vmlsl_high_u16(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlsl_high_u32() {
|
||||
let a: u64x2 = u64x2::new(14, 15);
|
||||
let b: u32x4 = u32x4::new(2, 2, 2, 2);
|
||||
let c: u32x4 = u32x4::new(3, 3, 0, 1);
|
||||
let e: u64x2 = u64x2::new(14, 13);
|
||||
let r: u64x2 = transmute(vmlsl_high_u32(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmul_f64() {
|
||||
let a: f64 = 1.0;
|
||||
|
|
@ -3296,6 +3606,69 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_s8() {
|
||||
let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let b: i8x16 = i8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
|
||||
let e: i16x8 = i16x8::new(9, 20, 11, 24, 13, 28, 15, 32);
|
||||
let r: i16x8 = transmute(vmull_high_s8(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_s16() {
|
||||
let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
|
||||
let b: i16x8 = i16x8::new(1, 2, 1, 2, 1, 2, 1, 2);
|
||||
let e: i32x4 = i32x4::new(9, 20, 11, 24);
|
||||
let r: i32x4 = transmute(vmull_high_s16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_s32() {
|
||||
let a: i32x4 = i32x4::new(1, 2, 9, 10);
|
||||
let b: i32x4 = i32x4::new(1, 2, 1, 2);
|
||||
let e: i64x2 = i64x2::new(9, 20);
|
||||
let r: i64x2 = transmute(vmull_high_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_u8() {
|
||||
let a: u8x16 = u8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let b: u8x16 = u8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
|
||||
let e: u16x8 = u16x8::new(9, 20, 11, 24, 13, 28, 15, 32);
|
||||
let r: u16x8 = transmute(vmull_high_u8(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_u16() {
|
||||
let a: u16x8 = u16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
|
||||
let b: u16x8 = u16x8::new(1, 2, 1, 2, 1, 2, 1, 2);
|
||||
let e: u32x4 = u32x4::new(9, 20, 11, 24);
|
||||
let r: u32x4 = transmute(vmull_high_u16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_u32() {
|
||||
let a: u32x4 = u32x4::new(1, 2, 9, 10);
|
||||
let b: u32x4 = u32x4::new(1, 2, 1, 2);
|
||||
let e: u64x2 = u64x2::new(9, 20);
|
||||
let r: u64x2 = transmute(vmull_high_u32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_p8() {
|
||||
let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let b: i8x16 = i8x16::new(1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3);
|
||||
let e: i16x8 = i16x8::new(9, 30, 11, 20, 13, 18, 15, 48);
|
||||
let r: i16x8 = transmute(vmull_high_p8(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vdiv_f32() {
|
||||
let a: f32x2 = f32x2::new(2.0, 6.0);
|
||||
|
|
|
|||
|
|
@ -2121,6 +2121,66 @@ pub unsafe fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float
|
|||
simd_add(a, simd_mul(b, c))
|
||||
}
|
||||
|
||||
/// Signed multiply-add long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))]
|
||||
pub unsafe fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
|
||||
simd_add(a, vmull_s8(b, c))
|
||||
}
|
||||
|
||||
/// Signed multiply-add long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))]
|
||||
pub unsafe fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
|
||||
simd_add(a, vmull_s16(b, c))
|
||||
}
|
||||
|
||||
/// Signed multiply-add long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))]
|
||||
pub unsafe fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
|
||||
simd_add(a, vmull_s32(b, c))
|
||||
}
|
||||
|
||||
/// Unsigned multiply-add long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))]
|
||||
pub unsafe fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
|
||||
simd_add(a, vmull_u8(b, c))
|
||||
}
|
||||
|
||||
/// Unsigned multiply-add long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))]
|
||||
pub unsafe fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
|
||||
simd_add(a, vmull_u16(b, c))
|
||||
}
|
||||
|
||||
/// Unsigned multiply-add long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))]
|
||||
pub unsafe fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
|
||||
simd_add(a, vmull_u32(b, c))
|
||||
}
|
||||
|
||||
/// Multiply-subtract from accumulator
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -2261,6 +2321,66 @@ pub unsafe fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float
|
|||
simd_sub(a, simd_mul(b, c))
|
||||
}
|
||||
|
||||
/// Signed multiply-subtract long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))]
|
||||
pub unsafe fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
|
||||
simd_sub(a, vmull_s8(b, c))
|
||||
}
|
||||
|
||||
/// Signed multiply-subtract long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))]
|
||||
pub unsafe fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
|
||||
simd_sub(a, vmull_s16(b, c))
|
||||
}
|
||||
|
||||
/// Signed multiply-subtract long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))]
|
||||
pub unsafe fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
|
||||
simd_sub(a, vmull_s32(b, c))
|
||||
}
|
||||
|
||||
/// Signed multiply-subtract long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))]
|
||||
pub unsafe fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
|
||||
simd_sub(a, vmull_u8(b, c))
|
||||
}
|
||||
|
||||
/// Signed multiply-subtract long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))]
|
||||
pub unsafe fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
|
||||
simd_sub(a, vmull_u16(b, c))
|
||||
}
|
||||
|
||||
/// Signed multiply-subtract long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))]
|
||||
pub unsafe fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
|
||||
simd_sub(a, vmull_u32(b, c))
|
||||
}
|
||||
|
||||
/// Saturating subtract
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -3297,6 +3417,118 @@ pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
|
|||
simd_mul(a, b)
|
||||
}
|
||||
|
||||
/// Signed multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
|
||||
pub unsafe fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v8i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v8i8")]
|
||||
fn vmull_s8_(a: int8x8_t, b: int8x8_t) -> int16x8_t;
|
||||
}
|
||||
vmull_s8_(a, b)
|
||||
}
|
||||
|
||||
/// Signed multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
|
||||
pub unsafe fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v4i16")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v4i16")]
|
||||
fn vmull_s16_(a: int16x4_t, b: int16x4_t) -> int32x4_t;
|
||||
}
|
||||
vmull_s16_(a, b)
|
||||
}
|
||||
|
||||
/// Signed multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))]
|
||||
pub unsafe fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v2i32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v2i32")]
|
||||
fn vmull_s32_(a: int32x2_t, b: int32x2_t) -> int64x2_t;
|
||||
}
|
||||
vmull_s32_(a, b)
|
||||
}
|
||||
|
||||
/// Unsigned multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
|
||||
pub unsafe fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v8i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v8i8")]
|
||||
fn vmull_u8_(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t;
|
||||
}
|
||||
vmull_u8_(a, b)
|
||||
}
|
||||
|
||||
/// Unsigned multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
|
||||
pub unsafe fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v4i16")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v4i16")]
|
||||
fn vmull_u16_(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t;
|
||||
}
|
||||
vmull_u16_(a, b)
|
||||
}
|
||||
|
||||
/// Unsigned multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))]
|
||||
pub unsafe fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v2i32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v2i32")]
|
||||
fn vmull_u32_(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t;
|
||||
}
|
||||
vmull_u32_(a, b)
|
||||
}
|
||||
|
||||
/// Polynomial multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmull))]
|
||||
pub unsafe fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i8")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmull.v8i8")]
|
||||
fn vmull_p8_(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t;
|
||||
}
|
||||
vmull_p8_(a, b)
|
||||
}
|
||||
|
||||
/// Subtract
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -6105,6 +6337,66 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlal_s8() {
|
||||
let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3);
|
||||
let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
|
||||
let r: i16x8 = transmute(vmlal_s8(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlal_s16() {
|
||||
let a: i32x4 = i32x4::new(0, 1, 2, 3);
|
||||
let b: i16x4 = i16x4::new(2, 2, 2, 2);
|
||||
let c: i16x4 = i16x4::new(3, 3, 3, 3);
|
||||
let e: i32x4 = i32x4::new(6, 7, 8, 9);
|
||||
let r: i32x4 = transmute(vmlal_s16(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlal_s32() {
|
||||
let a: i64x2 = i64x2::new(0, 1);
|
||||
let b: i32x2 = i32x2::new(2, 2);
|
||||
let c: i32x2 = i32x2::new(3, 3);
|
||||
let e: i64x2 = i64x2::new(6, 7);
|
||||
let r: i64x2 = transmute(vmlal_s32(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlal_u8() {
|
||||
let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3);
|
||||
let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
|
||||
let r: u16x8 = transmute(vmlal_u8(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlal_u16() {
|
||||
let a: u32x4 = u32x4::new(0, 1, 2, 3);
|
||||
let b: u16x4 = u16x4::new(2, 2, 2, 2);
|
||||
let c: u16x4 = u16x4::new(3, 3, 3, 3);
|
||||
let e: u32x4 = u32x4::new(6, 7, 8, 9);
|
||||
let r: u32x4 = transmute(vmlal_u16(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlal_u32() {
|
||||
let a: u64x2 = u64x2::new(0, 1);
|
||||
let b: u32x2 = u32x2::new(2, 2);
|
||||
let c: u32x2 = u32x2::new(3, 3);
|
||||
let e: u64x2 = u64x2::new(6, 7);
|
||||
let r: u64x2 = transmute(vmlal_u32(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmls_s8() {
|
||||
let a: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13);
|
||||
|
|
@ -6245,6 +6537,66 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlsl_s8() {
|
||||
let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
|
||||
let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3);
|
||||
let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let r: i16x8 = transmute(vmlsl_s8(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlsl_s16() {
|
||||
let a: i32x4 = i32x4::new(6, 7, 8, 9);
|
||||
let b: i16x4 = i16x4::new(2, 2, 2, 2);
|
||||
let c: i16x4 = i16x4::new(3, 3, 3, 3);
|
||||
let e: i32x4 = i32x4::new(0, 1, 2, 3);
|
||||
let r: i32x4 = transmute(vmlsl_s16(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlsl_s32() {
|
||||
let a: i64x2 = i64x2::new(6, 7);
|
||||
let b: i32x2 = i32x2::new(2, 2);
|
||||
let c: i32x2 = i32x2::new(3, 3);
|
||||
let e: i64x2 = i64x2::new(0, 1);
|
||||
let r: i64x2 = transmute(vmlsl_s32(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlsl_u8() {
|
||||
let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
|
||||
let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
|
||||
let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3);
|
||||
let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
let r: u16x8 = transmute(vmlsl_u8(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlsl_u16() {
|
||||
let a: u32x4 = u32x4::new(6, 7, 8, 9);
|
||||
let b: u16x4 = u16x4::new(2, 2, 2, 2);
|
||||
let c: u16x4 = u16x4::new(3, 3, 3, 3);
|
||||
let e: u32x4 = u32x4::new(0, 1, 2, 3);
|
||||
let r: u32x4 = transmute(vmlsl_u16(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmlsl_u32() {
|
||||
let a: u64x2 = u64x2::new(6, 7);
|
||||
let b: u32x2 = u32x2::new(2, 2);
|
||||
let c: u32x2 = u32x2::new(3, 3);
|
||||
let e: u64x2 = u64x2::new(0, 1);
|
||||
let r: u64x2 = transmute(vmlsl_u32(transmute(a), transmute(b), transmute(c)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqsub_u8() {
|
||||
let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
|
||||
|
|
@ -6875,6 +7227,69 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_s8() {
|
||||
let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b: i8x8 = i8x8::new(1, 2, 1, 2, 1, 2, 1, 2);
|
||||
let e: i16x8 = i16x8::new(1, 4, 3, 8, 5, 12, 7, 16);
|
||||
let r: i16x8 = transmute(vmull_s8(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_s16() {
|
||||
let a: i16x4 = i16x4::new(1, 2, 3, 4);
|
||||
let b: i16x4 = i16x4::new(1, 2, 1, 2);
|
||||
let e: i32x4 = i32x4::new(1, 4, 3, 8);
|
||||
let r: i32x4 = transmute(vmull_s16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_s32() {
|
||||
let a: i32x2 = i32x2::new(1, 2);
|
||||
let b: i32x2 = i32x2::new(1, 2);
|
||||
let e: i64x2 = i64x2::new(1, 4);
|
||||
let r: i64x2 = transmute(vmull_s32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_u8() {
|
||||
let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b: u8x8 = u8x8::new(1, 2, 1, 2, 1, 2, 1, 2);
|
||||
let e: u16x8 = u16x8::new(1, 4, 3, 8, 5, 12, 7, 16);
|
||||
let r: u16x8 = transmute(vmull_u8(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_u16() {
|
||||
let a: u16x4 = u16x4::new(1, 2, 3, 4);
|
||||
let b: u16x4 = u16x4::new(1, 2, 1, 2);
|
||||
let e: u32x4 = u32x4::new(1, 4, 3, 8);
|
||||
let r: u32x4 = transmute(vmull_u16(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_u32() {
|
||||
let a: u32x2 = u32x2::new(1, 2);
|
||||
let b: u32x2 = u32x2::new(1, 2);
|
||||
let e: u64x2 = u64x2::new(1, 4);
|
||||
let r: u64x2 = transmute(vmull_u32(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_p8() {
|
||||
let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let b: i8x8 = i8x8::new(1, 3, 1, 3, 1, 3, 1, 3);
|
||||
let e: i16x8 = i16x8::new(1, 6, 3, 12, 5, 10, 7, 24);
|
||||
let r: i16x8 = transmute(vmull_p8(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vsub_s8() {
|
||||
let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
|
|
|
|||
|
|
@ -784,6 +784,60 @@ generate float64x*_t
|
|||
arm = vmla.
|
||||
generate float*_t
|
||||
|
||||
/// Signed multiply-add long
|
||||
name = vmlal
|
||||
multi_fn = simd_add, a, {vmull-self-noext, b, c}
|
||||
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||
validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
|
||||
|
||||
arm = vmlal.s
|
||||
aarch64 = smlal
|
||||
generate int16x8_t:int8x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
|
||||
|
||||
/// Unsigned multiply-add long
|
||||
name = vmlal
|
||||
multi_fn = simd_add, a, {vmull-self-noext, b, c}
|
||||
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||
validate 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
|
||||
|
||||
arm = vmlal.s
|
||||
aarch64 = umlal
|
||||
generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t
|
||||
|
||||
/// Signed multiply-add long
|
||||
name = vmlal_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-noext, c:half, c, c, {fixed-half-right}
|
||||
multi_fn = vmlal-noqself-noext, a, b, c
|
||||
a = 8, 7, 6, 5, 4, 3, 2, 1
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
|
||||
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
validate 8, 9, 10, 11, 12, 13, 14, 15
|
||||
|
||||
aarch64 = smlal2
|
||||
generate int16x8_t:int8x16_t:int8x16_t:int16x8_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
|
||||
|
||||
/// Unsigned multiply-add long
|
||||
name = vmlal_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-noext, c:half, c, c, {fixed-half-right}
|
||||
multi_fn = vmlal-noqself-noext, a, b, c
|
||||
a = 8, 7, 6, 5, 4, 3, 2, 1
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
|
||||
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
validate 8, 9, 10, 11, 12, 13, 14, 15
|
||||
|
||||
aarch64 = umlal2
|
||||
generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
|
||||
|
||||
/// Multiply-subtract from accumulator
|
||||
name = vmls
|
||||
multi_fn = simd_sub, a, {simd_mul, b, c}
|
||||
|
|
@ -810,6 +864,60 @@ generate float64x*_t
|
|||
arm = vmls.
|
||||
generate float*_t
|
||||
|
||||
/// Signed multiply-subtract long
|
||||
name = vmlsl
|
||||
multi_fn = simd_sub, a, {vmull-self-noext, b, c}
|
||||
a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
|
||||
arm = vmlsl.s
|
||||
aarch64 = smlsl
|
||||
generate int16x8_t:int8x8_t:int8x8_t:int16x8_t, int32x4_t:int16x4_t:int16x4_t:int32x4_t, int64x2_t:int32x2_t:int32x2_t:int64x2_t
|
||||
|
||||
/// Signed multiply-subtract long
|
||||
name = vmlsl
|
||||
multi_fn = simd_sub, a, {vmull-self-noext, b, c}
|
||||
a = 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
c = 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
|
||||
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
|
||||
arm = vmlsl.s
|
||||
aarch64 = umlsl
|
||||
generate uint16x8_t:uint8x8_t:uint8x8_t:uint16x8_t, uint32x4_t:uint16x4_t:uint16x4_t:uint32x4_t, uint64x2_t:uint32x2_t:uint32x2_t:uint64x2_t
|
||||
|
||||
/// Signed multiply-subtract long
|
||||
name = vmlsl_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-noext, c:half, c, c, {fixed-half-right}
|
||||
multi_fn = vmlsl-noqself-noext, a, b, c
|
||||
a = 14, 15, 16, 17, 18, 19, 20, 21
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
|
||||
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
validate 14, 13, 12, 11, 10, 9, 8, 7
|
||||
|
||||
aarch64 = smlsl2
|
||||
generate int16x8_t:int8x16_t:int8x16_t:int16x8_t, int32x4_t:int16x8_t:int16x8_t:int32x4_t, int64x2_t:int32x4_t:int32x4_t:int64x2_t
|
||||
|
||||
/// Unsigned multiply-subtract long
|
||||
name = vmlsl_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-noext, c:half, c, c, {fixed-half-right}
|
||||
multi_fn = vmlsl-noqself-noext, a, b, c
|
||||
a = 14, 15, 16, 17, 18, 19, 20, 21
|
||||
b = 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
|
||||
c = 3, 3, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 6, 7
|
||||
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
validate 14, 13, 12, 11, 10, 9, 8, 7
|
||||
|
||||
aarch64 = umlsl2
|
||||
generate uint16x8_t:uint8x16_t:uint8x16_t:uint16x8_t, uint32x4_t:uint16x8_t:uint16x8_t:uint32x4_t, uint64x2_t:uint32x4_t:uint32x4_t:uint64x2_t
|
||||
|
||||
/// Saturating subtract
|
||||
name = vqsub
|
||||
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42
|
||||
|
|
@ -907,6 +1015,84 @@ generate float64x*_t
|
|||
arm = vmul.
|
||||
generate float*_t
|
||||
|
||||
/// Signed multiply long
|
||||
name = vmull
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
|
||||
validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32
|
||||
|
||||
arm = vmull.s
|
||||
aarch64 = smull
|
||||
link-arm = vmulls._EXT_
|
||||
link-aarch64 = smull._EXT_
|
||||
generate int8x8_t:int8x8_t:int16x8_t, int16x4_t:int16x4_t:int32x4_t, int32x2_t:int32x2_t:int64x2_t
|
||||
|
||||
/// Signed multiply long
|
||||
name = vmull_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, a:half, a, a, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = vmull-noqself-noext, a, b
|
||||
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
|
||||
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
validate 9, 20, 11, 24, 13, 28, 15, 32
|
||||
|
||||
aarch64 = smull2
|
||||
generate int8x16_t:int8x16_t:int16x8_t, int16x8_t:int16x8_t:int32x4_t, int32x4_t:int32x4_t:int64x2_t
|
||||
|
||||
/// Unsigned multiply long
|
||||
name = vmull
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8
|
||||
b = 1, 2, 1, 2, 1, 2, 1, 2
|
||||
validate 1, 4, 3, 8, 5, 12, 7, 16
|
||||
|
||||
arm = vmull.s
|
||||
aarch64 = umull
|
||||
link-arm = vmullu._EXT_
|
||||
link-aarch64 = umull._EXT_
|
||||
generate uint8x8_t:uint8x8_t:uint16x8_t, uint16x4_t:uint16x4_t:uint32x4_t, uint32x2_t:uint32x2_t:uint64x2_t
|
||||
|
||||
/// Unsigned multiply long
|
||||
name = vmull_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, a:half, a, a, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = vmull-noqself-noext, a, b
|
||||
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2
|
||||
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
validate 9, 20, 11, 24, 13, 28, 15, 32
|
||||
|
||||
aarch64 = umull2
|
||||
generate uint8x16_t:uint8x16_t:uint16x8_t, uint16x8_t:uint16x8_t:uint32x4_t, uint32x4_t:uint32x4_t:uint64x2_t
|
||||
|
||||
/// Polynomial multiply long
|
||||
name = vmull
|
||||
a = 1, 2, 3, 4, 5, 6, 7, 8
|
||||
b = 1, 3, 1, 3, 1, 3, 1, 3
|
||||
validate 1, 6, 3, 12, 5, 10, 7, 24
|
||||
|
||||
arm = vmull.s
|
||||
aarch64 = pmull
|
||||
link-arm = vmullp._EXT_
|
||||
link-aarch64 = pmull._EXT_
|
||||
generate poly8x8_t:poly8x8_t:poly16x8_t
|
||||
|
||||
/// Polynomial multiply long
|
||||
name = vmull_high
|
||||
no-q
|
||||
multi_fn = simd_shuffle-out_len-noext, a:half, a, a, {fixed-half-right}
|
||||
multi_fn = simd_shuffle-out_len-noext, b:half, b, b, {fixed-half-right}
|
||||
multi_fn = vmull-noqself-noext, a, b
|
||||
a = 1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
b = 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3
|
||||
fixed = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
validate 9, 30, 11, 20, 13, 18, 15, 48
|
||||
|
||||
aarch64 = pmull
|
||||
generate poly8x16_t:poly8x16_t:poly16x8_t
|
||||
|
||||
/// Divide
|
||||
name = vdiv
|
||||
fn = simd_div
|
||||
|
|
|
|||
|
|
@ -75,6 +75,8 @@ fn type_len(t: &str) -> usize {
|
|||
"float64x2_t" => 2,
|
||||
"poly8x8_t" => 8,
|
||||
"poly8x16_t" => 16,
|
||||
"poly16x4_t" => 4,
|
||||
"poly16x8_t" => 8,
|
||||
"poly64x1_t" => 1,
|
||||
"poly64x2_t" => 2,
|
||||
_ => panic!("unknown type: {}", t),
|
||||
|
|
@ -231,6 +233,8 @@ fn type_to_global_type(t: &str) -> &str {
|
|||
"float64x2_t" => "f64x2",
|
||||
"poly8x8_t" => "i8x8",
|
||||
"poly8x16_t" => "i8x16",
|
||||
"poly16x4_t" => "i16x4",
|
||||
"poly16x8_t" => "i16x8",
|
||||
"poly64x1_t" => "i64x1",
|
||||
"poly64x2_t" => "i64x2",
|
||||
_ => panic!("unknown type: {}", t),
|
||||
|
|
@ -291,6 +295,10 @@ fn type_to_ext(t: &str) -> &str {
|
|||
"float32x4_t" => "v4f32",
|
||||
"float64x1_t" => "v1f64",
|
||||
"float64x2_t" => "v2f64",
|
||||
"poly8x8_t" => "v8i8",
|
||||
"poly8x16_t" => "v16i8",
|
||||
"poly16x4_t" => "v4i16",
|
||||
"poly16x8_t" => "v8i16",
|
||||
/*
|
||||
"poly64x1_t" => "i64x1",
|
||||
"poly64x2_t" => "i64x2",
|
||||
|
|
@ -299,6 +307,24 @@ fn type_to_ext(t: &str) -> &str {
|
|||
}
|
||||
}
|
||||
|
||||
fn type_to_half(t: &str) -> &str {
|
||||
match t {
|
||||
"int8x16_t" => "int8x8_t",
|
||||
"int16x8_t" => "int16x4_t",
|
||||
"int32x4_t" => "int32x2_t",
|
||||
"int64x2_t" => "int64x1_t",
|
||||
"uint8x16_t" => "uint8x8_t",
|
||||
"uint16x8_t" => "uint16x4_t",
|
||||
"uint32x4_t" => "uint32x2_t",
|
||||
"uint64x2_t" => "uint64x1_t",
|
||||
"poly8x16_t" => "poly8x8_t",
|
||||
"poly16x8_t" => "poly16x4_t",
|
||||
"float32x4_t" => "float32x2_t",
|
||||
"float64x2_t" => "float64x1_t",
|
||||
_ => panic!("unknown half type for {}", t),
|
||||
}
|
||||
}
|
||||
|
||||
fn values(t: &str, vs: &[String]) -> String {
|
||||
if vs.len() == 1 && !t.contains('x') {
|
||||
format!(": {} = {}", t, vs[0])
|
||||
|
|
@ -588,7 +614,7 @@ fn gen_aarch64(
|
|||
in_t,
|
||||
&out_t,
|
||||
current_tests,
|
||||
[type_len(in_t[0]), type_len(in_t[0]), type_len(in_t[0])],
|
||||
[type_len(in_t[0]), type_len(in_t[1]), type_len(in_t[2])],
|
||||
type_len(out_t),
|
||||
para_num,
|
||||
);
|
||||
|
|
@ -843,8 +869,8 @@ fn gen_arm(
|
|||
{}
|
||||
"#,
|
||||
current_comment,
|
||||
expand_intrinsic(¤t_arm, in_t[0]),
|
||||
expand_intrinsic(¤t_aarch64, in_t[0]),
|
||||
expand_intrinsic(¤t_arm, in_t[1]),
|
||||
expand_intrinsic(¤t_aarch64, in_t[1]),
|
||||
call,
|
||||
);
|
||||
let test = gen_test(
|
||||
|
|
@ -887,6 +913,8 @@ fn expand_intrinsic(intr: &str, t: &str) -> String {
|
|||
"float64x2_t" => "f64",
|
||||
"poly8x8_t" => "i8",
|
||||
"poly8x16_t" => "i8",
|
||||
"poly16x4_t" => "i16",
|
||||
"poly16x8_t" => "i16",
|
||||
/*
|
||||
"poly64x1_t" => "i64x1",
|
||||
"poly64x2_t" => "i64x2",
|
||||
|
|
@ -912,6 +940,10 @@ fn expand_intrinsic(intr: &str, t: &str) -> String {
|
|||
"uint32x4_t" => "u32",
|
||||
"uint64x1_t" => "u64",
|
||||
"uint64x2_t" => "u64",
|
||||
"poly8x8_t" => "p8",
|
||||
"poly8x16_t" => "p8",
|
||||
"poly16x4_t" => "p16",
|
||||
"poly16x8_t" => "p16",
|
||||
"float16x4_t" => "f16",
|
||||
"float16x8_t" => "f16",
|
||||
"float32x2_t" => "f32",
|
||||
|
|
@ -978,11 +1010,13 @@ fn get_call(
|
|||
} else if s.contains(':') {
|
||||
let re_params: Vec<_> = s.split(':').map(|v| v.to_string()).collect();
|
||||
if re_params[1] == "" {
|
||||
re = Some((re_params[0].clone(), in_t[0].to_string()));
|
||||
re = Some((re_params[0].clone(), in_t[1].to_string()));
|
||||
} else if re_params[1] == "in_t" {
|
||||
re = Some((re_params[0].clone(), in_t[0].to_string()));
|
||||
re = Some((re_params[0].clone(), in_t[1].to_string()));
|
||||
} else if re_params[1] == "out_t" {
|
||||
re = Some((re_params[0].clone(), out_t.to_string()));
|
||||
} else if re_params[1] == "half" {
|
||||
re = Some((re_params[0].clone(), type_to_half(in_t[1]).to_string()));
|
||||
} else {
|
||||
re = Some((re_params[0].clone(), re_params[1].clone()));
|
||||
}
|
||||
|
|
@ -996,9 +1030,20 @@ fn get_call(
|
|||
}
|
||||
if fn_name == "fixed" {
|
||||
let (re_name, re_type) = re.unwrap();
|
||||
let fixed: Vec<String> = fixed.iter().take(type_len(in_t[0])).cloned().collect();
|
||||
let fixed: Vec<String> = fixed.iter().take(type_len(in_t[1])).cloned().collect();
|
||||
return format!(r#"let {}{};"#, re_name, values(&re_type, &fixed));
|
||||
}
|
||||
if fn_name == "fixed-half-right" {
|
||||
let fixed: Vec<String> = fixed.iter().take(type_len(in_t[1])).cloned().collect();
|
||||
let half = fixed[type_len(in_t[1]) / 2..]
|
||||
.iter()
|
||||
.fold(String::new(), |mut s, fix| {
|
||||
s.push_str(fix);
|
||||
s.push_str(", ");
|
||||
s
|
||||
});
|
||||
return format!(r#"[{}]"#, &half[..half.len() - 2]);
|
||||
}
|
||||
if fn_name.contains('-') {
|
||||
let fn_format: Vec<_> = fn_name.split('-').map(|v| v.to_string()).collect();
|
||||
assert_eq!(fn_format.len(), 3);
|
||||
|
|
@ -1018,6 +1063,10 @@ fn get_call(
|
|||
} else if fn_format[1] == "noqself" {
|
||||
fn_name.push_str(type_to_noq_suffix(in_t[1]));
|
||||
} else if fn_format[1] == "nosuffix" {
|
||||
} else if fn_format[1] == "in_len" {
|
||||
fn_name.push_str(&type_len(in_t[1]).to_string());
|
||||
} else if fn_format[1] == "out_len" {
|
||||
fn_name.push_str(&type_len(out_t).to_string());
|
||||
} else {
|
||||
fn_name.push_str(&fn_format[1]);
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue