Add vmull_p64 and vmull_high_p64 for aarch64 (#1157)

This commit is contained in:
Sparrow Li 2021-05-16 04:58:23 +08:00 committed by GitHub
parent 4a21f4db0e
commit 09a05e02f4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 45 additions and 5 deletions

View file

@ -4646,6 +4646,19 @@ pub unsafe fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
vmull_u32(a, b)
}
/// Polynomial multiply long
///
/// Passes both `p64` operands to the LLVM intrinsic
/// `llvm.aarch64.neon.pmull64` and reinterprets the `int8x16_t` value it
/// returns as a `p128`.
#[inline]
#[target_feature(enable = "neon,crypto")]
#[cfg_attr(test, assert_instr(pmull))]
pub unsafe fn vmull_p64(a: p64, b: p64) -> p128 {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmull64")]
        fn vmull_p64_(a: p64, b: p64) -> int8x16_t;
    }
    // The intrinsic is declared as returning a byte vector; the public API
    // exposes the same bits as a single `p128`.
    let product: int8x16_t = vmull_p64_(a, b);
    transmute::<int8x16_t, p128>(product)
}
/// Polynomial multiply long
#[inline]
#[target_feature(enable = "neon")]
@ -4656,6 +4669,14 @@ pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
vmull_p8(a, b)
}
/// Polynomial multiply long
#[inline]
#[target_feature(enable = "neon,crypto")]
#[cfg_attr(test, assert_instr(pmull))]
pub unsafe fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 {
vmull_p64(simd_extract(a, 1), simd_extract(b, 1))
}
/// Multiply long
#[inline]
#[target_feature(enable = "neon")]
@ -12612,6 +12633,15 @@ mod test {
assert_eq!(r, e);
}
// The feature list mirrors the `#[target_feature(enable = "neon,crypto")]`
// on `vmull_p64`, so the test is gated on everything the intrinsic needs.
#[simd_test(enable = "neon,crypto")]
unsafe fn test_vmull_p64() {
    let a: p64 = 15;
    let b: p64 = 3;
    // (x^3 + x^2 + x + 1) * (x + 1) = x^4 + 1 with coefficients reduced mod 2.
    let e: p128 = 17;
    // Arguments and result already have the intrinsic's own types, so no
    // `transmute` is needed.
    let r: p128 = vmull_p64(a, b);
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_p8() {
let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
@ -12621,6 +12651,15 @@ mod test {
assert_eq!(r, e);
}
// The feature list mirrors the `#[target_feature(enable = "neon,crypto")]`
// on `vmull_high_p64`, so the test is gated on everything the intrinsic needs.
#[simd_test(enable = "neon,crypto")]
unsafe fn test_vmull_high_p64() {
    // Only lane 1 of each vector is multiplied; lane 0 holds filler values.
    let a: i64x2 = i64x2::new(1, 15);
    let b: i64x2 = i64x2::new(1, 3);
    // (x^3 + x^2 + x + 1) * (x + 1) = x^4 + 1 with coefficients reduced mod 2.
    let e: p128 = 17;
    // The inputs are reinterpreted as `poly64x2_t`; the result is already a
    // `p128`, so it is compared directly.
    let r: p128 = vmull_high_p64(transmute(a), transmute(b));
    assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmull_high_n_s16() {
let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);

View file

@ -2214,9 +2214,10 @@ target = crypto
aarch64 = pmull
link-aarch64 = pmull64:p64:p64:p64:int8x16_t
arm = vmull
link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
//generate p64:p64:p128
// Due to the current state of LLVM support, vmull_p64 is only available on aarch64 for now
// arm = vmull
// link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
generate p64:p64:p128
/// Polynomial multiply long
@ -2242,8 +2243,8 @@ b = 1, 3
validate 17
target = crypto
aarch64 = pmull2
//generate poly64x2_t:poly64x2_t:p128
aarch64 = pmull
generate poly64x2_t:poly64x2_t:p128
/// Vector long multiply with scalar
name = vmull