Add vmull_p64 and vmull_high_p64 for aarch64 (#1157)
This commit is contained in:
parent
4a21f4db0e
commit
09a05e02f4
2 changed files with 45 additions and 5 deletions
|
|
@ -4646,6 +4646,19 @@ pub unsafe fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
|
|||
vmull_u32(a, b)
|
||||
}
|
||||
|
||||
/// Polynomial multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,crypto")]
|
||||
#[cfg_attr(test, assert_instr(pmull))]
|
||||
pub unsafe fn vmull_p64(a: p64, b: p64) -> p128 {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmull64")]
|
||||
fn vmull_p64_(a: p64, b: p64) -> int8x16_t;
|
||||
}
|
||||
transmute(vmull_p64_(a, b))
|
||||
}
|
||||
|
||||
/// Polynomial multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -4656,6 +4669,14 @@ pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
|
|||
vmull_p8(a, b)
|
||||
}
|
||||
|
||||
/// Polynomial multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,crypto")]
|
||||
#[cfg_attr(test, assert_instr(pmull))]
|
||||
pub unsafe fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 {
|
||||
vmull_p64(simd_extract(a, 1), simd_extract(b, 1))
|
||||
}
|
||||
|
||||
/// Multiply long
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -12612,6 +12633,15 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_p64() {
|
||||
let a: p64 = 15;
|
||||
let b: p64 = 3;
|
||||
let e: p128 = 17;
|
||||
let r: p128 = transmute(vmull_p64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_p8() {
|
||||
let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
|
|
@ -12621,6 +12651,15 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_p64() {
|
||||
let a: i64x2 = i64x2::new(1, 15);
|
||||
let b: i64x2 = i64x2::new(1, 3);
|
||||
let e: p128 = 17;
|
||||
let r: p128 = transmute(vmull_high_p64(transmute(a), transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vmull_high_n_s16() {
|
||||
let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
|
||||
|
|
|
|||
|
|
@ -2214,9 +2214,10 @@ target = crypto
|
|||
|
||||
aarch64 = pmull
|
||||
link-aarch64 = pmull64:p64:p64:p64:int8x16_t
|
||||
arm = vmull
|
||||
link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
|
||||
//generate p64:p64:p128
|
||||
// LLVM support for the arm variant is not yet sufficient, so vmull_p64 is currently generated for aarch64 only
|
||||
// arm = vmull
|
||||
// link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
|
||||
generate p64:p64:p128
|
||||
|
||||
|
||||
/// Polynomial multiply long
|
||||
|
|
@ -2242,8 +2243,8 @@ b = 1, 3
|
|||
validate 17
|
||||
target = crypto
|
||||
|
||||
aarch64 = pmull2
|
||||
//generate poly64x2_t:poly64x2_t:p128
|
||||
aarch64 = pmull
|
||||
generate poly64x2_t:poly64x2_t:p128
|
||||
|
||||
/// Vector long multiply with scalar
|
||||
name = vmull
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue