Add vmull_p64 and vmull_high_p64 for aarch64 (#1157)

2021-05-16 04:58:23 +08:00 · 2021-05-16 04:58:23 +08:00 · 09a05e02f4
commit 09a05e02f4
parent 4a21f4db0e
2 changed files with 45 additions and 5 deletions
--- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
+++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs
@ -4646,6 +4646,19 @@ pub unsafe fn vmull_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t {
    vmull_u32(a, b)
 }

+/// Polynomial multiply long
+#[inline]
+#[target_feature(enable = "neon,crypto")]
+#[cfg_attr(test, assert_instr(pmull))]
+pub unsafe fn vmull_p64(a: p64, b: p64) -> p128 {
+    #[allow(improper_ctypes)] // NOTE(review): presumably needed because the types below are not FFI-safe per the lint
+    extern "C" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmull64")]
+        fn vmull_p64_(a: p64, b: p64) -> int8x16_t; // the LLVM intrinsic is declared as returning `int8x16_t`, not `p128`
+    }
+    transmute(vmull_p64_(a, b)) // reinterpret the `int8x16_t` result as the `p128` return value
+}
+
 /// Polynomial multiply long
 #[inline]
 #[target_feature(enable = "neon")]
@ -4656,6 +4669,14 @@ pub unsafe fn vmull_high_p8(a: poly8x16_t, b: poly8x16_t) -> poly16x8_t {
    vmull_p8(a, b)
 }

+/// Polynomial multiply long
+#[inline]
+#[target_feature(enable = "neon,crypto")]
+#[cfg_attr(test, assert_instr(pmull))] // checked against `pmull`; the spec entry was switched from `pmull2` in this commit
+pub unsafe fn vmull_high_p64(a: poly64x2_t, b: poly64x2_t) -> p128 {
+    vmull_p64(simd_extract(a, 1), simd_extract(b, 1)) // multiply lane 1 (the "high" lane) of each input via `vmull_p64`
+}
+
 /// Multiply long
 #[inline]
 #[target_feature(enable = "neon")]
@ -12612,6 +12633,15 @@ mod test {
        assert_eq!(r, e);
    }

+    #[simd_test(enable = "neon,crypto")] // gate on the same features `vmull_p64` is compiled with
+    unsafe fn test_vmull_p64() {
+        let a: p64 = 15; // bits 0b1111: x^3 + x^2 + x + 1
+        let b: p64 = 3; // bits 0b11: x + 1
+        let e: p128 = 17; // carry-less (GF(2)) product: x^4 + 1, not the integer product 45
+        let r: p128 = transmute(vmull_p64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
    #[simd_test(enable = "neon")]
    unsafe fn test_vmull_high_p8() {
        let a: i8x16 = i8x16::new(1, 2, 9, 10, 9, 10, 11, 12, 9, 10, 11, 12, 13, 14, 15, 16);
@ -12621,6 +12651,15 @@ mod test {
        assert_eq!(r, e);
    }

+    #[simd_test(enable = "neon,crypto")] // gate on the same features `vmull_high_p64` is compiled with
+    unsafe fn test_vmull_high_p64() {
+        let a: i64x2 = i64x2::new(1, 15); // only lane 1 (15 = x^3 + x^2 + x + 1) is used
+        let b: i64x2 = i64x2::new(1, 3); // only lane 1 (3 = x + 1) is used
+        let e: p128 = 17; // carry-less (GF(2)) product of the lane-1 values: x^4 + 1
+        let r: p128 = transmute(vmull_high_p64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
    #[simd_test(enable = "neon")]
    unsafe fn test_vmull_high_n_s16() {
        let a: i16x8 = i16x8::new(1, 2, 9, 10, 9, 10, 11, 12);
--- a/library/stdarch/crates/stdarch-gen/neon.spec
+++ b/library/stdarch/crates/stdarch-gen/neon.spec
@ -2214,9 +2214,10 @@ target = crypto

 aarch64 = pmull
 link-aarch64 = pmull64:p64:p64:p64:int8x16_t
-arm = vmull
-link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
-//generate p64:p64:p128
+// Due to the current state of LLVM support, vmull_p64 is only available on aarch64 for now
+// arm = vmull
+// link-arm = vmullp.v2i64:int64x1_t:int64x1_t:int64x1_t:int64x2_t
+generate p64:p64:p128


 /// Polynomial multiply long
@ -2242,8 +2243,8 @@ b = 1, 3
 validate 17
 target = crypto

-aarch64 = pmull2
-//generate poly64x2_t:poly64x2_t:p128
+aarch64 = pmull
+generate poly64x2_t:poly64x2_t:p128

 /// Vector long multiply with scalar
 name = vmull