sse3: _mm_addsub_pd

2017-09-30 11:54:30 +02:00 · 2017-09-30 11:54:30 +02:00 · dc684dc221
commit dc684dc221
parent fff98467f3
1 changed files with 20 additions and 1 deletions
--- a/library/stdarch/src/x86/sse3.rs
+++ b/library/stdarch/src/x86/sse3.rs
@ -1,5 +1,5 @@
 use x86::__m128i;
-use v128::f32x4;
+use v128::*;

 #[cfg(test)]
 use stdsimd_test::assert_instr;
@ -13,6 +13,15 @@ pub unsafe fn _mm_addsub_ps(a: f32x4, b: f32x4) -> f32x4 {
    addsubps(a, b)
 }

+/// Alternately subtract and add packed double-precision (64-bit) floating-point
+/// elements of `b` from/to `a`: lane 0 is `a0 - b0`, lane 1 is `a1 + b1`.
+#[inline(always)]
+#[target_feature = "+sse3"]
+#[cfg_attr(test, assert_instr(addsubpd))]
+pub unsafe fn _mm_addsub_pd(a: f64x2, b: f64x2) -> f64x2 {
+    addsubpd(a, b)
+}
+
 /// Load 128-bits of integer data from unaligned memory.
 /// This intrinsic may perform better than `_mm_loadu_si128`
 /// when the data crosses a cache line boundary.
@ -27,6 +36,8 @@ pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i {
 extern {
    #[link_name = "llvm.x86.sse3.addsub.ps"]
    fn addsubps(a: f32x4, b: f32x4) -> f32x4;
+    #[link_name = "llvm.x86.sse3.addsub.pd"]
+    fn addsubpd(a: f64x2, b: f64x2) -> f64x2;
    #[link_name = "llvm.x86.sse3.ldu.dq"]
    fn lddqu(mem_addr: *const i8) -> __m128i;
 }
@ -47,6 +58,14 @@ mod tests {
        assert_eq!(r, f32x4::new(99.0, 25.0, 0.0, -15.0));
    }

+    #[simd_test = "sse3"]
+    unsafe fn _mm_addsub_pd() {
+        let (lhs, rhs) = (f64x2::new(-1.0, 5.0), f64x2::new(-100.0, 20.0));
+        // Lane 0 subtracts (-1 - -100 = 99); lane 1 adds (5 + 20 = 25).
+        let expected = f64x2::new(99.0, 25.0);
+        assert_eq!(sse3::_mm_addsub_pd(lhs, rhs), expected);
+    }
+
    #[simd_test = "sse3"]
    unsafe fn _mm_lddqu_si128() {
        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);