progress

2016-12-11 20:35:28 -05:00 · 2016-12-11 20:35:28 -05:00 · 8b27771cc5
commit 8b27771cc5
parent c01240299f
3 changed files with 391 additions and 18 deletions
--- a/library/stdarch/TODO.md
+++ b/library/stdarch/TODO.md
@ -139,23 +139,23 @@ sse2
 * [x] `_mm_sqrt_pd`
 * [x] `_mm_sub_sd`
 * [x] `_mm_sub_pd`
-* [ ] `_mm_and_pd`
-* [ ] `_mm_andnot_pd`
-* [ ] `_mm_or_pd`
-* [ ] `_mm_xor_pd`
-* [ ] `_mm_cmpeq_sd`
-* [ ] `_mm_cmplt_sd`
-* [ ] `_mm_cmple_sd`
-* [ ] `_mm_cmpgt_sd`
-* [ ] `_mm_cmpge_sd`
-* [ ] `_mm_cmpord_sd`
-* [ ] `_mm_cmpunord_sd`
-* [ ] `_mm_cmpneq_sd`
-* [ ] `_mm_cmpnlt_sd`
-* [ ] `_mm_cmpnle_sd`
-* [ ] `_mm_cmpngt_sd`
-* [ ] `_mm_cmpnge_sd`
-* [ ] `_mm_cmpeq_pd`
+* [x] `_mm_and_pd`
+* [x] `_mm_andnot_pd`
+* [x] `_mm_or_pd`
+* [x] `_mm_xor_pd`
+* [x] `_mm_cmpeq_sd`
+* [x] `_mm_cmplt_sd`
+* [x] `_mm_cmple_sd`
+* [x] `_mm_cmpgt_sd`
+* [x] `_mm_cmpge_sd`
+* [x] `_mm_cmpord_sd`
+* [x] `_mm_cmpunord_sd`
+* [x] `_mm_cmpneq_sd`
+* [x] `_mm_cmpnlt_sd`
+* [x] `_mm_cmpnle_sd`
+* [x] `_mm_cmpngt_sd`
+* [x] `_mm_cmpnge_sd`
+* [x] `_mm_cmpeq_pd`
 * [ ] `_mm_cmplt_pd`
 * [ ] `_mm_cmple_pd`
 * [ ] `_mm_cmpgt_pd`
--- a/library/stdarch/examples/play.rs
+++ b/library/stdarch/examples/play.rs
@ -3,6 +3,11 @@ extern crate stdsimd;
 use std::env;
 use stdsimd as s;

+#[inline(never)]
+fn foobar(a: s::f64x2, b: s::f64x2) -> s::f64x2 {
+    s::_mm_cmpge_sd(a, b)
+}
+
 fn main() {
    let x0: f64 = env::args().nth(1).unwrap().parse().unwrap();
    let x1: f64 = env::args().nth(2).unwrap().parse().unwrap();
@ -15,6 +20,7 @@ fn main() {

    let a = s::f64x2::new(x0, x1);
    let b = s::f64x2::new(x2, x3);
-    let r = s::_mm_div_sd(a, b);
+    // let r = s::_mm_cmplt_sd(a, b);
+    let r = foobar(a, b);
    println!("{:?}", r);
 }
--- a/library/stdarch/src/x86/sse2.rs
+++ b/library/stdarch/src/x86/sse2.rs
@ -1240,6 +1240,163 @@ pub fn _mm_sub_pd(a: f64x2, b: f64x2) -> f64x2 {
    a - b
 }

+/// Compute the bitwise AND of packed double-precision (64-bit) floating-point
+/// elements in `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_and_pd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe {
+        let a: i64x2 = mem::transmute(a);
+        let b: i64x2 = mem::transmute(b);
+        mem::transmute(a & b)
+    }
+}
+
+/// Compute the bitwise NOT of `a` and then AND with `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_andnot_pd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe {
+        let a: i64x2 = mem::transmute(a);
+        let b: i64x2 = mem::transmute(b);
+        mem::transmute((!a) & b)
+    }
+}
+
+/// Compute the bitwise OR of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_or_pd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe {
+        let a: i64x2 = mem::transmute(a);
+        let b: i64x2 = mem::transmute(b);
+        mem::transmute(a | b)
+    }
+}
+
+/// Compute the bitwise XOR of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_xor_pd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe {
+        let a: i64x2 = mem::transmute(a);
+        let b: i64x2 = mem::transmute(b);
+        mem::transmute(a ^ b)
+    }
+}
+
+/// Return a new vector with the low element of `a` replaced by the equality
+/// comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpeq_sd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmpsd(a, b, 0) }
+}
+
+/// Return a new vector with the low element of `a` replaced by the less-than
+/// comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmplt_sd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmpsd(a, b, 1) }
+}
+
+/// Return a new vector with the low element of `a` replaced by the
+/// less-than-or-equal comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmple_sd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmpsd(a, b, 2) }
+}
+
+/// Return a new vector with the low element of `a` replaced by the
+/// greater-than comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpgt_sd(a: f64x2, b: f64x2) -> f64x2 {
+    _mm_cmplt_sd(b, a).insert(1, a.extract(1))
+}
+
+/// Return a new vector with the low element of `a` replaced by the
+/// greater-than-or-equal comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpge_sd(a: f64x2, b: f64x2) -> f64x2 {
+    _mm_cmple_sd(b, a).insert(1, a.extract(1))
+}
+
+/// Return a new vector with the low element of `a` replaced by the result
+/// of checking both of the lower elements of `a` and `b` for `NaN`. If
+/// neither is `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
+/// otherwise.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpord_sd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmpsd(a, b, 7) }
+}
+
+/// Return a new vector with the low element of `a` replaced by the result of
+/// checking both of the lower elements of `a` and `b` for `NaN`. If either
+/// is `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpunord_sd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmpsd(a, b, 3) }
+}
+
+/// Return a new vector with the low element of `a` replaced by the not-equal
+/// comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpneq_sd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmpsd(a, b, 4) }
+}
+
+/// Return a new vector with the low element of `a` replaced by the
+/// not-less-than comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpnlt_sd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmpsd(a, b, 5) }
+}
+
+/// Return a new vector with the low element of `a` replaced by the
+/// not-less-than-or-equal comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpnle_sd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmpsd(a, b, 6) }
+}
+
+/// Return a new vector with the low element of `a` replaced by the
+/// not-greater-than comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpngt_sd(a: f64x2, b: f64x2) -> f64x2 {
+    _mm_cmpnlt_sd(b, a).insert(1, a.extract(1))
+}
+
+/// Return a new vector with the low element of `a` replaced by the
+/// not-greater-than-or-equal comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpnge_sd(a: f64x2, b: f64x2) -> f64x2 {
+    _mm_cmpnle_sd(b, a).insert(1, a.extract(1))
+}
+
+/// Compare corresponding elements in `a` and `b` for equality.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpeq_pd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmppd(a, b, 0) }
+}
+
+
+
+
+
+
+

 #[inline(always)]
 #[target_feature = "+sse2"]
@ -1357,6 +1514,10 @@ extern {
    fn sqrtsd(a: f64x2) -> f64x2;
    #[link_name = "llvm.x86.sse2.sqrt.pd"]
    fn sqrtpd(a: f64x2) -> f64x2;
+    #[link_name = "llvm.x86.sse2.cmp.sd"]
+    fn cmpsd(a: f64x2, b: f64x2, imm8: i8) -> f64x2;
+    #[link_name = "llvm.x86.sse2.cmp.pd"]
+    fn cmppd(a: f64x2, b: f64x2, imm8: i8) -> f64x2;
 }

 #[cfg(test)]
@ -2439,4 +2600,210 @@ mod tests {
            sse2::_mm_sub_pd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)),
            f64x2::new(-4.0, -8.0));
    }
+
+    #[test]
+    fn _mm_and_pd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let a: f64x2 = transmute(i64x2::splat(5));
+            let b: f64x2 = transmute(i64x2::splat(3));
+            let e: f64x2 = transmute(i64x2::splat(1));
+            assert_eq!(sse2::_mm_and_pd(a, b), e);
+        }
+    }
+
+    #[test]
+    fn _mm_andnot_pd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let a: f64x2 = transmute(i64x2::splat(5));
+            let b: f64x2 = transmute(i64x2::splat(3));
+            let e: f64x2 = transmute(i64x2::splat(2));
+            assert_eq!(sse2::_mm_andnot_pd(a, b), e);
+        }
+    }
+
+    #[test]
+    fn _mm_or_pd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let a: f64x2 = transmute(i64x2::splat(5));
+            let b: f64x2 = transmute(i64x2::splat(3));
+            let e: f64x2 = transmute(i64x2::splat(7));
+            assert_eq!(sse2::_mm_or_pd(a, b), e);
+        }
+    }
+
+    #[test]
+    fn _mm_xor_pd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let a: f64x2 = transmute(i64x2::splat(5));
+            let b: f64x2 = transmute(i64x2::splat(3));
+            let e: f64x2 = transmute(i64x2::splat(6));
+            assert_eq!(sse2::_mm_xor_pd(a, b), e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpeq_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
+            let e = u64x2::new(!0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpeq_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmplt_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0));
+            let e = u64x2::new(!0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmplt_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmple_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
+            let e = u64x2::new(!0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmple_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpgt_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(5.0, 2.0), f64x2::new(1.0, 3.0));
+            let e = u64x2::new(!0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpgt_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpge_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
+            let e = u64x2::new(!0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpge_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpord_sd() {
+        use std::f64::NAN;
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(NAN, 2.0), f64x2::new(5.0, 3.0));
+            let e = u64x2::new(0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpord_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpunord_sd() {
+        use std::f64::NAN;
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(NAN, 2.0), f64x2::new(5.0, 3.0));
+            let e = u64x2::new(!0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpunord_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpneq_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0));
+            let e = u64x2::new(!0u64, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpneq_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpnlt_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0));
+            let e = u64x2::new(0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpnlt_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpnle_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
+            let e = u64x2::new(0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpnle_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpngt_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(5.0, 2.0), f64x2::new(1.0, 3.0));
+            let e = u64x2::new(0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpngt_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpnge_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
+            let e = u64x2::new(0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpnge_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpeq_pd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
+            let e = u64x2::new(!0, 0);
+            let r: u64x2 = transmute(sse2::_mm_cmpeq_pd(a, b));
+            assert_eq!(r, e);
+        }
+    }
 }