From 8b27771cc5d2f2c7c61519475a05a59c8efbbfdd Mon Sep 17 00:00:00 2001
From: Andrew Gallant <jamslam@gmail.com>
Date: Sun, 11 Dec 2016 20:35:28 -0500
Subject: [PATCH] sse2: add f64x2 logical ops, scalar compares and _mm_cmpeq_pd

---
 library/stdarch/TODO.md          |  34 +--
 library/stdarch/examples/play.rs |   8 +-
 library/stdarch/src/x86/sse2.rs  | 367 +++++++++++++++++++++++++++++++
 3 files changed, 391 insertions(+), 18 deletions(-)

diff --git a/library/stdarch/TODO.md b/library/stdarch/TODO.md
index 085d330af904..09d61dbc1184 100644
--- a/library/stdarch/TODO.md
+++ b/library/stdarch/TODO.md
@@ -139,23 +139,23 @@ sse2
 * [x] `_mm_sqrt_pd`
 * [x] `_mm_sub_sd`
 * [x] `_mm_sub_pd`
-* [ ] `_mm_and_pd`
-* [ ] `_mm_andnot_pd`
-* [ ] `_mm_or_pd`
-* [ ] `_mm_xor_pd`
-* [ ] `_mm_cmpeq_sd`
-* [ ] `_mm_cmplt_sd`
-* [ ] `_mm_cmple_sd`
-* [ ] `_mm_cmpgt_sd`
-* [ ] `_mm_cmpge_sd`
-* [ ] `_mm_cmpord_sd`
-* [ ] `_mm_cmpunord_sd`
-* [ ] `_mm_cmpneq_sd`
-* [ ] `_mm_cmpnlt_sd`
-* [ ] `_mm_cmpnle_sd`
-* [ ] `_mm_cmpngt_sd`
-* [ ] `_mm_cmpnge_sd`
-* [ ] `_mm_cmpeq_pd`
+* [x] `_mm_and_pd`
+* [x] `_mm_andnot_pd`
+* [x] `_mm_or_pd`
+* [x] `_mm_xor_pd`
+* [x] `_mm_cmpeq_sd`
+* [x] `_mm_cmplt_sd`
+* [x] `_mm_cmple_sd`
+* [x] `_mm_cmpgt_sd`
+* [x] `_mm_cmpge_sd`
+* [x] `_mm_cmpord_sd`
+* [x] `_mm_cmpunord_sd`
+* [x] `_mm_cmpneq_sd`
+* [x] `_mm_cmpnlt_sd`
+* [x] `_mm_cmpnle_sd`
+* [x] `_mm_cmpngt_sd`
+* [x] `_mm_cmpnge_sd`
+* [x] `_mm_cmpeq_pd`
 * [ ] `_mm_cmplt_pd`
 * [ ] `_mm_cmple_pd`
 * [ ] `_mm_cmpgt_pd`
diff --git a/library/stdarch/examples/play.rs b/library/stdarch/examples/play.rs
index 89a771fc5191..74980da4d6d0 100644
--- a/library/stdarch/examples/play.rs
+++ b/library/stdarch/examples/play.rs
@@ -3,6 +3,11 @@ extern crate stdsimd;
 use std::env;
 use stdsimd as s;
 
+#[inline(never)]
+fn foobar(a: s::f64x2, b: s::f64x2) -> s::f64x2 {
+    s::_mm_cmpge_sd(a, b)
+}
+
 fn main() {
     let x0: f64 = env::args().nth(1).unwrap().parse().unwrap();
     let x1: f64 = env::args().nth(2).unwrap().parse().unwrap();
@@ -15,6 +20,7 @@ fn main() {
 
     let a = s::f64x2::new(x0, x1);
     let b = s::f64x2::new(x2, x3);
-    let r = s::_mm_div_sd(a, b);
+    // let r = s::_mm_cmplt_sd(a, b);
+    let r = foobar(a, b);
     println!("{:?}", r);
 }
diff --git a/library/stdarch/src/x86/sse2.rs b/library/stdarch/src/x86/sse2.rs
index 0beb2a67fba1..563e06fd14df 100644
--- a/library/stdarch/src/x86/sse2.rs
+++ b/library/stdarch/src/x86/sse2.rs
@@ -1240,6 +1240,163 @@ pub fn _mm_sub_pd(a: f64x2, b: f64x2) -> f64x2 {
     a - b
 }
 
+/// Compute the bitwise AND of packed double-precision (64-bit) floating-point
+/// elements in `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_and_pd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe {
+        let a: i64x2 = mem::transmute(a);
+        let b: i64x2 = mem::transmute(b);
+        mem::transmute(a & b)
+    }
+}
+
+/// Compute the bitwise NOT of `a` and then AND with `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_andnot_pd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe {
+        let a: i64x2 = mem::transmute(a);
+        let b: i64x2 = mem::transmute(b);
+        mem::transmute((!a) & b)
+    }
+}
+
+/// Compute the bitwise OR of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_or_pd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe {
+        let a: i64x2 = mem::transmute(a);
+        let b: i64x2 = mem::transmute(b);
+        mem::transmute(a | b)
+    }
+}
+
+/// Compute the bitwise XOR of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_xor_pd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe {
+        let a: i64x2 = mem::transmute(a);
+        let b: i64x2 = mem::transmute(b);
+        mem::transmute(a ^ b)
+    }
+}
+
+/// Return a new vector with the low element of `a` replaced by the equality
+/// comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpeq_sd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmpsd(a, b, 0) }
+}
+
+/// Return a new vector with the low element of `a` replaced by the less-than
+/// comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmplt_sd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmpsd(a, b, 1) }
+}
+
+/// Return a new vector with the low element of `a` replaced by the
+/// less-than-or-equal comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmple_sd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmpsd(a, b, 2) }
+}
+
+/// Return a new vector with the low element of `a` replaced by the
+/// greater-than comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpgt_sd(a: f64x2, b: f64x2) -> f64x2 {
+    _mm_cmplt_sd(b, a).insert(1, a.extract(1))
+}
+
+/// Return a new vector with the low element of `a` replaced by the
+/// greater-than-or-equal comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpge_sd(a: f64x2, b: f64x2) -> f64x2 {
+    _mm_cmple_sd(b, a).insert(1, a.extract(1))
+}
+
+/// Return a new vector with the low element of `a` replaced by the result
+/// of checking whether the lower elements of `a` and `b` are ordered. If
+/// neither is `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
+/// otherwise.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpord_sd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmpsd(a, b, 7) }
+}
+
+/// Return a new vector with the low element of `a` replaced by the result of
+/// checking whether the lower elements of `a` and `b` are unordered. If either
+/// is `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpunord_sd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmpsd(a, b, 3) }
+}
+
+/// Return a new vector with the low element of `a` replaced by the not-equal
+/// comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpneq_sd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmpsd(a, b, 4) }
+}
+
+/// Return a new vector with the low element of `a` replaced by the
+/// not-less-than comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpnlt_sd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmpsd(a, b, 5) }
+}
+
+/// Return a new vector with the low element of `a` replaced by the
+/// not-less-than-or-equal comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpnle_sd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmpsd(a, b, 6) }
+}
+
+/// Return a new vector with the low element of `a` replaced by the
+/// not-greater-than comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpngt_sd(a: f64x2, b: f64x2) -> f64x2 {
+    _mm_cmpnlt_sd(b, a).insert(1, a.extract(1))
+}
+
+/// Return a new vector with the low element of `a` replaced by the
+/// not-greater-than-or-equal comparison of the lower elements of `a` and `b`.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpnge_sd(a: f64x2, b: f64x2) -> f64x2 {
+    _mm_cmpnle_sd(b, a).insert(1, a.extract(1))
+}
+
+/// Compare corresponding elements in `a` and `b` for equality.
+#[inline(always)]
+#[target_feature = "+sse2"]
+pub fn _mm_cmpeq_pd(a: f64x2, b: f64x2) -> f64x2 {
+    unsafe { cmppd(a, b, 0) }
+}
+
+
+
+
+
+
+
 
 #[inline(always)]
 #[target_feature = "+sse2"]
@@ -1357,6 +1514,10 @@ extern {
     fn sqrtsd(a: f64x2) -> f64x2;
     #[link_name = "llvm.x86.sse2.sqrt.pd"]
     fn sqrtpd(a: f64x2) -> f64x2;
+    #[link_name = "llvm.x86.sse2.cmp.sd"]
+    fn cmpsd(a: f64x2, b: f64x2, imm8: i8) -> f64x2;
+    #[link_name = "llvm.x86.sse2.cmp.pd"]
+    fn cmppd(a: f64x2, b: f64x2, imm8: i8) -> f64x2;
 }
 
 #[cfg(test)]
@@ -2439,4 +2600,210 @@ mod tests {
             sse2::_mm_sub_pd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)),
             f64x2::new(-4.0, -8.0));
     }
+
+    #[test]
+    fn _mm_and_pd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let a: f64x2 = transmute(i64x2::splat(5));
+            let b: f64x2 = transmute(i64x2::splat(3));
+            let e: f64x2 = transmute(i64x2::splat(1));
+            assert_eq!(sse2::_mm_and_pd(a, b), e);
+        }
+    }
+
+    #[test]
+    fn _mm_andnot_pd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let a: f64x2 = transmute(i64x2::splat(5));
+            let b: f64x2 = transmute(i64x2::splat(3));
+            let e: f64x2 = transmute(i64x2::splat(2));
+            assert_eq!(sse2::_mm_andnot_pd(a, b), e);
+        }
+    }
+
+    #[test]
+    fn _mm_or_pd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let a: f64x2 = transmute(i64x2::splat(5));
+            let b: f64x2 = transmute(i64x2::splat(3));
+            let e: f64x2 = transmute(i64x2::splat(7));
+            assert_eq!(sse2::_mm_or_pd(a, b), e);
+        }
+    }
+
+    #[test]
+    fn _mm_xor_pd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let a: f64x2 = transmute(i64x2::splat(5));
+            let b: f64x2 = transmute(i64x2::splat(3));
+            let e: f64x2 = transmute(i64x2::splat(6));
+            assert_eq!(sse2::_mm_xor_pd(a, b), e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpeq_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
+            let e = u64x2::new(!0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpeq_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmplt_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0));
+            let e = u64x2::new(!0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmplt_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmple_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
+            let e = u64x2::new(!0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmple_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpgt_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(5.0, 2.0), f64x2::new(1.0, 3.0));
+            let e = u64x2::new(!0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpgt_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpge_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
+            let e = u64x2::new(!0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpge_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpord_sd() {
+        use std::f64::NAN;
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(NAN, 2.0), f64x2::new(5.0, 3.0));
+            let e = u64x2::new(0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpord_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpunord_sd() {
+        use std::f64::NAN;
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(NAN, 2.0), f64x2::new(5.0, 3.0));
+            let e = u64x2::new(!0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpunord_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpneq_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0));
+            let e = u64x2::new(!0u64, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpneq_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpnlt_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0));
+            let e = u64x2::new(0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpnlt_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpnle_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
+            let e = u64x2::new(0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpnle_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpngt_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(5.0, 2.0), f64x2::new(1.0, 3.0));
+            let e = u64x2::new(0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpngt_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpnge_sd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
+            let e = u64x2::new(0, transmute(2.0f64));
+            let r: u64x2 = transmute(sse2::_mm_cmpnge_sd(a, b));
+            assert_eq!(r, e);
+        }
+    }
+
+    #[test]
+    fn _mm_cmpeq_pd() {
+        use std::mem::transmute;
+
+        unsafe {
+            let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
+            let e = u64x2::new(!0, 0);
+            let r: u64x2 = transmute(sse2::_mm_cmpeq_pd(a, b));
+            assert_eq!(r, e);
+        }
+    }
 }