From 8b27771cc5d2f2c7c61519475a05a59c8efbbfdd Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 11 Dec 2016 20:35:28 -0500 Subject: [PATCH] progress --- library/stdarch/TODO.md | 34 +-- library/stdarch/examples/play.rs | 8 +- library/stdarch/src/x86/sse2.rs | 367 +++++++++++++++++++++++++++++++ 3 files changed, 391 insertions(+), 18 deletions(-) diff --git a/library/stdarch/TODO.md b/library/stdarch/TODO.md index 085d330af904..09d61dbc1184 100644 --- a/library/stdarch/TODO.md +++ b/library/stdarch/TODO.md @@ -139,23 +139,23 @@ sse2 * [x] `_mm_sqrt_pd` * [x] `_mm_sub_sd` * [x] `_mm_sub_pd` -* [ ] `_mm_and_pd` -* [ ] `_mm_andnot_pd` -* [ ] `_mm_or_pd` -* [ ] `_mm_xor_pd` -* [ ] `_mm_cmpeq_sd` -* [ ] `_mm_cmplt_sd` -* [ ] `_mm_cmple_sd` -* [ ] `_mm_cmpgt_sd` -* [ ] `_mm_cmpge_sd` -* [ ] `_mm_cmpord_sd` -* [ ] `_mm_cmpunord_sd` -* [ ] `_mm_cmpneq_sd` -* [ ] `_mm_cmpnlt_sd` -* [ ] `_mm_cmpnle_sd` -* [ ] `_mm_cmpngt_sd` -* [ ] `_mm_cmpnge_sd` -* [ ] `_mm_cmpeq_pd` +* [x] `_mm_and_pd` +* [x] `_mm_andnot_pd` +* [x] `_mm_or_pd` +* [x] `_mm_xor_pd` +* [x] `_mm_cmpeq_sd` +* [x] `_mm_cmplt_sd` +* [x] `_mm_cmple_sd` +* [x] `_mm_cmpgt_sd` +* [x] `_mm_cmpge_sd` +* [x] `_mm_cmpord_sd` +* [x] `_mm_cmpunord_sd` +* [x] `_mm_cmpneq_sd` +* [x] `_mm_cmpnlt_sd` +* [x] `_mm_cmpnle_sd` +* [x] `_mm_cmpngt_sd` +* [x] `_mm_cmpnge_sd` +* [x] `_mm_cmpeq_pd` * [ ] `_mm_cmplt_pd` * [ ] `_mm_cmple_pd` * [ ] `_mm_cmpgt_pd` diff --git a/library/stdarch/examples/play.rs b/library/stdarch/examples/play.rs index 89a771fc5191..74980da4d6d0 100644 --- a/library/stdarch/examples/play.rs +++ b/library/stdarch/examples/play.rs @@ -3,6 +3,11 @@ extern crate stdsimd; use std::env; use stdsimd as s; +#[inline(never)] +fn foobar(a: s::f64x2, b: s::f64x2) -> s::f64x2 { + s::_mm_cmpge_sd(a, b) +} + fn main() { let x0: f64 = env::args().nth(1).unwrap().parse().unwrap(); let x1: f64 = env::args().nth(2).unwrap().parse().unwrap(); @@ -15,6 +20,7 @@ fn main() { let a = s::f64x2::new(x0, x1); let b = 
s::f64x2::new(x2, x3); - let r = s::_mm_div_sd(a, b); + // let r = s::_mm_cmplt_sd(a, b); + let r = foobar(a, b); println!("{:?}", r); } diff --git a/library/stdarch/src/x86/sse2.rs b/library/stdarch/src/x86/sse2.rs index 0beb2a67fba1..563e06fd14df 100644 --- a/library/stdarch/src/x86/sse2.rs +++ b/library/stdarch/src/x86/sse2.rs @@ -1240,6 +1240,163 @@ pub fn _mm_sub_pd(a: f64x2, b: f64x2) -> f64x2 { a - b } +/// Compute the bitwise AND of packed double-precision (64-bit) floating-point +/// elements in `a` and `b`. +#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_and_pd(a: f64x2, b: f64x2) -> f64x2 { + unsafe { + let a: i64x2 = mem::transmute(a); + let b: i64x2 = mem::transmute(b); + mem::transmute(a & b) + } +} + +/// Compute the bitwise NOT of `a` and then AND with `b`. +#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_andnot_pd(a: f64x2, b: f64x2) -> f64x2 { + unsafe { + let a: i64x2 = mem::transmute(a); + let b: i64x2 = mem::transmute(b); + mem::transmute((!a) & b) + } +} + +/// Compute the bitwise OR of `a` and `b`. +#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_or_pd(a: f64x2, b: f64x2) -> f64x2 { + unsafe { + let a: i64x2 = mem::transmute(a); + let b: i64x2 = mem::transmute(b); + mem::transmute(a | b) + } +} + +/// Compute the bitwise XOR of `a` and `b`. +#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_xor_pd(a: f64x2, b: f64x2) -> f64x2 { + unsafe { + let a: i64x2 = mem::transmute(a); + let b: i64x2 = mem::transmute(b); + mem::transmute(a ^ b) + } +} + +/// Return a new vector with the low element of `a` replaced by the equality +/// comparison of the lower elements of `a` and `b`. +#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_cmpeq_sd(a: f64x2, b: f64x2) -> f64x2 { + unsafe { cmpsd(a, b, 0) } +} + +/// Return a new vector with the low element of `a` replaced by the less-than +/// comparison of the lower elements of `a` and `b`.
+#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_cmplt_sd(a: f64x2, b: f64x2) -> f64x2 { + unsafe { cmpsd(a, b, 1) } +} + +/// Return a new vector with the low element of `a` replaced by the +/// less-than-or-equal comparison of the lower elements of `a` and `b`. +#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_cmple_sd(a: f64x2, b: f64x2) -> f64x2 { + unsafe { cmpsd(a, b, 2) } +} + +/// Return a new vector with the low element of `a` replaced by the +/// greater-than comparison of the lower elements of `a` and `b`. +#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_cmpgt_sd(a: f64x2, b: f64x2) -> f64x2 { + _mm_cmplt_sd(b, a).insert(1, a.extract(1)) +} + +/// Return a new vector with the low element of `a` replaced by the +/// greater-than-or-equal comparison of the lower elements of `a` and `b`. +#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_cmpge_sd(a: f64x2, b: f64x2) -> f64x2 { + _mm_cmple_sd(b, a).insert(1, a.extract(1)) +} + +/// Return a new vector with the low element of `a` replaced by the result +/// of comparing both of the lower elements of `a` and `b` to `NaN`. If +/// neither are equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` +/// otherwise. +#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_cmpord_sd(a: f64x2, b: f64x2) -> f64x2 { + unsafe { cmpsd(a, b, 7) } +} + +/// Return a new vector with the low element of `a` replaced by the result of +/// comparing both of the lower elements of `a` and `b` to `NaN`. If either is +/// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise. +#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_cmpunord_sd(a: f64x2, b: f64x2) -> f64x2 { + unsafe { cmpsd(a, b, 3) } +} + +/// Return a new vector with the low element of `a` replaced by the not-equal +/// comparison of the lower elements of `a` and `b`. 
+#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_cmpneq_sd(a: f64x2, b: f64x2) -> f64x2 { + unsafe { cmpsd(a, b, 4) } +} + +/// Return a new vector with the low element of `a` replaced by the +/// not-less-than comparison of the lower elements of `a` and `b`. +#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_cmpnlt_sd(a: f64x2, b: f64x2) -> f64x2 { + unsafe { cmpsd(a, b, 5) } +} + +/// Return a new vector with the low element of `a` replaced by the +/// not-less-than-or-equal comparison of the lower elements of `a` and `b`. +#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_cmpnle_sd(a: f64x2, b: f64x2) -> f64x2 { + unsafe { cmpsd(a, b, 6) } +} + +/// Return a new vector with the low element of `a` replaced by the +/// not-greater-than comparison of the lower elements of `a` and `b`. +#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_cmpngt_sd(a: f64x2, b: f64x2) -> f64x2 { + _mm_cmpnlt_sd(b, a).insert(1, a.extract(1)) +} + +/// Return a new vector with the low element of `a` replaced by the +/// not-greater-than-or-equal comparison of the lower elements of `a` and `b`. +#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_cmpnge_sd(a: f64x2, b: f64x2) -> f64x2 { + _mm_cmpnle_sd(b, a).insert(1, a.extract(1)) +} + +/// Compare corresponding elements in `a` and `b` for equality. 
+#[inline(always)] +#[target_feature = "+sse2"] +pub fn _mm_cmpeq_pd(a: f64x2, b: f64x2) -> f64x2 { + unsafe { cmppd(a, b, 0) } +} + + + + + + + #[inline(always)] #[target_feature = "+sse2"] @@ -1357,6 +1514,10 @@ extern { fn sqrtsd(a: f64x2) -> f64x2; #[link_name = "llvm.x86.sse2.sqrt.pd"] fn sqrtpd(a: f64x2) -> f64x2; + #[link_name = "llvm.x86.sse2.cmp.sd"] + fn cmpsd(a: f64x2, b: f64x2, imm8: i8) -> f64x2; + #[link_name = "llvm.x86.sse2.cmp.pd"] + fn cmppd(a: f64x2, b: f64x2, imm8: i8) -> f64x2; } #[cfg(test)] @@ -2439,4 +2600,210 @@ mod tests { sse2::_mm_sub_pd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)), f64x2::new(-4.0, -8.0)); } + + #[test] + fn _mm_and_pd() { + use std::mem::transmute; + + unsafe { + let a: f64x2 = transmute(i64x2::splat(5)); + let b: f64x2 = transmute(i64x2::splat(3)); + let e: f64x2 = transmute(i64x2::splat(1)); + assert_eq!(sse2::_mm_and_pd(a, b), e); + } + } + + #[test] + fn _mm_andnot_pd() { + use std::mem::transmute; + + unsafe { + let a: f64x2 = transmute(i64x2::splat(5)); + let b: f64x2 = transmute(i64x2::splat(3)); + let e: f64x2 = transmute(i64x2::splat(2)); + assert_eq!(sse2::_mm_andnot_pd(a, b), e); + } + } + + #[test] + fn _mm_or_pd() { + use std::mem::transmute; + + unsafe { + let a: f64x2 = transmute(i64x2::splat(5)); + let b: f64x2 = transmute(i64x2::splat(3)); + let e: f64x2 = transmute(i64x2::splat(7)); + assert_eq!(sse2::_mm_or_pd(a, b), e); + } + } + + #[test] + fn _mm_xor_pd() { + use std::mem::transmute; + + unsafe { + let a: f64x2 = transmute(i64x2::splat(5)); + let b: f64x2 = transmute(i64x2::splat(3)); + let e: f64x2 = transmute(i64x2::splat(6)); + assert_eq!(sse2::_mm_xor_pd(a, b), e); + } + } + + #[test] + fn _mm_cmpeq_sd() { + use std::mem::transmute; + + unsafe { + let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0)); + let e = u64x2::new(!0, transmute(2.0f64)); + let r: u64x2 = transmute(sse2::_mm_cmpeq_sd(a, b)); + assert_eq!(r, e); + } + } + + #[test] + fn _mm_cmplt_sd() { + use std::mem::transmute; + + 
unsafe { + let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0)); + let e = u64x2::new(!0, transmute(2.0f64)); + let r: u64x2 = transmute(sse2::_mm_cmplt_sd(a, b)); + assert_eq!(r, e); + } + } + + #[test] + fn _mm_cmple_sd() { + use std::mem::transmute; + + unsafe { + let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0)); + let e = u64x2::new(!0, transmute(2.0f64)); + let r: u64x2 = transmute(sse2::_mm_cmple_sd(a, b)); + assert_eq!(r, e); + } + } + + #[test] + fn _mm_cmpgt_sd() { + use std::mem::transmute; + + unsafe { + let (a, b) = (f64x2::new(5.0, 2.0), f64x2::new(1.0, 3.0)); + let e = u64x2::new(!0, transmute(2.0f64)); + let r: u64x2 = transmute(sse2::_mm_cmpgt_sd(a, b)); + assert_eq!(r, e); + } + } + + #[test] + fn _mm_cmpge_sd() { + use std::mem::transmute; + + unsafe { + let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0)); + let e = u64x2::new(!0, transmute(2.0f64)); + let r: u64x2 = transmute(sse2::_mm_cmpge_sd(a, b)); + assert_eq!(r, e); + } + } + + #[test] + fn _mm_cmpord_sd() { + use std::f64::NAN; + use std::mem::transmute; + + unsafe { + let (a, b) = (f64x2::new(NAN, 2.0), f64x2::new(5.0, 3.0)); + let e = u64x2::new(0, transmute(2.0f64)); + let r: u64x2 = transmute(sse2::_mm_cmpord_sd(a, b)); + assert_eq!(r, e); + } + } + + #[test] + fn _mm_cmpunord_sd() { + use std::f64::NAN; + use std::mem::transmute; + + unsafe { + let (a, b) = (f64x2::new(NAN, 2.0), f64x2::new(5.0, 3.0)); + let e = u64x2::new(!0, transmute(2.0f64)); + let r: u64x2 = transmute(sse2::_mm_cmpunord_sd(a, b)); + assert_eq!(r, e); + } + } + + #[test] + fn _mm_cmpneq_sd() { + use std::mem::transmute; + + unsafe { + let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0)); + let e = u64x2::new(!0u64, transmute(2.0f64)); + let r: u64x2 = transmute(sse2::_mm_cmpneq_sd(a, b)); + assert_eq!(r, e); + } + } + + #[test] + fn _mm_cmpnlt_sd() { + use std::mem::transmute; + + unsafe { + let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0)); + let e = u64x2::new(0, 
transmute(2.0f64)); + let r: u64x2 = transmute(sse2::_mm_cmpnlt_sd(a, b)); + assert_eq!(r, e); + } + } + + #[test] + fn _mm_cmpnle_sd() { + use std::mem::transmute; + + unsafe { + let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0)); + let e = u64x2::new(0, transmute(2.0f64)); + let r: u64x2 = transmute(sse2::_mm_cmpnle_sd(a, b)); + assert_eq!(r, e); + } + } + + #[test] + fn _mm_cmpngt_sd() { + use std::mem::transmute; + + unsafe { + let (a, b) = (f64x2::new(5.0, 2.0), f64x2::new(1.0, 3.0)); + let e = u64x2::new(0, transmute(2.0f64)); + let r: u64x2 = transmute(sse2::_mm_cmpngt_sd(a, b)); + assert_eq!(r, e); + } + } + + #[test] + fn _mm_cmpnge_sd() { + use std::mem::transmute; + + unsafe { + let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0)); + let e = u64x2::new(0, transmute(2.0f64)); + let r: u64x2 = transmute(sse2::_mm_cmpnge_sd(a, b)); + assert_eq!(r, e); + } + } + + #[test] + fn _mm_cmpeq_pd() { + use std::mem::transmute; + + unsafe { + let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0)); + let e = u64x2::new(!0, 0); + let r: u64x2 = transmute(sse2::_mm_cmpeq_pd(a, b)); + assert_eq!(r, e); + } + } }