progress
This commit is contained in:
parent
c01240299f
commit
8b27771cc5
3 changed files with 391 additions and 18 deletions
|
|
@ -139,23 +139,23 @@ sse2
|
|||
* [x] `_mm_sqrt_pd`
|
||||
* [x] `_mm_sub_sd`
|
||||
* [x] `_mm_sub_pd`
|
||||
* [ ] `_mm_and_pd`
|
||||
* [ ] `_mm_andnot_pd`
|
||||
* [ ] `_mm_or_pd`
|
||||
* [ ] `_mm_xor_pd`
|
||||
* [ ] `_mm_cmpeq_sd`
|
||||
* [ ] `_mm_cmplt_sd`
|
||||
* [ ] `_mm_cmple_sd`
|
||||
* [ ] `_mm_cmpgt_sd`
|
||||
* [ ] `_mm_cmpge_sd`
|
||||
* [ ] `_mm_cmpord_sd`
|
||||
* [ ] `_mm_cmpunord_sd`
|
||||
* [ ] `_mm_cmpneq_sd`
|
||||
* [ ] `_mm_cmpnlt_sd`
|
||||
* [ ] `_mm_cmpnle_sd`
|
||||
* [ ] `_mm_cmpngt_sd`
|
||||
* [ ] `_mm_cmpnge_sd`
|
||||
* [ ] `_mm_cmpeq_pd`
|
||||
* [x] `_mm_and_pd`
|
||||
* [x] `_mm_andnot_pd`
|
||||
* [x] `_mm_or_pd`
|
||||
* [x] `_mm_xor_pd`
|
||||
* [x] `_mm_cmpeq_sd`
|
||||
* [x] `_mm_cmplt_sd`
|
||||
* [x] `_mm_cmple_sd`
|
||||
* [x] `_mm_cmpgt_sd`
|
||||
* [x] `_mm_cmpge_sd`
|
||||
* [x] `_mm_cmpord_sd`
|
||||
* [x] `_mm_cmpunord_sd`
|
||||
* [x] `_mm_cmpneq_sd`
|
||||
* [x] `_mm_cmpnlt_sd`
|
||||
* [x] `_mm_cmpnle_sd`
|
||||
* [x] `_mm_cmpngt_sd`
|
||||
* [x] `_mm_cmpnge_sd`
|
||||
* [x] `_mm_cmpeq_pd`
|
||||
* [ ] `_mm_cmplt_pd`
|
||||
* [ ] `_mm_cmple_pd`
|
||||
* [ ] `_mm_cmpgt_pd`
|
||||
|
|
|
|||
|
|
@ -3,6 +3,11 @@ extern crate stdsimd;
|
|||
use std::env;
|
||||
use stdsimd as s;
|
||||
|
||||
#[inline(never)]
|
||||
fn foobar(a: s::f64x2, b: s::f64x2) -> s::f64x2 {
|
||||
s::_mm_cmpge_sd(a, b)
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let x0: f64 = env::args().nth(1).unwrap().parse().unwrap();
|
||||
let x1: f64 = env::args().nth(2).unwrap().parse().unwrap();
|
||||
|
|
@ -15,6 +20,7 @@ fn main() {
|
|||
|
||||
let a = s::f64x2::new(x0, x1);
|
||||
let b = s::f64x2::new(x2, x3);
|
||||
let r = s::_mm_div_sd(a, b);
|
||||
// let r = s::_mm_cmplt_sd(a, b);
|
||||
let r = foobar(a, b);
|
||||
println!("{:?}", r);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1240,6 +1240,163 @@ pub fn _mm_sub_pd(a: f64x2, b: f64x2) -> f64x2 {
|
|||
a - b
|
||||
}
|
||||
|
||||
/// Compute the bitwise AND of packed double-precision (64-bit) floating-point
|
||||
/// elements in `a` and `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_and_pd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
unsafe {
|
||||
let a: i64x2 = mem::transmute(a);
|
||||
let b: i64x2 = mem::transmute(b);
|
||||
mem::transmute(a & b)
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the bitwise NOT of `a` and then AND with `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_andnot_pd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
unsafe {
|
||||
let a: i64x2 = mem::transmute(a);
|
||||
let b: i64x2 = mem::transmute(b);
|
||||
mem::transmute((!a) & b)
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the bitwise OR of `a` and `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_or_pd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
unsafe {
|
||||
let a: i64x2 = mem::transmute(a);
|
||||
let b: i64x2 = mem::transmute(b);
|
||||
mem::transmute(a | b)
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the bitwise XOR of `a` and `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_xor_pd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
unsafe {
|
||||
let a: i64x2 = mem::transmute(a);
|
||||
let b: i64x2 = mem::transmute(b);
|
||||
mem::transmute(a ^ b)
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a new vector with the low element of `a` replaced by the equality
|
||||
/// comparison of the lower elements of `a` and `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_cmpeq_sd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
unsafe { cmpsd(a, b, 0) }
|
||||
}
|
||||
|
||||
/// Return a new vector with the low element of `a` replaced by the less-than
|
||||
/// comparison of the lower elements of `a` and `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_cmplt_sd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
unsafe { cmpsd(a, b, 1) }
|
||||
}
|
||||
|
||||
/// Return a new vector with the low element of `a` replaced by the
|
||||
/// less-than-or-equal comparison of the lower elements of `a` and `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_cmple_sd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
unsafe { cmpsd(a, b, 2) }
|
||||
}
|
||||
|
||||
/// Return a new vector with the low element of `a` replaced by the
|
||||
/// greater-than comparison of the lower elements of `a` and `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_cmpgt_sd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
_mm_cmplt_sd(b, a).insert(1, a.extract(1))
|
||||
}
|
||||
|
||||
/// Return a new vector with the low element of `a` replaced by the
|
||||
/// greater-than-or-equal comparison of the lower elements of `a` and `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_cmpge_sd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
_mm_cmple_sd(b, a).insert(1, a.extract(1))
|
||||
}
|
||||
|
||||
/// Return a new vector with the low element of `a` replaced by the result
|
||||
/// of comparing both of the lower elements of `a` and `b` to `NaN`. If
|
||||
/// neither is equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
|
||||
/// otherwise.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_cmpord_sd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
unsafe { cmpsd(a, b, 7) }
|
||||
}
|
||||
|
||||
/// Return a new vector with the low element of `a` replaced by the result of
|
||||
/// comparing both of the lower elements of `a` and `b` to `NaN`. If either is
|
||||
/// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_cmpunord_sd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
unsafe { cmpsd(a, b, 3) }
|
||||
}
|
||||
|
||||
/// Return a new vector with the low element of `a` replaced by the not-equal
|
||||
/// comparison of the lower elements of `a` and `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_cmpneq_sd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
unsafe { cmpsd(a, b, 4) }
|
||||
}
|
||||
|
||||
/// Return a new vector with the low element of `a` replaced by the
|
||||
/// not-less-than comparison of the lower elements of `a` and `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_cmpnlt_sd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
unsafe { cmpsd(a, b, 5) }
|
||||
}
|
||||
|
||||
/// Return a new vector with the low element of `a` replaced by the
|
||||
/// not-less-than-or-equal comparison of the lower elements of `a` and `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_cmpnle_sd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
unsafe { cmpsd(a, b, 6) }
|
||||
}
|
||||
|
||||
/// Return a new vector with the low element of `a` replaced by the
|
||||
/// not-greater-than comparison of the lower elements of `a` and `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_cmpngt_sd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
_mm_cmpnlt_sd(b, a).insert(1, a.extract(1))
|
||||
}
|
||||
|
||||
/// Return a new vector with the low element of `a` replaced by the
|
||||
/// not-greater-than-or-equal comparison of the lower elements of `a` and `b`.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_cmpnge_sd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
_mm_cmpnle_sd(b, a).insert(1, a.extract(1))
|
||||
}
|
||||
|
||||
/// Compare corresponding elements in `a` and `b` for equality.
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
pub fn _mm_cmpeq_pd(a: f64x2, b: f64x2) -> f64x2 {
|
||||
unsafe { cmppd(a, b, 0) }
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#[inline(always)]
|
||||
#[target_feature = "+sse2"]
|
||||
|
|
@ -1357,6 +1514,10 @@ extern {
|
|||
fn sqrtsd(a: f64x2) -> f64x2;
|
||||
#[link_name = "llvm.x86.sse2.sqrt.pd"]
|
||||
fn sqrtpd(a: f64x2) -> f64x2;
|
||||
#[link_name = "llvm.x86.sse2.cmp.sd"]
|
||||
fn cmpsd(a: f64x2, b: f64x2, imm8: i8) -> f64x2;
|
||||
#[link_name = "llvm.x86.sse2.cmp.pd"]
|
||||
fn cmppd(a: f64x2, b: f64x2, imm8: i8) -> f64x2;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -2439,4 +2600,210 @@ mod tests {
|
|||
sse2::_mm_sub_pd(f64x2::new(1.0, 2.0), f64x2::new(5.0, 10.0)),
|
||||
f64x2::new(-4.0, -8.0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_and_pd() {
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let a: f64x2 = transmute(i64x2::splat(5));
|
||||
let b: f64x2 = transmute(i64x2::splat(3));
|
||||
let e: f64x2 = transmute(i64x2::splat(1));
|
||||
assert_eq!(sse2::_mm_and_pd(a, b), e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_andnot_pd() {
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let a: f64x2 = transmute(i64x2::splat(5));
|
||||
let b: f64x2 = transmute(i64x2::splat(3));
|
||||
let e: f64x2 = transmute(i64x2::splat(2));
|
||||
assert_eq!(sse2::_mm_andnot_pd(a, b), e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_or_pd() {
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let a: f64x2 = transmute(i64x2::splat(5));
|
||||
let b: f64x2 = transmute(i64x2::splat(3));
|
||||
let e: f64x2 = transmute(i64x2::splat(7));
|
||||
assert_eq!(sse2::_mm_or_pd(a, b), e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_xor_pd() {
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let a: f64x2 = transmute(i64x2::splat(5));
|
||||
let b: f64x2 = transmute(i64x2::splat(3));
|
||||
let e: f64x2 = transmute(i64x2::splat(6));
|
||||
assert_eq!(sse2::_mm_xor_pd(a, b), e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_cmpeq_sd() {
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
|
||||
let e = u64x2::new(!0, transmute(2.0f64));
|
||||
let r: u64x2 = transmute(sse2::_mm_cmpeq_sd(a, b));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_cmplt_sd() {
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0));
|
||||
let e = u64x2::new(!0, transmute(2.0f64));
|
||||
let r: u64x2 = transmute(sse2::_mm_cmplt_sd(a, b));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_cmple_sd() {
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
|
||||
let e = u64x2::new(!0, transmute(2.0f64));
|
||||
let r: u64x2 = transmute(sse2::_mm_cmple_sd(a, b));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_cmpgt_sd() {
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let (a, b) = (f64x2::new(5.0, 2.0), f64x2::new(1.0, 3.0));
|
||||
let e = u64x2::new(!0, transmute(2.0f64));
|
||||
let r: u64x2 = transmute(sse2::_mm_cmpgt_sd(a, b));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_cmpge_sd() {
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
|
||||
let e = u64x2::new(!0, transmute(2.0f64));
|
||||
let r: u64x2 = transmute(sse2::_mm_cmpge_sd(a, b));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_cmpord_sd() {
|
||||
use std::f64::NAN;
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let (a, b) = (f64x2::new(NAN, 2.0), f64x2::new(5.0, 3.0));
|
||||
let e = u64x2::new(0, transmute(2.0f64));
|
||||
let r: u64x2 = transmute(sse2::_mm_cmpord_sd(a, b));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_cmpunord_sd() {
|
||||
use std::f64::NAN;
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let (a, b) = (f64x2::new(NAN, 2.0), f64x2::new(5.0, 3.0));
|
||||
let e = u64x2::new(!0, transmute(2.0f64));
|
||||
let r: u64x2 = transmute(sse2::_mm_cmpunord_sd(a, b));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_cmpneq_sd() {
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0));
|
||||
let e = u64x2::new(!0u64, transmute(2.0f64));
|
||||
let r: u64x2 = transmute(sse2::_mm_cmpneq_sd(a, b));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_cmpnlt_sd() {
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(5.0, 3.0));
|
||||
let e = u64x2::new(0, transmute(2.0f64));
|
||||
let r: u64x2 = transmute(sse2::_mm_cmpnlt_sd(a, b));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_cmpnle_sd() {
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
|
||||
let e = u64x2::new(0, transmute(2.0f64));
|
||||
let r: u64x2 = transmute(sse2::_mm_cmpnle_sd(a, b));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_cmpngt_sd() {
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let (a, b) = (f64x2::new(5.0, 2.0), f64x2::new(1.0, 3.0));
|
||||
let e = u64x2::new(0, transmute(2.0f64));
|
||||
let r: u64x2 = transmute(sse2::_mm_cmpngt_sd(a, b));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_cmpnge_sd() {
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
|
||||
let e = u64x2::new(0, transmute(2.0f64));
|
||||
let r: u64x2 = transmute(sse2::_mm_cmpnge_sd(a, b));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn _mm_cmpeq_pd() {
|
||||
use std::mem::transmute;
|
||||
|
||||
unsafe {
|
||||
let (a, b) = (f64x2::new(1.0, 2.0), f64x2::new(1.0, 3.0));
|
||||
let e = u64x2::new(!0, 0);
|
||||
let r: u64x2 = transmute(sse2::_mm_cmpeq_pd(a, b));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue