avx: add vandpd, vandps, vorps and vorpd

- HACK warning: add `From` impls for u64x4 <-> f64x4 and f32x8 <-> u32x8 - The 'assert_*' tests for the '*pd' instructions are failing because LLVM always uses the single-precision ('*ps') variant
2017-09-29 11:12:23 +01:00 · 2017-09-29 11:12:23 +01:00 · 9ad5c4e88a
commit 9ad5c4e88a
parent e6f343d989
2 changed files with 83 additions and 0 deletions
--- a/library/stdarch/src/v256.rs
+++ b/library/stdarch/src/v256.rs
@ -73,6 +73,11 @@ define_from!(i16x16, u64x4, i64x4, u32x8, i32x8, u16x16, u8x32, i8x32);
 define_from!(u8x32, u64x4, i64x4, u32x8, i32x8, u16x16, i16x16, i8x32);
 define_from!(i8x32, u64x4, i64x4, u32x8, i32x8, u16x16, i16x16, u8x32);

+define_from!(f64x4, u64x4);
+define_from!(u64x4, f64x4);
+define_from!(f32x8, u32x8);
+define_from!(u32x8, f32x8);
+
 define_common_ops!(
    f64x4, f32x8, u64x4, i64x4, u32x8, i32x8, u16x16, i16x16, u8x32, i8x32);
 define_float_ops!(f64x4, f32x8);
--- a/library/stdarch/src/x86/avx.rs
+++ b/library/stdarch/src/x86/avx.rs
@ -20,6 +20,48 @@ pub unsafe fn _mm256_add_ps(a: f32x8, b: f32x8) -> f32x8 {
    a + b
 }

+/// Compute the bitwise AND of packed double-precision (64-bit) floating-point elements
+/// in `a` and `b`.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vandpd))]
+pub unsafe fn _mm256_and_pd(a: f64x4, b: f64x4) -> f64x4 {
+    let a: u64x4 = a.into();
+    let b: u64x4 = b.into();
+    (a & b).into()
+}
+
+/// Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in `a` and `b`.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vandps))]
+pub unsafe fn _mm256_and_ps(a: f32x8, b: f32x8) -> f32x8 {
+    let a: u32x8 = a.into();
+    let b: u32x8 = b.into();
+    (a & b).into()
+}
+
+/// Compute the bitwise OR of packed double-precision (64-bit) floating-point elements
+/// in `a` and `b`.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vorpd))]
+pub unsafe fn _mm256_or_pd(a: f64x4, b: f64x4) -> f64x4 {
+    let a: u64x4 = a.into();
+    let b: u64x4 = b.into();
+    (a | b).into()
+}
+
+/// Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in `a` and `b`.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vorps))]
+pub unsafe fn _mm256_or_ps(a: f32x8, b: f32x8) -> f32x8 {
+    let a: u32x8 = a.into();
+    let b: u32x8 = b.into();
+    (a | b).into()
+}
+
 /// Compare packed double-precision (64-bit) floating-point elements 
 /// in `a` and `b`, and return packed maximum values
 #[inline(always)]
@ -252,6 +294,42 @@ mod tests {
        assert_eq!(r, e);
    }

+    #[simd_test = "avx"]
+    unsafe fn _mm256_and_pd() {
+        let a = f64x4::splat(1.0);
+        let b = f64x4::splat(0.6);
+        let r = avx::_mm256_and_pd(a, b);
+        let e = f64x4::splat(0.5);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_and_ps() {
+        let a = f32x8::splat(1.0);
+        let b = f32x8::splat(0.6);
+        let r = avx::_mm256_and_ps(a, b);
+        let e = f32x8::splat(0.5);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_or_pd() {
+        let a = f64x4::splat(1.0);
+        let b = f64x4::splat(0.6);
+        let r = avx::_mm256_or_pd(a, b);
+        let e = f64x4::splat(1.2);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_or_ps() {
+        let a = f32x8::splat(1.0);
+        let b = f32x8::splat(0.6);
+        let r = avx::_mm256_or_ps(a, b);
+        let e = f32x8::splat(1.2);
+        assert_eq!(r, e);
+    }
+
    #[simd_test = "avx"]
    unsafe fn _mm256_max_pd() {
        let a = f64x4::new(1.0, 4.0, 5.0, 8.0);