diff --git a/library/stdarch/src/v256.rs b/library/stdarch/src/v256.rs index a5e163e45319..ada2c2d22b4b 100644 --- a/library/stdarch/src/v256.rs +++ b/library/stdarch/src/v256.rs @@ -73,6 +73,11 @@ define_from!(i16x16, u64x4, i64x4, u32x8, i32x8, u16x16, u8x32, i8x32); define_from!(u8x32, u64x4, i64x4, u32x8, i32x8, u16x16, i16x16, i8x32); define_from!(i8x32, u64x4, i64x4, u32x8, i32x8, u16x16, i16x16, u8x32); +define_from!(f64x4, u64x4); +define_from!(u64x4, f64x4); +define_from!(f32x8, u32x8); +define_from!(u32x8, f32x8); + define_common_ops!( f64x4, f32x8, u64x4, i64x4, u32x8, i32x8, u16x16, i16x16, u8x32, i8x32); define_float_ops!(f64x4, f32x8); diff --git a/library/stdarch/src/x86/avx.rs b/library/stdarch/src/x86/avx.rs index 2f5ed80ed483..1501d063b8f7 100644 --- a/library/stdarch/src/x86/avx.rs +++ b/library/stdarch/src/x86/avx.rs @@ -20,6 +20,48 @@ pub unsafe fn _mm256_add_ps(a: f32x8, b: f32x8) -> f32x8 { a + b } +/// Compute the bitwise AND of packed double-precision (64-bit) floating-point elements +/// in `a` and `b`. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vandpd))] +pub unsafe fn _mm256_and_pd(a: f64x4, b: f64x4) -> f64x4 { + let a: u64x4 = a.into(); + let b: u64x4 = b.into(); + (a & b).into() +} + +/// Compute the bitwise AND of packed single-precision (32-bit) floating-point elements in `a` and `b`. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vandps))] +pub unsafe fn _mm256_and_ps(a: f32x8, b: f32x8) -> f32x8 { + let a: u32x8 = a.into(); + let b: u32x8 = b.into(); + (a & b).into() +} + +/// Compute the bitwise OR of packed double-precision (64-bit) floating-point elements +/// in `a` and `b`. 
+#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vorpd))] +pub unsafe fn _mm256_or_pd(a: f64x4, b: f64x4) -> f64x4 { + let a: u64x4 = a.into(); + let b: u64x4 = b.into(); + (a | b).into() +} + +/// Compute the bitwise OR of packed single-precision (32-bit) floating-point elements in `a` and `b`. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vorps))] +pub unsafe fn _mm256_or_ps(a: f32x8, b: f32x8) -> f32x8 { + let a: u32x8 = a.into(); + let b: u32x8 = b.into(); + (a | b).into() +} + /// Compare packed double-precision (64-bit) floating-point elements /// in `a` and `b`, and return packed maximum values #[inline(always)] @@ -252,6 +294,42 @@ mod tests { assert_eq!(r, e); } + #[simd_test = "avx"] + unsafe fn _mm256_and_pd() { + let a = f64x4::splat(1.0); + let b = f64x4::splat(0.6); + let r = avx::_mm256_and_pd(a, b); + let e = f64x4::splat(0.5); + assert_eq!(r, e); + } + + #[simd_test = "avx"] + unsafe fn _mm256_and_ps() { + let a = f32x8::splat(1.0); + let b = f32x8::splat(0.6); + let r = avx::_mm256_and_ps(a, b); + let e = f32x8::splat(0.5); + assert_eq!(r, e); + } + + #[simd_test = "avx"] + unsafe fn _mm256_or_pd() { + let a = f64x4::splat(1.0); + let b = f64x4::splat(0.6); + let r = avx::_mm256_or_pd(a, b); + let e = f64x4::splat(1.2); + assert_eq!(r, e); + } + + #[simd_test = "avx"] + unsafe fn _mm256_or_ps() { + let a = f32x8::splat(1.0); + let b = f32x8::splat(0.6); + let r = avx::_mm256_or_ps(a, b); + let e = f32x8::splat(1.2); + assert_eq!(r, e); + } + #[simd_test = "avx"] unsafe fn _mm256_max_pd() { let a = f64x4::new(1.0, 4.0, 5.0, 8.0);