From 3dba6f3b4dcbd346145a2da0100e97a3f816f18b Mon Sep 17 00:00:00 2001 From: p32blo Date: Thu, 28 Sep 2017 11:45:41 +0100 Subject: [PATCH] avx: add vmaxpd, vmaxps, vminpd, vminps --- library/stdarch/src/x86/avx.rs | 80 ++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/library/stdarch/src/x86/avx.rs b/library/stdarch/src/x86/avx.rs index 8829985da9dc..2f5ed80ed483 100644 --- a/library/stdarch/src/x86/avx.rs +++ b/library/stdarch/src/x86/avx.rs @@ -20,6 +20,42 @@ pub unsafe fn _mm256_add_ps(a: f32x8, b: f32x8) -> f32x8 { a + b } +/// Compare packed double-precision (64-bit) floating-point elements +/// in `a` and `b`, and return packed maximum values. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vmaxpd))] +pub unsafe fn _mm256_max_pd(a: f64x4, b: f64x4) -> f64x4 { + maxpd256(a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements +/// in `a` and `b`, and return packed maximum values. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vmaxps))] +pub unsafe fn _mm256_max_ps(a: f32x8, b: f32x8) -> f32x8 { + maxps256(a, b) +} + +/// Compare packed double-precision (64-bit) floating-point elements +/// in `a` and `b`, and return packed minimum values. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vminpd))] +pub unsafe fn _mm256_min_pd(a: f64x4, b: f64x4) -> f64x4 { + minpd256(a, b) +} + +/// Compare packed single-precision (32-bit) floating-point elements +/// in `a` and `b`, and return packed minimum values. +#[inline(always)] +#[target_feature = "+avx"] +#[cfg_attr(test, assert_instr(vminps))] +pub unsafe fn _mm256_min_ps(a: f32x8, b: f32x8) -> f32x8 { + minps256(a, b) +} + /// Add packed double-precision (64-bit) floating-point elements /// in `a` and `b`. 
#[inline(always)] @@ -173,6 +209,14 @@ extern "C" { fn addsubpd256(a: f64x4, b: f64x4) -> f64x4; #[link_name = "llvm.x86.avx.addsub.ps.256"] fn addsubps256(a: f32x8, b: f32x8) -> f32x8; + #[link_name = "llvm.x86.avx.max.pd.256"] + fn maxpd256(a: f64x4, b: f64x4) -> f64x4; + #[link_name = "llvm.x86.avx.max.ps.256"] + fn maxps256(a: f32x8, b: f32x8) -> f32x8; + #[link_name = "llvm.x86.avx.min.pd.256"] + fn minpd256(a: f64x4, b: f64x4) -> f64x4; + #[link_name = "llvm.x86.avx.min.ps.256"] + fn minps256(a: f32x8, b: f32x8) -> f32x8; #[link_name = "llvm.x86.avx.round.pd.256"] fn roundpd256(a: f64x4, b: i32) -> f64x4; #[link_name = "llvm.x86.avx.round.ps.256"] @@ -208,6 +252,42 @@ mod tests { assert_eq!(r, e); } + #[simd_test = "avx"] + unsafe fn _mm256_max_pd() { + let a = f64x4::new(1.0, 4.0, 5.0, 8.0); + let b = f64x4::new(2.0, 3.0, 6.0, 7.0); + let r = avx::_mm256_max_pd(a, b); + let e = f64x4::new(2.0, 4.0, 6.0, 8.0); + assert_eq!(r, e); + } + + #[simd_test = "avx"] + unsafe fn _mm256_max_ps() { + let a = f32x8::new(1.0, 4.0, 5.0, 8.0, 9.0, 12.0, 13.0, 16.0); + let b = f32x8::new(2.0, 3.0, 6.0, 7.0, 10.0, 11.0, 14.0, 15.0); + let r = avx::_mm256_max_ps(a, b); + let e = f32x8::new(2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0); + assert_eq!(r, e); + } + + #[simd_test = "avx"] + unsafe fn _mm256_min_pd() { + let a = f64x4::new(1.0, 4.0, 5.0, 8.0); + let b = f64x4::new(2.0, 3.0, 6.0, 7.0); + let r = avx::_mm256_min_pd(a, b); + let e = f64x4::new(1.0, 3.0, 5.0, 7.0); + assert_eq!(r, e); + } + + #[simd_test = "avx"] + unsafe fn _mm256_min_ps() { + let a = f32x8::new(1.0, 4.0, 5.0, 8.0, 9.0, 12.0, 13.0, 16.0); + let b = f32x8::new(2.0, 3.0, 6.0, 7.0, 10.0, 11.0, 14.0, 15.0); + let r = avx::_mm256_min_ps(a, b); + let e = f32x8::new(1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 13.0, 15.0); + assert_eq!(r, e); + } + #[simd_test = "avx"] unsafe fn _mm256_mul_pd() { let a = f64x4::new(1.0, 2.0, 3.0, 4.0);