From 0511ecbaf092797910e9348c219dfd68d34995d2 Mon Sep 17 00:00:00 2001
From: krampenschiesser
Date: Thu, 28 Sep 2017 12:50:09 -0400
Subject: [PATCH] added support for _mm_cvtpd_ps / cvtpd2ps

---
 library/stdarch/src/x86/sse2.rs | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/library/stdarch/src/x86/sse2.rs b/library/stdarch/src/x86/sse2.rs
index 979e5efdd62e..6930cf886893 100644
--- a/library/stdarch/src/x86/sse2.rs
+++ b/library/stdarch/src/x86/sse2.rs
@@ -1718,6 +1718,14 @@ pub unsafe fn _mm_ucomineq_sd(a: f64x2, b: f64x2) -> bool {
     mem::transmute(ucomineqsd(a, b) as u8)
 }
 
+/// Convert packed double-precision (64-bit) floating-point elements in `a` to packed single-precision (32-bit) floating-point elements in the first two lanes of the result; the remaining two lanes are zero.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(test, assert_instr(cvtpd2ps))]
+pub unsafe fn _mm_cvtpd_ps(a: f64x2) -> f32x4 {
+    cvtpd2ps(a)
+}
+
 /// Return a mask of the most significant bit of each element in `a`.
 ///
 /// The mask is stored in the 2 least significant bits of the return value.
@@ -1882,6 +1890,8 @@ extern {
     fn ucomineqsd(a: f64x2, b: f64x2) -> i32;
     #[link_name = "llvm.x86.sse2.movmsk.pd"]
     fn movmskpd(a: f64x2) -> i32;
+    #[link_name = "llvm.x86.sse2.cvtpd2ps"]
+    fn cvtpd2ps(a: f64x2) -> f32x4;
 }
 
 #[cfg(test)]
@@ -3406,4 +3416,21 @@ mod tests {
         let r = sse2::_mm_movemask_pd(f64x2::new(-1.0, -5.0));
         assert_eq!(r, 0b11);
     }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_cvtpd_ps() {
+        use std::{f64,f32};
+
+        let r = sse2::_mm_cvtpd_ps(f64x2::new(-1.0, 5.0));
+        assert_eq!(r, f32x4::new(-1.0, 5.0, 0.0, 0.0));
+
+        let r = sse2::_mm_cvtpd_ps(f64x2::new(-1.0, -5.0));
+        assert_eq!(r, f32x4::new(-1.0, -5.0, 0.0, 0.0));
+
+        let r = sse2::_mm_cvtpd_ps(f64x2::new(f64::MAX, f64::MIN));
+        assert_eq!(r, f32x4::new(f32::INFINITY, f32::NEG_INFINITY, 0.0,0.0));
+
+        let r = sse2::_mm_cvtpd_ps(f64x2::new(f32::MAX as f64, f32::MIN as f64));
+        assert_eq!(r, f32x4::new(f32::MAX, f32::MIN, 0.0,0.0));
+    }
 }