From f0f5108a98731302023bdc643f77da9eba80ff38 Mon Sep 17 00:00:00 2001
From: gwenn
Date: Sat, 30 Sep 2017 14:01:13 +0200
Subject: [PATCH] sse3: _mm_loaddup_pd and sse2: _mm_load1_pd

---
NOTE(review): `movddup` is an SSE3 instruction, while `_mm_load1_pd` is
compiled with `+sse2` only. Please confirm that its
`assert_instr(movddup)` check still passes when SSE3 is not enabled
globally for the build.

NOTE(review): the wording of the two new doc comments was unified during
review ("the returned vector"); the blob ids on the `index` lines below
still refer to the original submission.

 library/stdarch/src/x86/sse2.rs | 17 +++++++++++++++++
 library/stdarch/src/x86/sse3.rs | 17 +++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/library/stdarch/src/x86/sse2.rs b/library/stdarch/src/x86/sse2.rs
index d4822f857141..ad5cd3f4d27c 100644
--- a/library/stdarch/src/x86/sse2.rs
+++ b/library/stdarch/src/x86/sse2.rs
@@ -1762,6 +1762,16 @@ pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: f64x2) {
     *(mem_addr as *mut f64x2) = a;
 }
 
+/// Load a double-precision (64-bit) floating-point element from memory
+/// into both elements of the returned vector.
+#[inline(always)]
+#[target_feature = "+sse2"]
+#[cfg_attr(test, assert_instr(movddup))]
+pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> f64x2 {
+    let d = *mem_addr;
+    f64x2::new(d, d)
+}
+
 #[allow(improper_ctypes)]
 extern {
     #[link_name = "llvm.x86.sse2.pause"]
@@ -3463,4 +3473,11 @@ mod tests {
         let r = sse2::_mm_cvtpd_epi32(f64x2::new(f64::NAN, f64::NAN));
         assert_eq!(r, i32x4::new(i32::MIN, i32::MIN, 0, 0));
     }
+
+    #[simd_test = "sse2"]
+    unsafe fn _mm_load1_pd() {
+        let d = -5.0;
+        let r = sse2::_mm_load1_pd(&d);
+        assert_eq!(r, f64x2::new(d, d));
+    }
 }
diff --git a/library/stdarch/src/x86/sse3.rs b/library/stdarch/src/x86/sse3.rs
index 8518ab4a1a57..40cab5509fbe 100644
--- a/library/stdarch/src/x86/sse3.rs
+++ b/library/stdarch/src/x86/sse3.rs
@@ -78,6 +78,16 @@ pub unsafe fn _mm_movedup_pd(a: f64x2) -> f64x2 {
     simd_shuffle2(a, a, [0, 0])
 }
 
+/// Load a double-precision (64-bit) floating-point element from memory
+/// into both elements of the returned vector.
+#[inline(always)]
+#[target_feature = "+sse3"]
+#[cfg_attr(test, assert_instr(movddup))]
+pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> f64x2 {
+    use x86::sse2::_mm_load1_pd;
+    _mm_load1_pd(mem_addr)
+}
+
 /// Duplicate odd-indexed single-precision (32-bit) floating-point elements
 /// from `a`.
 #[inline(always)]
@@ -197,4 +207,11 @@ mod tests {
         let r = sse3::_mm_moveldup_ps(a);
         assert_eq!(r, f32x4::new(-1.0, -1.0, 0.0, 0.0));
     }
+
+    #[simd_test = "sse3"]
+    unsafe fn _mm_loaddup_pd() {
+        let d = -5.0;
+        let r = sse3::_mm_loaddup_pd(&d);
+        assert_eq!(r, f64x2::new(d, d));
+    }
 }
\ No newline at end of file