From 3b7dc00f661b30f9c0dce489ca052cff22471898 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eduardo=20S=C3=A1nchez=20Mu=C3=B1oz?= Date: Sat, 12 Aug 2023 00:26:13 +0200 Subject: [PATCH] Implement SSE2 and AVX unaligned stores (storeu) with `<*mut T>::write_unaligned` instead of LLVM intrinsics --- library/stdarch/crates/core_arch/src/x86/avx.rs | 12 +++--------- library/stdarch/crates/core_arch/src/x86/sse2.rs | 8 ++------ 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs index fe7a04b5388d..00bcc1fa1bac 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx.rs @@ -1439,7 +1439,7 @@ pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d { #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) { - storeupd256(mem_addr, a); + mem_addr.cast::<__m256d>().write_unaligned(a); } /// Loads 256-bits (composed of 8 packed single-precision (32-bit) @@ -1471,7 +1471,7 @@ pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 { #[cfg_attr(test, assert_instr(vmovups))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) { - storeups256(mem_addr, a); + mem_addr.cast::<__m256>().write_unaligned(a); } /// Loads 256-bits of integer data from memory into result. @@ -1527,7 +1527,7 @@ pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) { - storeudq256(mem_addr as *mut i8, a.as_i8x32()); + mem_addr.write_unaligned(a); } /// Loads packed double-precision (64-bit) floating-point elements from memory @@ -2974,12 +2974,6 @@ extern "C" { fn vbroadcastf128ps256(a: &__m128) -> __m256; #[link_name = "llvm.x86.avx.vbroadcastf128.pd.256"] fn vbroadcastf128pd256(a: &__m128d) -> __m256d; - #[link_name = "llvm.x86.avx.storeu.pd.256"] - fn storeupd256(mem_addr: *mut f64, a: __m256d); - #[link_name = "llvm.x86.avx.storeu.ps.256"] - fn storeups256(mem_addr: *mut f32, a: __m256); - #[link_name = "llvm.x86.avx.storeu.dq.256"] - fn storeudq256(mem_addr: *mut i8, a: i8x32); #[link_name = "llvm.x86.avx.maskload.pd.256"] fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d; #[link_name = "llvm.x86.avx.maskstore.pd.256"] diff --git a/library/stdarch/crates/core_arch/src/x86/sse2.rs b/library/stdarch/crates/core_arch/src/x86/sse2.rs index f4fdb50469c5..342423c84ff9 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse2.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs @@ -1248,7 +1248,7 @@ pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) { #[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { - storeudq(mem_addr as *mut i8, a); + mem_addr.write_unaligned(a); } /// Stores the lower 64-bit integer `a` to a memory location. @@ -2515,7 +2515,7 @@ pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) { #[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { - storeupd(mem_addr as *mut i8, a); + mem_addr.cast::<__m128d>().write_unaligned(a); } /// Stores the lower double-precision (64-bit) floating-point element from `a` @@ -2920,10 +2920,6 @@ extern "C" { fn cvttsd2si(a: __m128d) -> i32; #[link_name = "llvm.x86.sse2.cvttps2dq"] fn cvttps2dq(a: __m128) -> i32x4; - #[link_name = "llvm.x86.sse2.storeu.dq"] - fn storeudq(mem_addr: *mut i8, a: __m128i); - #[link_name = "llvm.x86.sse2.storeu.pd"] - fn storeupd(mem_addr: *mut i8, a: __m128d); } #[cfg(test)]