Implement SSE2 and AVX unaligned stores (storeu) with <*mut T>::write_unaligned instead of LLVM intrinsics
This commit is contained in:
parent
17daea9747
commit
3b7dc00f66
2 changed files with 5 additions and 15 deletions
|
|
@@ -1439,7 +1439,7 @@ pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
 #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) {
-    storeupd256(mem_addr, a);
+    mem_addr.cast::<__m256d>().write_unaligned(a);
 }

 /// Loads 256-bits (composed of 8 packed single-precision (32-bit)
|
@@ -1471,7 +1471,7 @@ pub unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256 {
 #[cfg_attr(test, assert_instr(vmovups))]
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256) {
-    storeups256(mem_addr, a);
+    mem_addr.cast::<__m256>().write_unaligned(a);
 }

 /// Loads 256-bits of integer data from memory into result.
|
@@ -1527,7 +1527,7 @@ pub unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i {
 #[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovdqu expected
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i) {
-    storeudq256(mem_addr as *mut i8, a.as_i8x32());
+    mem_addr.write_unaligned(a);
 }

 /// Loads packed double-precision (64-bit) floating-point elements from memory
|
@@ -2974,12 +2974,6 @@ extern "C" {
     fn vbroadcastf128ps256(a: &__m128) -> __m256;
     #[link_name = "llvm.x86.avx.vbroadcastf128.pd.256"]
     fn vbroadcastf128pd256(a: &__m128d) -> __m256d;
-    #[link_name = "llvm.x86.avx.storeu.pd.256"]
-    fn storeupd256(mem_addr: *mut f64, a: __m256d);
-    #[link_name = "llvm.x86.avx.storeu.ps.256"]
-    fn storeups256(mem_addr: *mut f32, a: __m256);
-    #[link_name = "llvm.x86.avx.storeu.dq.256"]
-    fn storeudq256(mem_addr: *mut i8, a: i8x32);
     #[link_name = "llvm.x86.avx.maskload.pd.256"]
     fn maskloadpd256(mem_addr: *const i8, mask: i64x4) -> __m256d;
     #[link_name = "llvm.x86.avx.maskstore.pd.256"]
|
|
|
|||
|
|
@@ -1248,7 +1248,7 @@ pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
 #[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
-    storeudq(mem_addr as *mut i8, a);
+    mem_addr.write_unaligned(a);
 }

 /// Stores the lower 64-bit integer `a` to a memory location.
|
@@ -2515,7 +2515,7 @@ pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
 #[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
-    storeupd(mem_addr as *mut i8, a);
+    mem_addr.cast::<__m128d>().write_unaligned(a);
 }

 /// Stores the lower double-precision (64-bit) floating-point element from `a`
|
@@ -2920,10 +2920,6 @@ extern "C" {
     fn cvttsd2si(a: __m128d) -> i32;
     #[link_name = "llvm.x86.sse2.cvttps2dq"]
     fn cvttps2dq(a: __m128) -> i32x4;
-    #[link_name = "llvm.x86.sse2.storeu.dq"]
-    fn storeudq(mem_addr: *mut i8, a: __m128i);
-    #[link_name = "llvm.x86.sse2.storeu.pd"]
-    fn storeupd(mem_addr: *mut i8, a: __m128d);
 }

 #[cfg(test)]
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue