added f32 and f64 unaligned stores and loads from avx512f set (#873)
This commit is contained in:
parent
02e1736720
commit
78135e1774
5 changed files with 168 additions and 56 deletions
|
|
@ -2956,8 +2956,7 @@ pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
|
|||
// This intrinsic has no corresponding instruction.
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_undefined_ps() -> __m256 {
|
||||
// FIXME: this function should return MaybeUninit<__m256>
|
||||
mem::MaybeUninit::<__m256>::uninit().assume_init()
|
||||
_mm256_set1_ps(0.0)
|
||||
}
|
||||
|
||||
/// Returns vector of type `__m256d` with undefined elements.
|
||||
|
|
@ -2968,8 +2967,7 @@ pub unsafe fn _mm256_undefined_ps() -> __m256 {
|
|||
// This intrinsic has no corresponding instruction.
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm256_undefined_pd() -> __m256d {
|
||||
// FIXME: this function should return MaybeUninit<__m256d>
|
||||
mem::MaybeUninit::<__m256d>::uninit().assume_init()
|
||||
_mm256_set1_pd(0.0)
|
||||
}
|
||||
|
||||
/// Returns vector of type __m256i with undefined elements.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
use crate::{
|
||||
core_arch::{simd::*, simd_llvm::*, x86::*},
|
||||
mem::{self, transmute},
|
||||
ptr,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -1633,6 +1634,113 @@ pub unsafe fn _mm512_mask_cmp_epi64_mask(
|
|||
transmute(r)
|
||||
}
|
||||
|
||||
/// Returns vector of type `__m512d` with undefined elements.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
// This intrinsic has no corresponding instruction.
|
||||
pub unsafe fn _mm512_undefined_pd() -> __m512d {
|
||||
_mm512_set1_pd(0.0)
|
||||
}
|
||||
|
||||
/// Returns vector of type `__m512` with undefined elements.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
// This intrinsic has no corresponding instruction.
|
||||
pub unsafe fn _mm512_undefined_ps() -> __m512 {
|
||||
_mm512_set1_ps(0.0)
|
||||
}
|
||||
|
||||
/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
|
||||
/// floating-point elements) from memory into result.
|
||||
/// `mem_addr` does not need to be aligned on any particular boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vmovups))]
|
||||
pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
|
||||
ptr::read_unaligned(mem_addr as *const __m512d)
|
||||
}
|
||||
|
||||
/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
|
||||
/// floating-point elements) from `a` into memory.
|
||||
/// `mem_addr` does not need to be aligned on any particular boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vmovups))]
|
||||
pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
|
||||
ptr::write_unaligned(mem_addr as *mut __m512d, a);
|
||||
}
|
||||
|
||||
/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
|
||||
/// floating-point elements) from memory into result.
|
||||
/// `mem_addr` does not need to be aligned on any particular boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vmovups))]
|
||||
pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
|
||||
ptr::read_unaligned(mem_addr as *const __m512)
|
||||
}
|
||||
|
||||
/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
|
||||
/// floating-point elements) from `a` into memory.
|
||||
/// `mem_addr` does not need to be aligned on any particular boundary.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vmovups))]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
|
||||
ptr::write_unaligned(mem_addr as *mut __m512, a);
|
||||
}
|
||||
|
||||
/// Sets packed 64-bit integers in `dst` with the supplied values in
|
||||
/// reverse order.
|
||||
///
|
||||
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_setr_pd(
|
||||
e0: f64,
|
||||
e1: f64,
|
||||
e2: f64,
|
||||
e3: f64,
|
||||
e4: f64,
|
||||
e5: f64,
|
||||
e6: f64,
|
||||
e7: f64,
|
||||
) -> __m512d {
|
||||
let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
/// Sets packed 64-bit integers in `dst` with the supplied values.
|
||||
///
|
||||
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_set_pd(
|
||||
e0: f64,
|
||||
e1: f64,
|
||||
e2: f64,
|
||||
e3: f64,
|
||||
e4: f64,
|
||||
e5: f64,
|
||||
e6: f64,
|
||||
e7: f64,
|
||||
) -> __m512d {
|
||||
_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
|
||||
}
|
||||
|
||||
/// Equal
|
||||
pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
|
||||
/// Less-than
|
||||
|
|
@ -1702,6 +1810,7 @@ mod tests {
|
|||
use stdarch_test::simd_test;
|
||||
|
||||
use crate::core_arch::x86::*;
|
||||
use crate::hint::black_box;
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_abs_epi32() {
|
||||
|
|
@ -2326,4 +2435,54 @@ mod tests {
|
|||
unsafe fn test_mm512_setzero_ps() {
|
||||
assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_loadu_pd() {
|
||||
let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
|
||||
let p = a.as_ptr();
|
||||
let r = _mm512_loadu_pd(black_box(p));
|
||||
let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
|
||||
assert_eq_m512d(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_storeu_pd() {
|
||||
let a = _mm512_set1_pd(9.);
|
||||
let mut r = _mm512_undefined_pd();
|
||||
_mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
|
||||
assert_eq_m512d(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_loadu_ps() {
|
||||
let a = &[
|
||||
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
|
||||
];
|
||||
let p = a.as_ptr();
|
||||
let r = _mm512_loadu_ps(black_box(p));
|
||||
let e = _mm512_setr_ps(
|
||||
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
|
||||
);
|
||||
assert_eq_m512(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_storeu_ps() {
|
||||
let a = _mm512_set1_ps(9.);
|
||||
let mut r = _mm512_undefined_ps();
|
||||
_mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
|
||||
assert_eq_m512(r, a);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_setr_pd() {
|
||||
let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
|
||||
assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_set_pd() {
|
||||
let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
|
||||
assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1865,8 +1865,7 @@ pub unsafe fn _mm_prefetch(p: *const i8, strategy: i32) {
|
|||
#[target_feature(enable = "sse")]
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
pub unsafe fn _mm_undefined_ps() -> __m128 {
|
||||
// FIXME: this function should return MaybeUninit<__m128>
|
||||
mem::MaybeUninit::<__m128>::uninit().assume_init()
|
||||
_mm_set1_ps(0.0)
|
||||
}
|
||||
|
||||
/// Transpose the 4x4 matrix formed by 4 rows of __m128 in place.
|
||||
|
|
|
|||
|
|
@ -3,44 +3,6 @@ use crate::{
|
|||
mem::transmute,
|
||||
};
|
||||
|
||||
/// Sets packed 64-bit integers in `dst` with the supplied values.
|
||||
///
|
||||
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_set_pd(
|
||||
e0: f64,
|
||||
e1: f64,
|
||||
e2: f64,
|
||||
e3: f64,
|
||||
e4: f64,
|
||||
e5: f64,
|
||||
e6: f64,
|
||||
e7: f64,
|
||||
) -> __m512d {
|
||||
_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
|
||||
}
|
||||
|
||||
/// Sets packed 64-bit integers in `dst` with the supplied values in
|
||||
/// reverse order.
|
||||
///
|
||||
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_pd)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_setr_pd(
|
||||
e0: f64,
|
||||
e1: f64,
|
||||
e2: f64,
|
||||
e3: f64,
|
||||
e4: f64,
|
||||
e5: f64,
|
||||
e6: f64,
|
||||
e7: f64,
|
||||
) -> __m512d {
|
||||
let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
|
||||
transmute(r)
|
||||
}
|
||||
|
||||
/// Sets packed 64-bit integers in `dst` with the supplied values.
|
||||
///
|
||||
/// [Intel's documentation]( https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=727,1063,4909,1062,1062,4909&text=_mm512_set_epi64)
|
||||
|
|
@ -311,18 +273,6 @@ mod tests {
|
|||
assert_eq!(r, 0b01001010);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_set_pd() {
|
||||
let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
|
||||
assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_setr_pd() {
|
||||
let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
|
||||
assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_set_epi64() {
|
||||
let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
|
||||
|
|
|
|||
|
|
@ -282,6 +282,8 @@ fn verify_all_signatures() {
|
|||
"_mm_tzcnt_64",
|
||||
"_fxsave64",
|
||||
"_fxrstor64",
|
||||
"_mm512_undefined_ps",
|
||||
"_mm512_undefined_pd",
|
||||
];
|
||||
if !skip.contains(&rust.name) {
|
||||
println!(
|
||||
|
|
@ -625,6 +627,8 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
|
|||
|
||||
(&Type::MutPtr(&Type::PrimFloat(32)), "float*") => {}
|
||||
(&Type::MutPtr(&Type::PrimFloat(64)), "double*") => {}
|
||||
(&Type::MutPtr(&Type::PrimFloat(32)), "void*") => {}
|
||||
(&Type::MutPtr(&Type::PrimFloat(64)), "void*") => {}
|
||||
(&Type::MutPtr(&Type::PrimSigned(32)), "int*") => {}
|
||||
(&Type::MutPtr(&Type::PrimSigned(32)), "__int32*") => {}
|
||||
(&Type::MutPtr(&Type::PrimSigned(64)), "__int64*") => {}
|
||||
|
|
@ -646,6 +650,8 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
|
|||
|
||||
(&Type::ConstPtr(&Type::PrimFloat(32)), "float const*") => {}
|
||||
(&Type::ConstPtr(&Type::PrimFloat(64)), "double const*") => {}
|
||||
(&Type::ConstPtr(&Type::PrimFloat(32)), "void const*") => {}
|
||||
(&Type::ConstPtr(&Type::PrimFloat(64)), "void const*") => {}
|
||||
(&Type::ConstPtr(&Type::PrimSigned(32)), "int const*") => {}
|
||||
(&Type::ConstPtr(&Type::PrimSigned(32)), "__int32 const*") => {}
|
||||
(&Type::ConstPtr(&Type::PrimSigned(64)), "__int64 const*") => {}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue