Add vst1_* neon intrinsics. (#1171)

This commit is contained in:
Nils Hasenbanck 2021-05-27 08:40:45 +02:00 committed by GitHub
parent 10f7ebc387
commit 3ecc56b329
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 979 additions and 4 deletions

View file

@ -12,7 +12,8 @@ pub use self::generated::*;
use crate::{
core_arch::{arm_shared::*, simd::*, simd_llvm::*},
hint::unreachable_unchecked,
mem::{transmute, zeroed},
mem::{size_of, transmute, zeroed},
ptr::copy_nonoverlapping,
};
#[cfg(test)]
use stdarch_test::assert_instr;
@ -795,6 +796,344 @@ pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t {
transmute(f64x2::new(*ptr, *ptr.offset(1)))
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) {
copy_nonoverlapping(
&a as *const int8x8_t as *const i8,
ptr as *mut i8,
size_of::<int8x8_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) {
copy_nonoverlapping(
&a as *const int8x16_t as *const i8,
ptr as *mut i8,
size_of::<int8x16_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) {
copy_nonoverlapping(
&a as *const int16x4_t as *const i16,
ptr as *mut i16,
size_of::<int16x4_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) {
copy_nonoverlapping(
&a as *const int16x8_t as *const i16,
ptr as *mut i16,
size_of::<int16x8_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) {
copy_nonoverlapping(
&a as *const int32x2_t as *const i32,
ptr as *mut i32,
size_of::<int32x2_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) {
copy_nonoverlapping(
&a as *const int32x4_t as *const i32,
ptr as *mut i32,
size_of::<int32x4_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) {
copy_nonoverlapping(
&a as *const int64x1_t as *const i64,
ptr as *mut i64,
size_of::<int64x1_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) {
copy_nonoverlapping(
&a as *const int64x2_t as *const i64,
ptr as *mut i64,
size_of::<int64x2_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) {
copy_nonoverlapping(
&a as *const uint8x8_t as *const u8,
ptr as *mut u8,
size_of::<uint8x8_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) {
copy_nonoverlapping(
&a as *const uint8x16_t as *const u8,
ptr as *mut u8,
size_of::<uint8x16_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) {
copy_nonoverlapping(
&a as *const uint16x4_t as *const u16,
ptr as *mut u16,
size_of::<uint16x4_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) {
copy_nonoverlapping(
&a as *const uint16x8_t as *const u16,
ptr as *mut u16,
size_of::<uint16x8_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) {
copy_nonoverlapping(
&a as *const uint32x2_t as *const u32,
ptr as *mut u32,
size_of::<uint32x2_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) {
copy_nonoverlapping(
&a as *const uint32x4_t as *const u32,
ptr as *mut u32,
size_of::<uint32x4_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) {
copy_nonoverlapping(
&a as *const uint64x1_t as *const u64,
ptr as *mut u64,
size_of::<uint64x1_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) {
copy_nonoverlapping(
&a as *const uint64x2_t as *const u64,
ptr as *mut u64,
size_of::<uint64x2_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) {
copy_nonoverlapping(
&a as *const poly8x8_t as *const p8,
ptr as *mut p8,
size_of::<poly8x8_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) {
copy_nonoverlapping(
&a as *const poly8x16_t as *const p8,
ptr as *mut p8,
size_of::<poly8x16_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) {
copy_nonoverlapping(
&a as *const poly16x4_t as *const p16,
ptr as *mut p16,
size_of::<poly16x4_t>(),
)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
copy_nonoverlapping(
&a as *const poly16x8_t as *const p16,
ptr as *mut p16,
size_of::<poly16x8_t>(),
)
}
// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) {
copy_nonoverlapping(
&a as *const poly64x1_t as *const p64,
ptr as *mut p64,
size_of::<poly64x1_t>(),
)
}
// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) {
copy_nonoverlapping(
&a as *const poly64x2_t as *const p64,
ptr as *mut p64,
size_of::<poly64x2_t>(),
)
}
// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) {
copy_nonoverlapping(
&a as *const float32x2_t as *const f32,
ptr as *mut f32,
size_of::<float32x2_t>(),
)
}
// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) {
copy_nonoverlapping(
&a as *const float32x4_t as *const f32,
ptr as *mut f32,
size_of::<float32x4_t>(),
)
}
// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) {
copy_nonoverlapping(
&a as *const float64x1_t as *const f64,
ptr as *mut f64,
size_of::<float64x1_t>(),
)
}
// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(str))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn vst1q_f64(ptr: *mut f64, a: float64x2_t) {
copy_nonoverlapping(
&a as *const float64x2_t as *const f64,
ptr as *mut f64,
size_of::<float64x2_t>(),
)
}
/// Absolute Value (wrapping).
#[inline]
#[target_feature(enable = "neon")]
@ -4609,6 +4948,52 @@ mod tests {
let e = 136_u16;
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_p64() {
let mut vals = [0_u64; 2];
let a = u64x1::new(1);
vst1_p64(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_p64() {
let mut vals = [0_u64; 3];
let a = u64x2::new(1, 2);
vst1q_p64(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_f64() {
let mut vals = [0_f64; 2];
let a = f64x1::new(1.);
vst1_f64(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0.);
assert_eq!(vals[1], 1.);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_f64() {
let mut vals = [0_f64; 3];
let a = f64x2::new(1., 2.);
vst1q_f64(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0.);
assert_eq!(vals[1], 1.);
assert_eq!(vals[2], 2.);
}
}
#[cfg(test)]
@ -4623,3 +5008,7 @@ mod shift_and_insert_tests;
#[cfg(test)]
#[path = "../../arm_shared/neon/load_tests.rs"]
mod load_tests;
#[cfg(test)]
#[path = "../../arm_shared/neon/store_tests.rs"]
mod store_tests;

View file

@ -1,7 +1,7 @@
use crate::core_arch::arm_shared::neon::*;
use crate::core_arch::simd::{f32x4, i32x4, u32x4};
use crate::core_arch::simd_llvm::*;
use crate::mem::transmute;
use crate::mem::{align_of, transmute};
#[cfg(test)]
use stdarch_test::assert_instr;
@ -11,8 +11,6 @@ pub(crate) type p8 = u8;
#[allow(non_camel_case_types)]
pub(crate) type p16 = u16;
use crate::mem::align_of;
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.arm.neon.vbsl.v8i8"]
@ -107,6 +105,27 @@ extern "C" {
fn vld1_v2f32(addr: *const i8, align: i32) -> float32x2_t;
#[link_name = "llvm.arm.neon.vld1.v4f32.p0i8"]
fn vld1q_v4f32(addr: *const i8, align: i32) -> float32x4_t;
#[link_name = "llvm.arm.neon.vst1.p0i8.v8i8"]
fn vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32);
#[link_name = "llvm.arm.neon.vst1.p0i8.v16i8"]
fn vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32);
#[link_name = "llvm.arm.neon.vst1.p0i8.v4i16"]
fn vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32);
#[link_name = "llvm.arm.neon.vst1.p0i8.v8i16"]
fn vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32);
#[link_name = "llvm.arm.neon.vst1.p0i8.v2i32"]
fn vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32);
#[link_name = "llvm.arm.neon.vst1.p0i8.v4i32"]
fn vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32);
#[link_name = "llvm.arm.neon.vst1.p0i8.v1i64"]
fn vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32);
#[link_name = "llvm.arm.neon.vst1.p0i8.v2i64"]
fn vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32);
#[link_name = "llvm.arm.neon.vst1.p0i8.v2f32"]
fn vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32);
#[link_name = "llvm.arm.neon.vst1.p0i8.v4f32"]
fn vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32);
}
/// Load multiple single-element structures to one, two, three, or four registers.
@ -285,6 +304,182 @@ pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
vld1q_v4f32(ptr as *const i8, align_of::<f32>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) {
vst1_v8i8(ptr as *const i8, a, align_of::<i8>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) {
vst1q_v16i8(ptr as *const i8, a, align_of::<i8>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) {
vst1_v4i16(ptr as *const i8, a, align_of::<i16>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) {
vst1q_v8i16(ptr as *const i8, a, align_of::<i16>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) {
vst1_v2i32(ptr as *const i8, a, align_of::<i32>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) {
vst1q_v4i32(ptr as *const i8, a, align_of::<i32>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) {
vst1_v1i64(ptr as *const i8, a, align_of::<i64>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) {
vst1q_v2i64(ptr as *const i8, a, align_of::<i64>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) {
vst1_v8i8(ptr as *const i8, transmute(a), align_of::<u8>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) {
vst1q_v16i8(ptr as *const i8, transmute(a), align_of::<u8>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) {
vst1_v4i16(ptr as *const i8, transmute(a), align_of::<u16>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) {
vst1q_v8i16(ptr as *const i8, transmute(a), align_of::<u16>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) {
vst1_v2i32(ptr as *const i8, transmute(a), align_of::<u32>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) {
vst1q_v4i32(ptr as *const i8, transmute(a), align_of::<u32>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) {
vst1_v1i64(ptr as *const i8, transmute(a), align_of::<u64>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) {
vst1q_v2i64(ptr as *const i8, transmute(a), align_of::<u64>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) {
vst1_v8i8(ptr as *const i8, transmute(a), align_of::<p8>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) {
vst1q_v16i8(ptr as *const i8, transmute(a), align_of::<p8>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) {
vst1_v4i16(ptr as *const i8, transmute(a), align_of::<p16>() as i32)
}
/// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) {
vst1q_v8i16(ptr as *const i8, transmute(a), align_of::<p8>() as i32)
}
// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) {
vst1_v2f32(ptr as *const i8, a, align_of::<f32>() as i32)
}
// Store multiple single-element structures from one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon,v7")]
#[cfg_attr(test, assert_instr(str))]
pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) {
vst1q_v4f32(ptr as *const i8, a, align_of::<f32>() as i32)
}
/// Table look-up
#[inline]
#[cfg(target_endian = "little")]

View file

@ -9992,3 +9992,6 @@ mod shift_and_insert_tests;
#[cfg(all(test, target_arch = "arm"))]
mod load_tests;
#[cfg(all(test, target_arch = "arm"))]
mod store_tests;

View file

@ -0,0 +1,366 @@
//! Tests for ARM+v7+neon store (vst1) intrinsics.
//!
//! These are included in `{arm, aarch64}::neon`.
use super::*;
#[cfg(target_arch = "arm")]
use crate::core_arch::arm::*;
#[cfg(target_arch = "aarch64")]
use crate::core_arch::aarch64::*;
use crate::core_arch::simd::*;
use stdarch_test::simd_test;
#[simd_test(enable = "neon")]
unsafe fn test_vst1_s8() {
let mut vals = [0_i8; 9];
let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
vst1_s8(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
assert_eq!(vals[3], 3);
assert_eq!(vals[4], 4);
assert_eq!(vals[5], 5);
assert_eq!(vals[6], 6);
assert_eq!(vals[7], 7);
assert_eq!(vals[8], 8);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_s8() {
let mut vals = [0_i8; 17];
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
vst1q_s8(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
assert_eq!(vals[3], 3);
assert_eq!(vals[4], 4);
assert_eq!(vals[5], 5);
assert_eq!(vals[6], 6);
assert_eq!(vals[7], 7);
assert_eq!(vals[8], 8);
assert_eq!(vals[9], 9);
assert_eq!(vals[10], 10);
assert_eq!(vals[11], 11);
assert_eq!(vals[12], 12);
assert_eq!(vals[13], 13);
assert_eq!(vals[14], 14);
assert_eq!(vals[15], 15);
assert_eq!(vals[16], 16);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_s16() {
let mut vals = [0_i16; 5];
let a = i16x4::new(1, 2, 3, 4);
vst1_s16(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
assert_eq!(vals[3], 3);
assert_eq!(vals[4], 4);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_s16() {
let mut vals = [0_i16; 9];
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
vst1q_s16(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
assert_eq!(vals[3], 3);
assert_eq!(vals[4], 4);
assert_eq!(vals[5], 5);
assert_eq!(vals[6], 6);
assert_eq!(vals[7], 7);
assert_eq!(vals[8], 8);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_s32() {
let mut vals = [0_i32; 3];
let a = i32x2::new(1, 2);
vst1_s32(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_s32() {
let mut vals = [0_i32; 5];
let a = i32x4::new(1, 2, 3, 4);
vst1q_s32(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
assert_eq!(vals[3], 3);
assert_eq!(vals[4], 4);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_s64() {
let mut vals = [0_i64; 2];
let a = i64x1::new(1);
vst1_s64(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_s64() {
let mut vals = [0_i64; 3];
let a = i64x2::new(1, 2);
vst1q_s64(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_u8() {
let mut vals = [0_u8; 9];
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
vst1_u8(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
assert_eq!(vals[3], 3);
assert_eq!(vals[4], 4);
assert_eq!(vals[5], 5);
assert_eq!(vals[6], 6);
assert_eq!(vals[7], 7);
assert_eq!(vals[8], 8);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_u8() {
let mut vals = [0_u8; 17];
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
vst1q_u8(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
assert_eq!(vals[3], 3);
assert_eq!(vals[4], 4);
assert_eq!(vals[5], 5);
assert_eq!(vals[6], 6);
assert_eq!(vals[7], 7);
assert_eq!(vals[8], 8);
assert_eq!(vals[9], 9);
assert_eq!(vals[10], 10);
assert_eq!(vals[11], 11);
assert_eq!(vals[12], 12);
assert_eq!(vals[13], 13);
assert_eq!(vals[14], 14);
assert_eq!(vals[15], 15);
assert_eq!(vals[16], 16);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_u16() {
let mut vals = [0_u16; 5];
let a = u16x4::new(1, 2, 3, 4);
vst1_u16(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
assert_eq!(vals[3], 3);
assert_eq!(vals[4], 4);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_u16() {
let mut vals = [0_u16; 9];
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
vst1q_u16(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
assert_eq!(vals[3], 3);
assert_eq!(vals[4], 4);
assert_eq!(vals[5], 5);
assert_eq!(vals[6], 6);
assert_eq!(vals[7], 7);
assert_eq!(vals[8], 8);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_u32() {
let mut vals = [0_u32; 3];
let a = u32x2::new(1, 2);
vst1_u32(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_u32() {
let mut vals = [0_u32; 5];
let a = u32x4::new(1, 2, 3, 4);
vst1q_u32(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
assert_eq!(vals[3], 3);
assert_eq!(vals[4], 4);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_u64() {
let mut vals = [0_u64; 2];
let a = u64x1::new(1);
vst1_u64(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_u64() {
let mut vals = [0_u64; 3];
let a = u64x2::new(1, 2);
vst1q_u64(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_p8() {
let mut vals = [0_u8; 9];
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
vst1_p8(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
assert_eq!(vals[3], 3);
assert_eq!(vals[4], 4);
assert_eq!(vals[5], 5);
assert_eq!(vals[6], 6);
assert_eq!(vals[7], 7);
assert_eq!(vals[8], 8);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_p8() {
let mut vals = [0_u8; 17];
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
vst1q_p8(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
assert_eq!(vals[3], 3);
assert_eq!(vals[4], 4);
assert_eq!(vals[5], 5);
assert_eq!(vals[6], 6);
assert_eq!(vals[7], 7);
assert_eq!(vals[8], 8);
assert_eq!(vals[9], 9);
assert_eq!(vals[10], 10);
assert_eq!(vals[11], 11);
assert_eq!(vals[12], 12);
assert_eq!(vals[13], 13);
assert_eq!(vals[14], 14);
assert_eq!(vals[15], 15);
assert_eq!(vals[16], 16);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_p16() {
let mut vals = [0_u16; 5];
let a = u16x4::new(1, 2, 3, 4);
vst1_p16(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
assert_eq!(vals[3], 3);
assert_eq!(vals[4], 4);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_p16() {
let mut vals = [0_u16; 9];
let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
vst1q_p16(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0);
assert_eq!(vals[1], 1);
assert_eq!(vals[2], 2);
assert_eq!(vals[3], 3);
assert_eq!(vals[4], 4);
assert_eq!(vals[5], 5);
assert_eq!(vals[6], 6);
assert_eq!(vals[7], 7);
assert_eq!(vals[8], 8);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1_f32() {
let mut vals = [0_f32; 3];
let a = f32x2::new(1., 2.);
vst1_f32(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0.);
assert_eq!(vals[1], 1.);
assert_eq!(vals[2], 2.);
}
#[simd_test(enable = "neon")]
unsafe fn test_vst1q_f32() {
let mut vals = [0_f32; 5];
let a = f32x4::new(1., 2., 3., 4.);
vst1q_f32(vals[1..].as_mut_ptr(), transmute(a));
assert_eq!(vals[0], 0.);
assert_eq!(vals[1], 1.);
assert_eq!(vals[2], 2.);
assert_eq!(vals[3], 3.);
assert_eq!(vals[4], 4.);
}

View file

@ -436,6 +436,28 @@ fn verify_all_signatures() {
"vld1q_f32",
"vld1_f64",
"vld1q_f64",
"vst1_s8",
"vst1q_s8",
"vst1_s16",
"vst1q_s16",
"vst1_s32",
"vst1q_s32",
"vst1_s64",
"vst1q_s64",
"vst1_u8",
"vst1q_u8",
"vst1_u16",
"vst1q_u16",
"vst1_u32",
"vst1q_u32",
"vst1_u64",
"vst1q_u64",
"vst1_p8",
"vst1q_p8",
"vst1_p16",
"vst1q_p16",
"vst1_f32",
"vst1q_f32",
"vpadal_s8",
"vpadal_s16",
"vpadal_s32",