diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs index 9097d269893e..95aea69ef775 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs @@ -12,7 +12,8 @@ pub use self::generated::*; use crate::{ core_arch::{arm_shared::*, simd::*, simd_llvm::*}, hint::unreachable_unchecked, - mem::{transmute, zeroed}, + mem::{size_of, transmute, zeroed}, + ptr::copy_nonoverlapping, }; #[cfg(test)] use stdarch_test::assert_instr; @@ -795,6 +796,344 @@ pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t { transmute(f64x2::new(*ptr, *ptr.offset(1))) } +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { + copy_nonoverlapping( + &a as *const int8x8_t as *const i8, + ptr as *mut i8, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { + copy_nonoverlapping( + &a as *const int8x16_t as *const i8, + ptr as *mut i8, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { + copy_nonoverlapping( + &a as *const int16x4_t as *const i16, + ptr as *mut i16, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { + copy_nonoverlapping( + &a as *const int16x8_t as *const i16, + ptr as *mut i16, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { + copy_nonoverlapping( + &a as *const int32x2_t as *const i32, + ptr as *mut i32, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { + copy_nonoverlapping( + &a as *const int32x4_t as *const i32, + ptr as *mut i32, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { + copy_nonoverlapping( + &a as *const int64x1_t as *const i64, + ptr as *mut i64, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { + copy_nonoverlapping( + &a as *const int64x2_t as *const i64, + ptr as *mut i64, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { + copy_nonoverlapping( + &a as *const uint8x8_t as *const u8, + ptr as *mut u8, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { + copy_nonoverlapping( + &a as *const uint8x16_t as *const u8, + ptr as *mut u8, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { + copy_nonoverlapping( + &a as *const uint16x4_t as *const u16, + ptr as *mut u16, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { + copy_nonoverlapping( + &a as *const uint16x8_t as *const u16, + ptr as *mut u16, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { + copy_nonoverlapping( + &a as *const uint32x2_t as *const u32, + ptr as *mut u32, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { + copy_nonoverlapping( + &a as *const uint32x4_t as *const u32, + ptr as *mut u32, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { + copy_nonoverlapping( + &a as *const uint64x1_t as *const u64, + ptr as *mut u64, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { + copy_nonoverlapping( + &a as *const uint64x2_t as *const u64, + ptr as *mut u64, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { + copy_nonoverlapping( + &a as *const poly8x8_t as *const p8, + ptr as *mut p8, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { + copy_nonoverlapping( + &a as *const poly8x16_t as *const p8, + ptr as *mut p8, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { + copy_nonoverlapping( + &a as *const poly16x4_t as *const p16, + ptr as *mut p16, + size_of::(), + ) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { + copy_nonoverlapping( + &a as *const poly16x8_t as *const p16, + ptr as *mut p16, + size_of::(), + ) +} + +// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { + copy_nonoverlapping( + &a as *const poly64x1_t as *const p64, + ptr as *mut p64, + size_of::(), + ) +} + +// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { + copy_nonoverlapping( + &a as *const poly64x2_t as *const p64, + ptr as *mut p64, + size_of::(), + ) +} + +// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { + copy_nonoverlapping( + &a as *const float32x2_t as *const f32, + ptr as *mut f32, + size_of::(), + ) +} + +// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { + copy_nonoverlapping( + &a as *const float32x4_t as *const f32, + ptr as *mut f32, + size_of::(), + ) +} + +// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1_f64(ptr: *mut f64, a: float64x1_t) { + copy_nonoverlapping( + &a as *const float64x1_t as *const f64, + ptr as *mut f64, + size_of::(), + ) +} + +// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(str))] +#[allow(clippy::cast_ptr_alignment)] +pub unsafe fn vst1q_f64(ptr: *mut f64, a: float64x2_t) { + copy_nonoverlapping( + &a as *const float64x2_t as *const f64, + ptr as *mut f64, + size_of::(), + ) +} + /// Absolute Value (wrapping). #[inline] #[target_feature(enable = "neon")] @@ -4609,6 +4948,52 @@ mod tests { let e = 136_u16; assert_eq!(r, e); } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_p64() { + let mut vals = [0_u64; 2]; + let a = u64x1::new(1); + + vst1_p64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_p64() { + let mut vals = [0_u64; 3]; + let a = u64x2::new(1, 2); + + vst1q_p64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_f64() { + let mut vals = [0_f64; 2]; + let a = f64x1::new(1.); + + vst1_f64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0.); + assert_eq!(vals[1], 1.); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_f64() { + let mut vals = [0_f64; 3]; + let a = f64x2::new(1., 2.); + + vst1q_f64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0.); + assert_eq!(vals[1], 1.); + assert_eq!(vals[2], 2.); + } } #[cfg(test)] @@ -4623,3 +5008,7 @@ mod shift_and_insert_tests; #[cfg(test)] #[path = "../../arm_shared/neon/load_tests.rs"] mod load_tests; + +#[cfg(test)] +#[path = "../../arm_shared/neon/store_tests.rs"] +mod store_tests; diff --git a/library/stdarch/crates/core_arch/src/arm/neon.rs b/library/stdarch/crates/core_arch/src/arm/neon.rs index 6bb1d0bfd4c8..473c753fd638 100644 --- a/library/stdarch/crates/core_arch/src/arm/neon.rs +++ b/library/stdarch/crates/core_arch/src/arm/neon.rs @@ -1,7 +1,7 @@ use crate::core_arch::arm_shared::neon::*; use crate::core_arch::simd::{f32x4, i32x4, u32x4}; use crate::core_arch::simd_llvm::*; -use crate::mem::transmute; +use crate::mem::{align_of, transmute}; #[cfg(test)] use stdarch_test::assert_instr; @@ -11,8 +11,6 @@ pub(crate) type p8 = u8; #[allow(non_camel_case_types)] pub(crate) type p16 = u16; -use crate::mem::align_of; - #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.arm.neon.vbsl.v8i8"] @@ -107,6 +105,27 @@ extern "C" { fn vld1_v2f32(addr: *const i8, align: i32) -> float32x2_t; #[link_name = "llvm.arm.neon.vld1.v4f32.p0i8"] fn vld1q_v4f32(addr: *const i8, align: i32) -> float32x4_t; + + #[link_name = "llvm.arm.neon.vst1.p0i8.v8i8"] + fn vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32); + #[link_name = "llvm.arm.neon.vst1.p0i8.v16i8"] + fn vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32); + #[link_name = "llvm.arm.neon.vst1.p0i8.v4i16"] + fn vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32); + #[link_name = "llvm.arm.neon.vst1.p0i8.v8i16"] + fn vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32); + #[link_name = "llvm.arm.neon.vst1.p0i8.v2i32"] + fn vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32); + #[link_name = "llvm.arm.neon.vst1.p0i8.v4i32"] + fn vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32); + #[link_name = "llvm.arm.neon.vst1.p0i8.v1i64"] + fn vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32); + #[link_name = "llvm.arm.neon.vst1.p0i8.v2i64"] + fn vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32); + #[link_name = "llvm.arm.neon.vst1.p0i8.v2f32"] + fn vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32); + #[link_name = "llvm.arm.neon.vst1.p0i8.v4f32"] + fn vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32); } /// Load multiple single-element structures to one, two, three, or four registers. @@ -285,6 +304,182 @@ pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { vld1q_v4f32(ptr as *const i8, align_of::() as i32) } +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { + vst1_v8i8(ptr as *const i8, a, align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { + vst1q_v16i8(ptr as *const i8, a, align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { + vst1_v4i16(ptr as *const i8, a, align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { + vst1q_v8i16(ptr as *const i8, a, align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { + vst1_v2i32(ptr as *const i8, a, align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { + vst1q_v4i32(ptr as *const i8, a, align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { + vst1_v1i64(ptr as *const i8, a, align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { + vst1q_v2i64(ptr as *const i8, a, align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { + vst1_v8i8(ptr as *const i8, transmute(a), align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { + vst1q_v16i8(ptr as *const i8, transmute(a), align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { + vst1_v4i16(ptr as *const i8, transmute(a), align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { + vst1q_v8i16(ptr as *const i8, transmute(a), align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { + vst1_v2i32(ptr as *const i8, transmute(a), align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { + vst1q_v4i32(ptr as *const i8, transmute(a), align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { + vst1_v1i64(ptr as *const i8, transmute(a), align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { + vst1q_v2i64(ptr as *const i8, transmute(a), align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { + vst1_v8i8(ptr as *const i8, transmute(a), align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { + vst1q_v16i8(ptr as *const i8, transmute(a), align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { + vst1_v4i16(ptr as *const i8, transmute(a), align_of::() as i32) +} + +/// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { + vst1q_v8i16(ptr as *const i8, transmute(a), align_of::() as i32) +} + +// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { + vst1_v2f32(ptr as *const i8, a, align_of::() as i32) +} + +// Store multiple single-element structures from one, two, three, or four registers. +#[inline] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(str))] +pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { + vst1q_v4f32(ptr as *const i8, a, align_of::() as i32) +} + /// Table look-up #[inline] #[cfg(target_endian = "little")] diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs index 736ac26f5af7..9ed9f77aa775 100644 --- a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs @@ -9992,3 +9992,6 @@ mod shift_and_insert_tests; #[cfg(all(test, target_arch = "arm"))] mod load_tests; + +#[cfg(all(test, target_arch = "arm"))] +mod store_tests; diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/store_tests.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/store_tests.rs new file mode 100644 index 000000000000..c1e355fd9a09 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/store_tests.rs @@ -0,0 +1,366 @@ +//! Tests for ARM+v7+neon store (vst1) intrinsics. +//! +//! These are included in `{arm, aarch64}::neon`. + +use super::*; + +#[cfg(target_arch = "arm")] +use crate::core_arch::arm::*; + +#[cfg(target_arch = "aarch64")] +use crate::core_arch::aarch64::*; + +use crate::core_arch::simd::*; +use stdarch_test::simd_test; + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_s8() { + let mut vals = [0_i8; 9]; + let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1_s8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_s8() { + let mut vals = [0_i8; 17]; + let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + + vst1q_s8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); + assert_eq!(vals[9], 9); + assert_eq!(vals[10], 10); + assert_eq!(vals[11], 11); + assert_eq!(vals[12], 12); + assert_eq!(vals[13], 13); + assert_eq!(vals[14], 14); + assert_eq!(vals[15], 15); + assert_eq!(vals[16], 16); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_s16() { + let mut vals = [0_i16; 5]; + let a = i16x4::new(1, 2, 3, 4); + + vst1_s16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_s16() { + let mut vals = [0_i16; 9]; + let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1q_s16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_s32() { + let mut vals = [0_i32; 3]; + let a = i32x2::new(1, 2); + + vst1_s32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_s32() { + let mut vals = [0_i32; 5]; + let a = i32x4::new(1, 2, 3, 4); + + vst1q_s32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_s64() { + let mut vals = [0_i64; 2]; + let a = i64x1::new(1); + + vst1_s64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_s64() { + let mut vals = [0_i64; 3]; + let a = i64x2::new(1, 2); + + vst1q_s64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_u8() { + let mut vals = [0_u8; 9]; + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1_u8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_u8() { + let mut vals = [0_u8; 17]; + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + + vst1q_u8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); + assert_eq!(vals[9], 9); + assert_eq!(vals[10], 10); + assert_eq!(vals[11], 11); + assert_eq!(vals[12], 12); + assert_eq!(vals[13], 13); + assert_eq!(vals[14], 14); + assert_eq!(vals[15], 15); + assert_eq!(vals[16], 16); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_u16() { + let mut vals = [0_u16; 5]; + let a = u16x4::new(1, 2, 3, 4); + + vst1_u16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_u16() { + let mut vals = [0_u16; 9]; + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1q_u16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_u32() { + let mut vals = [0_u32; 3]; + let a = u32x2::new(1, 2); + + vst1_u32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_u32() { + let mut vals = [0_u32; 5]; + let a = u32x4::new(1, 2, 3, 4); + + vst1q_u32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_u64() { + let mut vals = [0_u64; 2]; + let a = u64x1::new(1); + + vst1_u64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_u64() { + let mut vals = [0_u64; 3]; + let a = u64x2::new(1, 2); + + vst1q_u64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_p8() { + let mut vals = [0_u8; 9]; + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1_p8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_p8() { + let mut vals = [0_u8; 17]; + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + + vst1q_p8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); + assert_eq!(vals[9], 9); + assert_eq!(vals[10], 10); + assert_eq!(vals[11], 11); + assert_eq!(vals[12], 12); + assert_eq!(vals[13], 13); + assert_eq!(vals[14], 14); + assert_eq!(vals[15], 15); + assert_eq!(vals[16], 16); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_p16() { + let mut vals = [0_u16; 5]; + let a = u16x4::new(1, 2, 3, 4); + + vst1_p16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_p16() { + let mut vals = [0_u16; 9]; + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1q_p16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_f32() { + let mut vals = [0_f32; 3]; + let a = f32x2::new(1., 2.); + + vst1_f32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0.); + assert_eq!(vals[1], 1.); + assert_eq!(vals[2], 2.); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_f32() { + let mut vals = [0_f32; 5]; + let a = f32x4::new(1., 2., 3., 4.); + + vst1q_f32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0.); + assert_eq!(vals[1], 1.); + assert_eq!(vals[2], 2.); + assert_eq!(vals[3], 3.); + assert_eq!(vals[4], 4.); +} diff --git a/library/stdarch/crates/stdarch-verify/tests/arm.rs b/library/stdarch/crates/stdarch-verify/tests/arm.rs index a2e9211758e0..da17eb313ca0 100644 --- a/library/stdarch/crates/stdarch-verify/tests/arm.rs +++ b/library/stdarch/crates/stdarch-verify/tests/arm.rs @@ -436,6 +436,28 @@ fn verify_all_signatures() { "vld1q_f32", "vld1_f64", "vld1q_f64", + "vst1_s8", + "vst1q_s8", + "vst1_s16", + "vst1q_s16", + "vst1_s32", + "vst1q_s32", + "vst1_s64", + "vst1q_s64", + "vst1_u8", + "vst1q_u8", + "vst1_u16", + "vst1q_u16", + "vst1_u32", + "vst1q_u32", + "vst1_u64", + "vst1q_u64", + "vst1_p8", + "vst1q_p8", + "vst1_p16", + "vst1q_p16", + "vst1_f32", + "vst1q_f32", "vpadal_s8", "vpadal_s16", "vpadal_s32",