diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 97e794f2a1cb..dd6494d5f68d 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -4592,6 +4592,84 @@ pub unsafe fn vld1q_f64_x4(a: *const f64) -> float64x2x4_t { vld1q_f64_x4_(a) } +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_f64_x2(a: *mut f64, b: float64x1x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v1f64.p0f64")] + fn vst1_f64_x2_(a: float64x1_t, b: float64x1_t, ptr: *mut f64); + } + vst1_f64_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_f64_x2(a: *mut f64, b: float64x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v2f64.p0f64")] + fn vst1q_f64_x2_(a: float64x2_t, b: float64x2_t, ptr: *mut f64); + } + vst1q_f64_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_f64_x3(a: *mut f64, b: float64x1x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v1f64.p0f64")] + fn vst1_f64_x3_(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut f64); + } + vst1_f64_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_f64_x3(a: *mut f64, b: float64x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v2f64.p0f64")] + fn vst1q_f64_x3_(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut f64); + } + vst1q_f64_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1_f64_x4(a: *mut f64, b: float64x1x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v1f64.p0f64")] + fn vst1_f64_x4_(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, ptr: *mut f64); + } + vst1_f64_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +pub unsafe fn vst1q_f64_x4(a: *mut f64, b: float64x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v2f64.p0f64")] + fn vst1q_f64_x4_(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, ptr: *mut f64); + } + vst1q_f64_x4_(b.0, b.1, b.2, b.3, a) +} + /// Multiply #[inline] #[target_feature(enable = "neon")] @@ -12983,6 +13061,60 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vst1_f64_x2() { + let a: [f64; 3] = [0., 1., 2.]; + let e: [f64; 2] = [1., 2.]; + let mut r: [f64; 2] = [0f64; 2]; + vst1_f64_x2(r.as_mut_ptr(), vld1_f64_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_f64_x2() { + let a: [f64; 5] = [0., 1., 2., 3., 4.]; + let e: [f64; 4] = [1., 2., 3., 4.]; + let mut r: [f64; 4] = [0f64; 4]; + vst1q_f64_x2(r.as_mut_ptr(), vld1q_f64_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_f64_x3() { + let a: [f64; 4] = [0., 1., 2., 3.]; + let e: [f64; 3] = [1., 2., 3.]; + let mut r: [f64; 3] = [0f64; 3]; + vst1_f64_x3(r.as_mut_ptr(), vld1_f64_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_f64_x3() { + let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.]; + let e: [f64; 6] = [1., 2., 3., 4., 5., 6.]; + let mut r: [f64; 6] = [0f64; 6]; + vst1q_f64_x3(r.as_mut_ptr(), vld1q_f64_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_f64_x4() { + let a: [f64; 5] = [0., 1., 2., 3., 4.]; + let e: [f64; 4] = [1., 2., 3., 4.]; + let mut r: [f64; 4] = [0f64; 4]; + vst1_f64_x4(r.as_mut_ptr(), vld1_f64_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_f64_x4() { + let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; + let e: [f64; 8] = [1., 2., 3., 4., 5., 6., 7., 8.]; + let mut r: [f64; 8] = [0f64; 8]; + vst1q_f64_x4(r.as_mut_ptr(), vld1q_f64_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vmul_f64() { let a: f64 = 1.0; diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs index bdf8937d9d9d..e8b76ae37706 100644 --- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs @@ -6698,6 +6698,1206 @@ pub unsafe fn vld1q_f32_x4(a: *const f32) -> float32x4x4_t { vld1q_f32_x4_(a) } +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i8.v8i8")] + fn vst1_s8_x2_(ptr: *mut i8, a: int8x8_t, b: int8x8_t); + } +vst1_s8_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v8i8.p0i8")] + fn vst1_s8_x2_(a: int8x8_t, b: int8x8_t, ptr: *mut i8); + } +vst1_s8_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i16.v4i16")] + fn vst1_s16_x2_(ptr: *mut i16, a: int16x4_t, b: int16x4_t); + } +vst1_s16_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v4i16.p0i16")] + fn vst1_s16_x2_(a: int16x4_t, b: int16x4_t, ptr: *mut i16); + } +vst1_s16_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i32.v2i32")] + fn vst1_s32_x2_(ptr: *mut i32, a: int32x2_t, b: int32x2_t); + } +vst1_s32_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v2i32.p0i32")] + fn vst1_s32_x2_(a: int32x2_t, b: int32x2_t, ptr: *mut i32); + } +vst1_s32_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i64.v1i64")] + fn vst1_s64_x2_(ptr: *mut i64, a: int64x1_t, b: int64x1_t); + } +vst1_s64_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v1i64.p0i64")] + fn vst1_s64_x2_(a: int64x1_t, b: int64x1_t, ptr: *mut i64); + } +vst1_s64_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i8.v16i8")] + fn vst1q_s8_x2_(ptr: *mut i8, a: int8x16_t, b: int8x16_t); + } +vst1q_s8_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v16i8.p0i8")] + fn vst1q_s8_x2_(a: int8x16_t, b: int8x16_t, ptr: *mut i8); + } +vst1q_s8_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i16.v8i16")] + fn vst1q_s16_x2_(ptr: *mut i16, a: int16x8_t, b: int16x8_t); + } +vst1q_s16_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v8i16.p0i16")] + fn vst1q_s16_x2_(a: int16x8_t, b: int16x8_t, ptr: *mut i16); + } +vst1q_s16_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i32.v4i32")] + fn vst1q_s32_x2_(ptr: *mut i32, a: int32x4_t, b: int32x4_t); + } +vst1q_s32_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v4i32.p0i32")] + fn vst1q_s32_x2_(a: int32x4_t, b: int32x4_t, ptr: *mut i32); + } +vst1q_s32_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i64.v2i64")] + fn vst1q_s64_x2_(ptr: *mut i64, a: int64x2_t, b: int64x2_t); + } +vst1q_s64_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v2i64.p0i64")] + fn vst1q_s64_x2_(a: int64x2_t, b: int64x2_t, ptr: *mut i64); + } +vst1q_s64_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i8.v8i8")] + fn vst1_s8_x3_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t); + } +vst1_s8_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v8i8.p0i8")] + fn vst1_s8_x3_(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8); + } +vst1_s8_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i16.v4i16")] + fn vst1_s16_x3_(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t); + } +vst1_s16_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v4i16.p0i16")] + fn vst1_s16_x3_(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i16); + } +vst1_s16_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i32.v2i32")] + fn vst1_s32_x3_(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t); + } +vst1_s32_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v2i32.p0i32")] + fn vst1_s32_x3_(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i32); + } +vst1_s32_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i64.v1i64")] + fn vst1_s64_x3_(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t); + } +vst1_s64_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v1i64.p0i64")] + fn vst1_s64_x3_(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i64); + } +vst1_s64_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i8.v16i8")] + fn vst1q_s8_x3_(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t); + } +vst1q_s8_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v16i8.p0i8")] + fn vst1q_s8_x3_(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8); + } +vst1q_s8_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i16.v8i16")] + fn vst1q_s16_x3_(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t); + } +vst1q_s16_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v8i16.p0i16")] + fn vst1q_s16_x3_(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i16); + } +vst1q_s16_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i32.v4i32")] + fn vst1q_s32_x3_(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t); + } +vst1q_s32_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v4i32.p0i32")] + fn vst1q_s32_x3_(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i32); + } +vst1q_s32_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i64.v2i64")] + fn vst1q_s64_x3_(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t); + } +vst1q_s64_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v2i64.p0i64")] + fn vst1q_s64_x3_(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i64); + } +vst1q_s64_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i8.v8i8")] + fn vst1_s8_x4_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t); + } +vst1_s8_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v8i8.p0i8")] + fn vst1_s8_x4_(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8); + } +vst1_s8_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i16.v4i16")] + fn vst1_s16_x4_(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t); + } +vst1_s16_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v4i16.p0i16")] + fn vst1_s16_x4_(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i16); + } +vst1_s16_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i32.v2i32")] + fn vst1_s32_x4_(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t); + } +vst1_s32_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v2i32.p0i32")] + fn vst1_s32_x4_(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i32); + } +vst1_s32_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i64.v1i64")] + fn vst1_s64_x4_(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t); + } +vst1_s64_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v1i64.p0i64")] + fn vst1_s64_x4_(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i64); + } +vst1_s64_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i8.v16i8")] + fn vst1q_s8_x4_(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t); + } +vst1q_s8_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v16i8.p0i8")] + fn vst1q_s8_x4_(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8); + } +vst1q_s8_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i16.v8i16")] + fn vst1q_s16_x4_(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t); + } +vst1q_s16_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v8i16.p0i16")] + fn vst1q_s16_x4_(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i16); + } +vst1q_s16_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i32.v4i32")] + fn vst1q_s32_x4_(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t); + } +vst1q_s32_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v4i32.p0i32")] + fn vst1q_s32_x4_(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i32); + } +vst1q_s32_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i64.v2i64")] + fn vst1q_s64_x4_(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t); + } +vst1q_s64_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v2i64.p0i64")] + fn vst1q_s64_x4_(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i64); + } +vst1q_s64_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_u8_x2(a: *mut u8, b: uint8x8x2_t) { + vst1_s8_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_u16_x2(a: *mut u16, b: uint16x4x2_t) { + vst1_s16_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_u32_x2(a: *mut u32, b: uint32x2x2_t) { + vst1_s32_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_u64_x2(a: *mut u64, b: uint64x1x2_t) { + vst1_s64_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_u8_x2(a: *mut u8, b: uint8x16x2_t) { + vst1q_s8_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_u16_x2(a: *mut u16, b: uint16x8x2_t) { + vst1q_s16_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_u32_x2(a: *mut u32, b: uint32x4x2_t) { + vst1q_s32_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_u64_x2(a: *mut u64, b: uint64x2x2_t) { + vst1q_s64_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_u8_x3(a: *mut u8, b: uint8x8x3_t) { + vst1_s8_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_u16_x3(a: *mut u16, b: uint16x4x3_t) { + vst1_s16_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_u32_x3(a: *mut u32, b: uint32x2x3_t) { + vst1_s32_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_u64_x3(a: *mut u64, b: uint64x1x3_t) { + vst1_s64_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_u8_x3(a: *mut u8, b: uint8x16x3_t) { + vst1q_s8_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_u16_x3(a: *mut u16, b: uint16x8x3_t) { + vst1q_s16_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_u32_x3(a: *mut u32, b: uint32x4x3_t) { + vst1q_s32_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_u64_x3(a: *mut u64, b: uint64x2x3_t) { + vst1q_s64_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_u8_x4(a: *mut u8, b: uint8x8x4_t) { + vst1_s8_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_u16_x4(a: *mut u16, b: uint16x4x4_t) { + vst1_s16_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_u32_x4(a: *mut u32, b: uint32x2x4_t) { + vst1_s32_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_u64_x4(a: *mut u64, b: uint64x1x4_t) { + vst1_s64_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_u8_x4(a: *mut u8, b: uint8x16x4_t) { + vst1q_s8_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_u16_x4(a: *mut u16, b: uint16x8x4_t) { + vst1q_s16_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_u32_x4(a: *mut u32, b: uint32x4x4_t) { + vst1q_s32_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_u64_x4(a: *mut u64, b: uint64x2x4_t) { + vst1q_s64_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_p8_x2(a: *mut p8, b: poly8x8x2_t) { + vst1_s8_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_p8_x3(a: *mut p8, b: poly8x8x3_t) { + vst1_s8_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_p8_x4(a: *mut p8, b: poly8x8x4_t) { + vst1_s8_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_p8_x2(a: *mut p8, b: poly8x16x2_t) { + vst1q_s8_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_p8_x3(a: *mut p8, b: poly8x16x3_t) { + vst1q_s8_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_p8_x4(a: *mut p8, b: poly8x16x4_t) { + vst1q_s8_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_p16_x2(a: *mut p16, b: poly16x4x2_t) { + vst1_s16_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_p16_x3(a: *mut p16, b: poly16x4x3_t) { + vst1_s16_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_p16_x4(a: *mut p16, b: poly16x4x4_t) { + vst1_s16_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_p16_x2(a: *mut p16, b: poly16x8x2_t) { + vst1q_s16_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) { + vst1q_s16_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) { + vst1q_s16_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0f32.v2f32")] + fn vst1_f32_x2_(ptr: *mut f32, a: float32x2_t, b: float32x2_t); + } +vst1_f32_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v2f32.p0f32")] + fn vst1_f32_x2_(a: float32x2_t, b: float32x2_t, ptr: *mut f32); + } +vst1_f32_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0f32.v4f32")] + fn vst1q_f32_x2_(ptr: *mut f32, a: float32x4_t, b: float32x4_t); + } +vst1q_f32_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v4f32.p0f32")] + fn vst1q_f32_x2_(a: float32x4_t, b: float32x4_t, ptr: *mut f32); + } +vst1q_f32_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0f32.v2f32")] + fn vst1_f32_x3_(ptr: *mut f32, a: float32x2_t, b: float32x2_t, c: float32x2_t); + } +vst1_f32_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v2f32.p0f32")] + fn vst1_f32_x3_(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut f32); + } +vst1_f32_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0f32.v4f32")] + fn vst1q_f32_x3_(ptr: *mut f32, a: float32x4_t, b: float32x4_t, c: float32x4_t); + } +vst1q_f32_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v4f32.p0f32")] + fn vst1q_f32_x3_(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut f32); + } +vst1q_f32_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0f32.v2f32")] + fn vst1_f32_x4_(ptr: *mut f32, a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t); + } +vst1_f32_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v2f32.p0f32")] + fn vst1_f32_x4_(a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, ptr: *mut f32); + } +vst1_f32_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0f32.v4f32")] + fn vst1q_f32_x4_(ptr: *mut f32, a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t); + } +vst1q_f32_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v4f32.p0f32")] + fn vst1q_f32_x4_(a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, ptr: *mut f32); + } +vst1q_f32_x4_(b.0, b.1, b.2, b.3, a) +} + /// Multiply #[inline] #[target_feature(enable = "neon")] @@ -20329,6 +21529,600 @@ mod test { assert_eq!(r, e); } + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s8_x2() { + let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [i8; 16] = [0i8; 16]; + vst1_s8_x2(r.as_mut_ptr(), vld1_s8_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s16_x2() { + let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [i16; 8] = [0i16; 8]; + vst1_s16_x2(r.as_mut_ptr(), vld1_s16_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s32_x2() { + let a: [i32; 5] = [0, 1, 2, 3, 4]; + let e: [i32; 4] = [1, 2, 3, 4]; + let mut r: [i32; 4] = [0i32; 4]; + vst1_s32_x2(r.as_mut_ptr(), vld1_s32_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s64_x2() { + let a: [i64; 3] = [0, 1, 2]; + let e: [i64; 2] = [1, 2]; + let mut r: [i64; 2] = [0i64; 2]; + vst1_s64_x2(r.as_mut_ptr(), vld1_s64_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s8_x2() { + let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [i8; 32] = [0i8; 32]; + vst1q_s8_x2(r.as_mut_ptr(), vld1q_s8_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s16_x2() { + let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [i16; 16] = [0i16; 16]; + vst1q_s16_x2(r.as_mut_ptr(), vld1q_s16_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s32_x2() { + let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [i32; 8] = [0i32; 8]; + vst1q_s32_x2(r.as_mut_ptr(), vld1q_s32_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s64_x2() { + let a: [i64; 5] = [0, 1, 2, 3, 4]; + let e: [i64; 4] = [1, 2, 3, 4]; + let mut r: [i64; 4] = [0i64; 4]; + vst1q_s64_x2(r.as_mut_ptr(), vld1q_s64_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s8_x3() { + let a: [i8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [i8; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let mut r: [i8; 24] = [0i8; 24]; + vst1_s8_x3(r.as_mut_ptr(), vld1_s8_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s16_x3() { + let a: [i16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [i16; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let mut r: [i16; 12] = [0i16; 12]; + vst1_s16_x3(r.as_mut_ptr(), vld1_s16_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s32_x3() { + let a: [i32; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [i32; 6] = [1, 2, 3, 4, 5, 6]; + let mut r: [i32; 6] = [0i32; 6]; + vst1_s32_x3(r.as_mut_ptr(), vld1_s32_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s64_x3() { + let a: [i64; 4] = [0, 1, 2, 3]; + let e: [i64; 3] = [1, 2, 3]; + let mut r: [i64; 3] = [0i64; 3]; + vst1_s64_x3(r.as_mut_ptr(), vld1_s64_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s8_x3() { + let a: [i8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i8; 48] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [i8; 48] = [0i8; 48]; + vst1q_s8_x3(r.as_mut_ptr(), vld1q_s8_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s16_x3() { + let a: [i16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [i16; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let mut r: [i16; 24] = [0i16; 24]; + vst1q_s16_x3(r.as_mut_ptr(), vld1q_s16_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s32_x3() { + let a: [i32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [i32; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let mut r: [i32; 12] = [0i32; 12]; + vst1q_s32_x3(r.as_mut_ptr(), vld1q_s32_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s64_x3() { + let a: [i64; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [i64; 6] = [1, 2, 3, 4, 5, 6]; + let mut r: [i64; 6] = [0i64; 6]; + vst1q_s64_x3(r.as_mut_ptr(), vld1q_s64_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s8_x4() { + let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [i8; 32] = [0i8; 32]; + vst1_s8_x4(r.as_mut_ptr(), vld1_s8_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s16_x4() { + let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [i16; 16] = [0i16; 16]; + vst1_s16_x4(r.as_mut_ptr(), vld1_s16_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s32_x4() { + let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [i32; 8] = [0i32; 8]; + vst1_s32_x4(r.as_mut_ptr(), vld1_s32_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s64_x4() { + let a: [i64; 5] = [0, 1, 2, 3, 4]; + let e: [i64; 4] = [1, 2, 3, 4]; + let mut r: [i64; 4] = [0i64; 4]; + vst1_s64_x4(r.as_mut_ptr(), vld1_s64_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s8_x4() { + let a: [i8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [i8; 64] = [0i8; 64]; + vst1q_s8_x4(r.as_mut_ptr(), vld1q_s8_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s16_x4() { + let a: [i16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [i16; 32] = [0i16; 32]; + vst1q_s16_x4(r.as_mut_ptr(), vld1q_s16_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s32_x4() { + let a: [i32; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i32; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [i32; 16] = [0i32; 16]; + vst1q_s32_x4(r.as_mut_ptr(), vld1q_s32_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s64_x4() { + let a: [i64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i64; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [i64; 8] = [0i64; 8]; + vst1q_s64_x4(r.as_mut_ptr(), vld1q_s64_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u8_x2() { + let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u8; 16] = [0u8; 16]; + vst1_u8_x2(r.as_mut_ptr(), vld1_u8_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u16_x2() { + let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u16; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [u16; 8] = [0u16; 8]; + vst1_u16_x2(r.as_mut_ptr(), vld1_u16_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u32_x2() { + let a: [u32; 5] = [0, 1, 2, 3, 4]; + let e: [u32; 4] = [1, 2, 3, 4]; + let mut r: [u32; 4] = [0u32; 4]; + vst1_u32_x2(r.as_mut_ptr(), vld1_u32_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u64_x2() { + let a: [u64; 3] = [0, 1, 2]; + let e: [u64; 2] = [1, 2]; + let mut r: [u64; 2] = [0u64; 2]; + vst1_u64_x2(r.as_mut_ptr(), vld1_u64_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u8_x2() { + let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [u8; 32] = [0u8; 32]; + vst1q_u8_x2(r.as_mut_ptr(), vld1q_u8_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u16_x2() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u16; 16] = [0u16; 16]; + vst1q_u16_x2(r.as_mut_ptr(), vld1q_u16_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u32_x2() { + let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u32; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [u32; 8] = [0u32; 8]; + vst1q_u32_x2(r.as_mut_ptr(), vld1q_u32_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u64_x2() { + let a: [u64; 5] = [0, 1, 2, 3, 4]; + let e: [u64; 4] = [1, 2, 3, 4]; + let mut r: [u64; 4] = [0u64; 4]; + vst1q_u64_x2(r.as_mut_ptr(), vld1q_u64_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u8_x3() { + let a: [u8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [u8; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let mut r: [u8; 24] = [0u8; 24]; + vst1_u8_x3(r.as_mut_ptr(), vld1_u8_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u16_x3() { + let a: [u16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [u16; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let mut r: [u16; 12] = [0u16; 12]; + vst1_u16_x3(r.as_mut_ptr(), vld1_u16_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u32_x3() { + let a: [u32; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [u32; 6] = [1, 2, 3, 4, 5, 6]; + let mut r: [u32; 6] = [0u32; 6]; + vst1_u32_x3(r.as_mut_ptr(), vld1_u32_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u64_x3() { + let a: [u64; 4] = [0, 1, 2, 3]; + let e: [u64; 3] = [1, 2, 3]; + let mut r: [u64; 3] = [0u64; 3]; + vst1_u64_x3(r.as_mut_ptr(), vld1_u64_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u8_x3() { + let a: [u8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u8; 48] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u8; 48] = [0u8; 48]; + vst1q_u8_x3(r.as_mut_ptr(), vld1q_u8_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u16_x3() { + let a: [u16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [u16; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let mut r: [u16; 24] = [0u16; 24]; + vst1q_u16_x3(r.as_mut_ptr(), vld1q_u16_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u32_x3() { + let a: [u32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [u32; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let mut r: [u32; 12] = [0u32; 12]; + vst1q_u32_x3(r.as_mut_ptr(), vld1q_u32_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u64_x3() { + let a: [u64; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [u64; 6] = [1, 2, 3, 4, 5, 6]; + let mut r: [u64; 6] = [0u64; 6]; + vst1q_u64_x3(r.as_mut_ptr(), vld1q_u64_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u8_x4() { + let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [u8; 32] = [0u8; 32]; + vst1_u8_x4(r.as_mut_ptr(), vld1_u8_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u16_x4() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u16; 16] = [0u16; 16]; + vst1_u16_x4(r.as_mut_ptr(), vld1_u16_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u32_x4() { + let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u32; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [u32; 8] = [0u32; 8]; + vst1_u32_x4(r.as_mut_ptr(), vld1_u32_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u64_x4() { + let a: [u64; 5] = [0, 1, 2, 3, 4]; + let e: [u64; 4] = [1, 2, 3, 4]; + let mut r: [u64; 4] = [0u64; 4]; + vst1_u64_x4(r.as_mut_ptr(), vld1_u64_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u8_x4() { + let a: [u8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [u8; 64] = [0u8; 64]; + vst1q_u8_x4(r.as_mut_ptr(), vld1q_u8_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u16_x4() { + let a: [u16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [u16; 32] = [0u16; 32]; + vst1q_u16_x4(r.as_mut_ptr(), vld1q_u16_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u32_x4() { + let a: [u32; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u32; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u32; 16] = [0u32; 16]; + vst1q_u32_x4(r.as_mut_ptr(), vld1q_u32_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u64_x4() { + let a: [u64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u64; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [u64; 8] = [0u64; 8]; + vst1q_u64_x4(r.as_mut_ptr(), vld1q_u64_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_p8_x2() { + let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u8; 16] = [0u8; 16]; + vst1_p8_x2(r.as_mut_ptr(), vld1_p8_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_p8_x3() { + let a: [u8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [u8; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let mut r: [u8; 24] = [0u8; 24]; + vst1_p8_x3(r.as_mut_ptr(), vld1_p8_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_p8_x4() { + let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [u8; 32] = [0u8; 32]; + vst1_p8_x4(r.as_mut_ptr(), vld1_p8_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_p8_x2() { + let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [u8; 32] = [0u8; 32]; + vst1q_p8_x2(r.as_mut_ptr(), vld1q_p8_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_p8_x3() { + let a: [u8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u8; 48] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u8; 48] = [0u8; 48]; + vst1q_p8_x3(r.as_mut_ptr(), vld1q_p8_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_p8_x4() { + let a: [u8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [u8; 64] = [0u8; 64]; + vst1q_p8_x4(r.as_mut_ptr(), vld1q_p8_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_p16_x2() { + let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u16; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [u16; 8] = [0u16; 8]; + vst1_p16_x2(r.as_mut_ptr(), vld1_p16_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_p16_x3() { + let a: [u16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [u16; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let mut r: [u16; 12] = [0u16; 12]; + vst1_p16_x3(r.as_mut_ptr(), vld1_p16_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_p16_x4() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u16; 16] = [0u16; 16]; + vst1_p16_x4(r.as_mut_ptr(), vld1_p16_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_p16_x2() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u16; 16] = [0u16; 16]; + vst1q_p16_x2(r.as_mut_ptr(), vld1q_p16_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_p16_x3() { + let a: [u16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [u16; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let mut r: [u16; 24] = [0u16; 24]; + vst1q_p16_x3(r.as_mut_ptr(), vld1q_p16_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_p16_x4() { + let a: [u16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [u16; 32] = [0u16; 32]; + vst1q_p16_x4(r.as_mut_ptr(), vld1q_p16_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_f32_x2() { + let a: [f32; 5] = [0., 1., 2., 3., 4.]; + let e: [f32; 4] = [1., 2., 3., 4.]; + let mut r: [f32; 4] = [0f32; 4]; + vst1_f32_x2(r.as_mut_ptr(), vld1_f32_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_f32_x2() { + let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; + let e: [f32; 8] = [1., 2., 3., 4., 5., 6., 7., 8.]; + let mut r: [f32; 8] = [0f32; 8]; + vst1q_f32_x2(r.as_mut_ptr(), vld1q_f32_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_f32_x3() { + let a: [f32; 7] = [0., 1., 2., 3., 4., 5., 6.]; + let e: [f32; 6] = [1., 2., 3., 4., 5., 6.]; + let mut r: [f32; 6] = [0f32; 6]; + vst1_f32_x3(r.as_mut_ptr(), vld1_f32_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_f32_x3() { + let a: [f32; 13] = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.]; + let e: [f32; 12] = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.]; + let mut r: [f32; 12] = [0f32; 12]; + vst1q_f32_x3(r.as_mut_ptr(), vld1q_f32_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_f32_x4() { + let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; + let e: [f32; 8] = [1., 2., 3., 4., 5., 6., 7., 8.]; + let mut r: [f32; 8] = [0f32; 8]; + vst1_f32_x4(r.as_mut_ptr(), vld1_f32_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_f32_x4() { + let a: [f32; 17] = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.]; + let e: [f32; 16] = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.]; + let mut r: [f32; 16] = [0f32; 16]; + vst1q_f32_x4(r.as_mut_ptr(), vld1q_f32_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vmul_s8() { let a: i8x8 = i8x8::new(1, 2, 1, 2, 1, 2, 1, 2); diff --git a/library/stdarch/crates/stdarch-gen/neon.spec b/library/stdarch/crates/stdarch-gen/neon.spec index 2e2e3cee40b2..789a39488502 100644 --- a/library/stdarch/crates/stdarch-gen/neon.spec +++ b/library/stdarch/crates/stdarch-gen/neon.spec @@ -955,6 +955,7 @@ multi_fn = static_assert-N-1-bits a = 1, 2, 3, 4 n = 2 validate 0.25, 0.5, 0.75, 1. +arm-aarch64-separate aarch64 = scvtf link-aarch64 = vcvtfxs2fp._EXT2_._EXT_ @@ -971,6 +972,7 @@ link-aarch64 = vcvtfxs2fp._EXT2_._EXT_ arm = vcvt link-arm = vcvtfxs2fp._EXT2_._EXT_ const-arm = N:i32 + generate int32x2_t:float32x2_t, int32x4_t:float32x4_t aarch64 = ucvtf @@ -988,6 +990,7 @@ multi_fn = static_assert-N-1-bits a = 0.25, 0.5, 0.75, 1. n = 2 validate 1, 2, 3, 4 +arm-aarch64-separate aarch64 = fcvtzs link-aarch64 = vcvtfp2fxs._EXT2_._EXT_ @@ -2038,7 +2041,7 @@ name = vld1 out-suffix a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 -test = load_test +load_fn aarch64 = ld1 link-aarch64 = ld1x2._EXT2_ @@ -2064,7 +2067,7 @@ multi_fn = transmute, {vld1-outsigned-noext, transmute(a)} a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 -test = load_test +load_fn aarch64 = ld1 arm = vld1 generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t, *const u64:uint64x1x2_t @@ -2083,7 +2086,7 @@ name = vld1 out-suffix a = 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16. validate 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16. -test = load_test +load_fn aarch64 = ld1 link-aarch64 = ld1x2._EXT2_ @@ -2108,6 +2111,80 @@ link-aarch64 = ld1x4._EXT2_ link-arm = vld1x4._EXT2_ generate *const f32:float32x2x4_t, *const f32:float32x4x4_t +/// Store multiple single-element structures from one, two, three, or four registers +name = vst1 +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 +validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 +store_fn +arm-aarch64-separate + +aarch64 = st1 +link-aarch64 = st1x2._EXT3_ +arm = vst1 +link-arm = vst1x2._EXT3_ +generate *mut i8:int8x8x2_t:void, *mut i16:int16x4x2_t:void, *mut i32:int32x2x2_t:void, *mut i64:int64x1x2_t:void +generate *mut i8:int8x16x2_t:void, *mut i16:int16x8x2_t:void, *mut i32:int32x4x2_t:void, *mut i64:int64x2x2_t:void + +link-aarch64 = st1x3._EXT3_ +link-arm = vst1x3._EXT3_ +generate *mut i8:int8x8x3_t:void, *mut i16:int16x4x3_t:void, *mut i32:int32x2x3_t:void, *mut i64:int64x1x3_t:void +generate *mut i8:int8x16x3_t:void, *mut i16:int16x8x3_t:void, *mut i32:int32x4x3_t:void, *mut i64:int64x2x3_t:void + +link-aarch64 = st1x4._EXT3_ +link-arm = vst1x4._EXT3_ +generate *mut i8:int8x8x4_t:void, *mut i16:int16x4x4_t:void, *mut i32:int32x2x4_t:void, *mut i64:int64x1x4_t:void +generate *mut i8:int8x16x4_t:void, *mut i16:int16x8x4_t:void, *mut i32:int32x4x4_t:void, *mut i64:int64x2x4_t:void + +/// Store multiple single-element structures to one, two, three, or four registers +name = vst1 +multi_fn = vst1-signed-noext, transmute(a), transmute(b) +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 +validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 + +store_fn +aarch64 = st1 +arm = vst1 +generate *mut u8:uint8x8x2_t:void, *mut u16:uint16x4x2_t:void, *mut u32:uint32x2x2_t:void, *mut u64:uint64x1x2_t:void +generate *mut u8:uint8x16x2_t:void, *mut u16:uint16x8x2_t:void, *mut u32:uint32x4x2_t:void, *mut u64:uint64x2x2_t:void +generate *mut u8:uint8x8x3_t:void, *mut u16:uint16x4x3_t:void, *mut u32:uint32x2x3_t:void, *mut u64:uint64x1x3_t:void +generate *mut u8:uint8x16x3_t:void, *mut u16:uint16x8x3_t:void, *mut u32:uint32x4x3_t:void, *mut u64:uint64x2x3_t:void +generate *mut u8:uint8x8x4_t:void, *mut u16:uint16x4x4_t:void, *mut u32:uint32x2x4_t:void, *mut u64:uint64x1x4_t:void +generate *mut u8:uint8x16x4_t:void, *mut u16:uint16x8x4_t:void, *mut u32:uint32x4x4_t:void, *mut u64:uint64x2x4_t:void +generate *mut p8:poly8x8x2_t:void, *mut p8:poly8x8x3_t:void, *mut p8:poly8x8x4_t:void +generate *mut p8:poly8x16x2_t:void, *mut p8:poly8x16x3_t:void, *mut p8:poly8x16x4_t:void +generate *mut p16:poly16x4x2_t:void, *mut p16:poly16x4x3_t:void, *mut p16:poly16x4x4_t:void +generate *mut p16:poly16x8x2_t:void, *mut p16:poly16x8x3_t:void, *mut p16:poly16x8x4_t:void + +/// Store multiple single-element structures to one, two, three, or four registers +name = vst1 +a = 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16. +validate 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16. +store_fn +arm-aarch64-separate + +aarch64 = st1 +link-aarch64 = st1x2._EXT3_ +generate *mut f64:float64x1x2_t:void, *mut f64:float64x2x2_t:void + +link-aarch64 = st1x3._EXT3_ +generate *mut f64:float64x1x3_t:void, *mut f64:float64x2x3_t:void + +link-aarch64 = st1x4._EXT3_ +generate *mut f64:float64x1x4_t:void, *mut f64:float64x2x4_t:void + +arm = vst1 +link-aarch64 = st1x2._EXT3_ +link-arm = vst1x2._EXT3_ +generate *mut f32:float32x2x2_t:void, *mut f32:float32x4x2_t:void + +link-aarch64 = st1x3._EXT3_ +link-arm = vst1x3._EXT3_ +generate *mut f32:float32x2x3_t:void, *mut f32:float32x4x3_t:void + +link-aarch64 = st1x4._EXT3_ +link-arm = vst1x4._EXT3_ +generate *mut f32:float32x2x4_t:void, *mut f32:float32x4x4_t:void + /// Multiply name = vmul a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 @@ -3869,6 +3946,7 @@ const-aarch64 = N arm = vqrshrn link-arm = vqrshiftns._EXT2_ const-arm = -N as ttn +arm-aarch64-separate generate int16x8_t:int8x8_t, int32x4_t:int16x4_t, int64x2_t:int32x2_t /// Signed saturating rounded shift right narrow @@ -3915,6 +3993,7 @@ const-aarch64 = N arm = vqrshrn link-arm = vqrshiftnu._EXT2_ const-arm = -N as ttn +arm-aarch64-separate generate uint16x8_t:uint8x8_t, uint32x4_t:uint16x4_t, uint64x2_t:uint32x2_t /// Unsigned saturating rounded shift right narrow @@ -3961,6 +4040,7 @@ const-aarch64 = N arm = vqrshrun link-arm = vqrshiftnsu._EXT2_ const-arm = -N as ttn +arm-aarch64-separate generate int16x8_t:uint8x8_t, int32x4_t:uint16x4_t, int64x2_t:uint32x2_t /// Signed saturating rounded shift right unsigned narrow @@ -4106,6 +4186,7 @@ multi_fn = static_assert-N-1-halfbits a = 0, 4, 8, 12, 16, 20, 24, 28 n = 2 validate 0, 1, 2, 3, 4, 5, 6, 7 +arm-aarch64-separate aarch64 = sqshrn link-aarch64 = sqshrn._EXT2_ @@ -4152,6 +4233,7 @@ multi_fn = static_assert-N-1-halfbits a = 0, 4, 8, 12, 16, 20, 24, 28 n = 2 validate 0, 1, 2, 3, 4, 5, 6, 7 +arm-aarch64-separate aarch64 = uqshrn link-aarch64 = uqshrn._EXT2_ @@ -4198,6 +4280,7 @@ multi_fn = static_assert-N-1-halfbits a = 0, 4, 8, 12, 16, 20, 24, 28 n = 2 validate 0, 1, 2, 3, 4, 5, 6, 7 +arm-aarch64-separate aarch64 = sqshrun link-aarch64 = sqshrun._EXT2_ @@ -4542,6 +4625,7 @@ multi_fn = static_assert-N-1-halfbits a = 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64 n = 2 validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +arm-aarch64-separate aarch64 = rshrn link-aarch64 = rshrn._EXT2_ diff --git a/library/stdarch/crates/stdarch-gen/src/main.rs b/library/stdarch/crates/stdarch-gen/src/main.rs index 493eb05a24b6..82149064d2f2 100644 --- a/library/stdarch/crates/stdarch-gen/src/main.rs +++ b/library/stdarch/crates/stdarch-gen/src/main.rs @@ -448,6 +448,13 @@ enum TargetFeature { AES, } +#[derive(Clone, Copy)] +enum Fntype { + Normal, + Load, + Store, +} + fn type_to_global_type(t: &str) -> &str { match t { "int8x8_t" | "int8x8x2_t" | "int8x8x3_t" | "int8x8x4_t" => "i8x8", @@ -496,35 +503,30 @@ fn type_to_global_type(t: &str) -> &str { } } -fn type_to_native_type(t: &str) -> &str { - match t { - "int8x8_t" | "int8x16_t" | "i8" | "int8x8x2_t" | "int8x8x3_t" | "int8x8x4_t" - | "int8x16x2_t" | "int8x16x3_t" | "int8x16x4_t" => "i8", - "int16x4_t" | "int16x8_t" | "i16" | "int16x4x2_t" | "int16x4x3_t" | "int16x4x4_t" - | "int16x8x2_t" | "int16x8x3_t" | "int16x8x4_t" => "i16", - "int32x2_t" | "int32x4_t" | "i32" | "int32x2x2_t" | "int32x2x3_t" | "int32x2x4_t" - | "int32x4x2_t" | "int32x4x3_t" | "int32x4x4_t" => "i32", - "int64x1_t" | "int64x2_t" | "i64" | "int64x1x2_t" | "int64x1x3_t" | "int64x1x4_t" - | "int64x2x2_t" | "int64x2x3_t" | "int64x2x4_t" => "i64", - "uint8x8_t" | "uint8x16_t" | "u8" | "uint8x8x2_t" | "uint8x8x3_t" | "uint8x8x4_t" - | "uint8x16x2_t" | "uint8x16x3_t" | "uint8x16x4_t" => "u8", - "uint16x4_t" | "uint16x8_t" | "u16" | "uint16x4x2_t" | "uint16x4x3_t" | "uint16x4x4_t" - | "uint16x8x2_t" | "uint16x8x3_t" | "uint16x8x4_t" => "u16", - "uint32x2_t" | "uint32x4_t" | "u32" | "uint32x2x2_t" | "uint32x2x3_t" | "uint32x2x4_t" - | "uint32x4x2_t" | "uint32x4x3_t" | "uint32x4x4_t" => "u32", - "uint64x1_t" | "uint64x2_t" | "u64" | "uint64x1x2_t" | "uint64x1x3_t" | "uint64x1x4_t" - | "uint64x2x2_t" | "uint64x2x3_t" | "uint64x2x4_t" => "u64", - "float16x4_t" | "float16x8_t" => "f16", - "float32x2_t" | "float32x4_t" | "float32x2x2_t" | "float32x2x3_t" | "float32x2x4_t" - | "float32x4x2_t" | "float32x4x3_t" | "float32x4x4_t" => "f32", - "float64x1_t" | "float64x2_t" | "float64x1x2_t" | "float64x1x3_t" | "float64x1x4_t" - | "float64x2x2_t" | "float64x2x3_t" | "float64x2x4_t" => "f64", - "poly8x8_t" | "poly8x16_t" | "poly8x8x2_t" | "poly8x8x3_t" | "poly8x8x4_t" - | "poly8x16x2_t" | "poly8x16x3_t" | "poly8x16x4_t" => "u8", - "poly16x4_t" | "poly16x8_t" | "poly16x4x2_t" | "poly16x4x3_t" | "poly16x4x4_t" - | "poly16x8x2_t" | "poly16x8x3_t" | "poly16x8x4_t" => "u16", - "poly64x1_t" | "poly64x2_t" | "poly64x1x2_t" | "poly64x1x3_t" | "poly64x1x4_t" - | "poly64x2x2_t" | "poly64x2x3_t" | "poly64x2x4_t" => "u64", +fn type_to_sub_type(t: &str) -> String { + let s: Vec<_> = t.split('x').collect(); + match s.len() { + 2 => String::from(t), + 3 => format!("{}x{}_t", s[0], s[1]), + _ => panic!("unknown type: {}", t), + } +} + +fn type_to_native_type(t: &str) -> String { + let s: Vec<_> = t.split('x').collect(); + match s.len() { + 1 => { + assert!(t.contains("*const") || t.contains("*mut")); + let sub: Vec<_> = t.split(' ').collect(); + String::from(sub[1]) + } + 2 | 3 => match &s[0][0..3] { + "int" => format!("i{}", &s[0][3..]), + "uin" => format!("u{}", &s[0][4..]), + "flo" => format!("f{}", &s[0][5..]), + "pol" => format!("u{}", &s[0][4..]), + _ => panic!("unknown type: {}", t), + }, _ => panic!("unknown type: {}", t), } } @@ -563,82 +565,26 @@ fn native_type_to_long_type(t: &str) -> &str { } } -fn type_to_ext(t: &str) -> &str { - match t { - "int8x8_t" => "v8i8", - "int8x16_t" => "v16i8", - "int16x4_t" => "v4i16", - "int16x8_t" => "v8i16", - "int32x2_t" => "v2i32", - "int32x4_t" => "v4i32", - "int64x1_t" => "v1i64", - "int64x2_t" => "v2i64", - "uint8x8_t" => "v8i8", - "uint8x16_t" => "v16i8", - "uint16x4_t" => "v4i16", - "uint16x8_t" => "v8i16", - "uint32x2_t" => "v2i32", - "uint32x4_t" => "v4i32", - "uint64x1_t" => "v1i64", - "uint64x2_t" => "v2i64", - "float16x4_t" => "v4f16", - "float16x8_t" => "v8f16", - "float32x2_t" => "v2f32", - "float32x4_t" => "v4f32", - "float64x1_t" => "v1f64", - "float64x2_t" => "v2f64", - "poly8x8_t" => "v8i8", - "poly8x16_t" => "v16i8", - "poly16x4_t" => "v4i16", - "poly16x8_t" => "v8i16", - "int8x8x2_t" | "int8x8x3_t" | "int8x8x4_t" => "v8i8.p0i8", - "int16x4x2_t" | "int16x4x3_t" | "int16x4x4_t" => "v4i16.p0i16", - "int32x2x2_t" | "int32x2x3_t" | "int32x2x4_t" => "v2i32.p0i32", - "int64x1x2_t" | "int64x1x3_t" | "int64x1x4_t" => "v1i64.p0i64", - "uint8x8x2_t" | "uint8x8x3_t" | "uint8x8x4_t" => "v8i8.p0i8", - "uint16x4x2_t" | "uint16x4x3_t" | "uint16x4x4_t" => "v4i16.p0i16", - "uint32x2x2_t" | "uint32x2x3_t" | "uint32x2x4_t" => "v2i32.p0i32", - "uint64x1x2_t" | "uint64x1x3_t" | "uint64x1x4_t" => "v1i64.p0i64", - "float32x2x2_t" | "float32x2x3_t" | "float32x2x4_t" => "v2f32.p0f32", - "float64x1x2_t" | "float64x1x3_t" | "float64x1x4_t" => "v1f64.p0f64", - "int8x16x2_t" | "int8x16x3_t" | "int8x16x4_t" => "v16i8.p0i8", - "int16x8x2_t" | "int16x8x3_t" | "int16x8x4_t" => "v8i16.p0i16", - "int32x4x2_t" | "int32x4x3_t" | "int32x4x4_t" => "v4i32.p0i32", - "int64x2x2_t" | "int64x2x3_t" | "int64x2x4_t" => "v2i64.p0i64", - "uint8x16x2_t" | "uint8x16x3_t" | "uint8x16x4_t" => "v16i8.p0i8", - "uint16x8x2_t" | "uint16x8x3_t" | "uint16x8x4_t" => "v8i16.p0i16", - "uint32x4x2_t" | "uint32x4x3_t" | "uint32x4x4_t" => "v4i32.p0i32", - "uint64x2x2_t" | "uint64x2x3_t" | "uint64x2x4_t" => "v2i64.p0i64", - "float32x4x2_t" | "float32x4x3_t" | "float32x4x4_t" => "v4f32.p0f32", - "float64x2x2_t" | "float64x2x3_t" | "float64x2x4_t" => "v2f64.p0f64", - "i8" => "i8", - "i16" => "i16", - "i32" => "i32", - "i64" => "i64", - "u8" => "i8", - "u16" => "i16", - "u32" => "i32", - "u64" => "i64", - "f32" => "f32", - "f64" => "f64", - "p64" => "p64", - "p128" => "p128", - "*const i8" => "i8", - "*const i16" => "i16", - "*const i32" => "i32", - "*const i64" => "i64", - "*const u8" => "i8", - "*const u16" => "i16", - "*const u32" => "i32", - "*const u64" => "i64", - "*const f32" => "f32", - "*const f64" => "f64", - /* - "poly64x1_t" => "i64x1", - "poly64x2_t" => "i64x2", - */ - _ => panic!("unknown type for extension: {}", t), +fn type_to_ext(t: &str) -> String { + if !t.contains('x') { + return t.replace("u", "i"); } + let native = type_to_native_type(t); + let sub_ext = match type_sub_len(t) { + 1 => String::new(), + _ => format!(".p0{}", native), + }; + let sub_type = match &native[0..1] { + "i" | "f" => native, + "u" => native.replace("u", "i"), + _ => panic!("unknown type: {}", t), + }; + format!( + "v{}{}{}", + &type_len(&type_to_sub_type(t)).to_string(), + sub_type, + sub_ext + ) } fn type_to_half(t: &str) -> &str { @@ -969,7 +915,7 @@ fn gen_aarch64( target: TargetFeature, fixed: &Vec, multi_fn: &Vec, - test_fn: &str, + fn_type: Fntype, ) -> (String, String) { let name = match suffix { Normal => format!("{}{}", current_name, type_to_suffix(in_t[1])), @@ -1022,6 +968,7 @@ fn gen_aarch64( let current_aarch64 = current_aarch64.clone().unwrap(); let mut ext_c = String::new(); let mut ext_c_const = String::new(); + let mut ext_c_store = String::new(); let mut link_t: Vec = vec![ in_t[0].to_string(), in_t[1].to_string(), @@ -1042,12 +989,18 @@ fn gen_aarch64( } let ext = type_to_ext(in_t[0]); let ext2 = type_to_ext(out_t); + let ext3 = type_to_ext(in_t[1]); let link_aarch64 = if link_aarch64.starts_with("llvm") { - link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2) + link_aarch64 + .replace("_EXT_", &ext) + .replace("_EXT2_", &ext2) + .replace("_EXT3_", &ext3) } else { let mut link = String::from("llvm.aarch64.neon."); link.push_str(&link_aarch64); - link.replace("_EXT_", ext).replace("_EXT2_", ext2) + link.replace("_EXT_", &ext) + .replace("_EXT2_", &ext2) + .replace("_EXT3_", &ext3) }; ext_c = format!( r#"#[allow(improper_ctypes)] @@ -1097,6 +1050,38 @@ fn gen_aarch64( out_t ); } + if matches!(fn_type, Fntype::Store) { + let sub = type_to_sub_type(in_t[1]); + let native = type_to_native_type(in_t[1]); + ext_c_store = format!( + r#"#[allow(improper_ctypes)] + extern "unadjusted" {{ + #[cfg_attr(target_arch = "aarch64", link_name = "{}")] + fn {}({}); + }} + "#, + link_aarch64, + current_fn, + match type_sub_len(in_t[1]) { + 1 => { + format!("a: {}, ptr: *mut {}", sub, native,) + } + 2 => { + format!("a: {}, b: {}, ptr: *mut {}", sub, sub, native,) + } + 3 => { + format!("a: {}, b: {}, c: {}, ptr: *mut {}", sub, sub, sub, native,) + } + 4 => { + format!( + "a: {}, b: {}, c: {}, d: {}, ptr: *mut {}", + sub, sub, sub, sub, native, + ) + } + _ => panic!("unsupported type: {}", in_t[1]), + }, + ); + } }; let const_declare = if let Some(constn) = constn { if constn.contains(":") { @@ -1166,10 +1151,22 @@ fn gen_aarch64( } else { String::new() }; - let trans: [&str; 2] = if link_t[3] != out_t { - ["transmute(", ")"] - } else { - ["", ""] + let fn_decl = { + let fn_output = if out_t == "void" { + String::new() + } else { + format!("-> {} ", out_t) + }; + let fn_inputs = match para_num { + 1 => format!("(a: {})", in_t[0]), + 2 => format!("(a: {}, b: {})", in_t[0], in_t[1]), + 3 => format!("(a: {}, b: {}, c: {})", in_t[0], in_t[1], in_t[2]), + _ => panic!("unsupported parameter number"), + }; + format!( + "pub unsafe fn {}{}{} {}", + name, const_declare, fn_inputs, fn_output + ) }; let call = if let Some(const_aarch64) = const_aarch64 { match para_num { @@ -1202,25 +1199,55 @@ fn gen_aarch64( ), _ => String::new(), } + } else if matches!(fn_type, Fntype::Store) { + match type_sub_len(in_t[1]) { + 1 => format!( + r#"{}{{ + {}{}(b, a) +}}"#, + fn_decl, ext_c_store, current_fn, + ), + 2 => format!( + r#"{}{{ + {}{}(b.0, b.1, a) +}}"#, + fn_decl, ext_c_store, current_fn, + ), + 3 => format!( + r#"{}{{ + {}{}(b.0, b.1, b.2, a) +}}"#, + fn_decl, ext_c_store, current_fn, + ), + 4 => format!( + r#"{}{{ + {}{}(b.0, b.1, b.2, b.3, a) +}}"#, + fn_decl, ext_c_store, current_fn, + ), + _ => panic!("unsupported type: {}", in_t[1]), + } } else { + let trans: [&str; 2] = if link_t[3] != out_t { + ["transmute(", ")"] + } else { + ["", ""] + }; match (multi_calls.len(), para_num, fixed.len()) { (0, 1, 0) => format!( - r#"pub unsafe fn {}{}(a: {}) -> {} {{ + r#"{}{{ {}{}{}(a){} }}"#, - name, const_declare, in_t[0], out_t, ext_c, trans[0], current_fn, trans[1] + fn_decl, ext_c, trans[0], current_fn, trans[1] ), (0, 1, _) => { let fixed: Vec = fixed.iter().take(type_len(in_t[0])).cloned().collect(); format!( - r#"pub unsafe fn {}{}(a: {}) -> {} {{ + r#"{}{{ let b{}; {}{}{}(a, transmute(b)){} }}"#, - name, - const_declare, - in_t[0], - out_t, + fn_decl, values(in_t[0], &fixed), ext_c, trans[0], @@ -1229,34 +1256,34 @@ fn gen_aarch64( ) } (0, 2, _) => format!( - r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{ + r#"{}{{ {}{}{}(a, b){} }}"#, - name, const_declare, in_t[0], in_t[1], out_t, ext_c, trans[0], current_fn, trans[1], + fn_decl, ext_c, trans[0], current_fn, trans[1], ), (0, 3, _) => format!( - r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{ + r#"{}{{ {}{}(a, b, c) }}"#, - name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, current_fn, + fn_decl, ext_c, current_fn, ), (_, 1, _) => format!( - r#"pub unsafe fn {}{}(a: {}) -> {} {{ + r#"{}{{ {}{} }}"#, - name, const_declare, in_t[0], out_t, ext_c, multi_calls, + fn_decl, ext_c, multi_calls, ), (_, 2, _) => format!( - r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{ + r#"{}{{ {}{} }}"#, - name, const_declare, in_t[0], in_t[1], out_t, ext_c, multi_calls, + fn_decl, ext_c, multi_calls, ), (_, 3, _) => format!( - r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{ + r#"{}{{ {}{} }}"#, - name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, multi_calls, + fn_decl, ext_c, multi_calls, ), (_, _, _) => String::new(), } @@ -1271,11 +1298,8 @@ fn gen_aarch64( "#, current_comment, current_target, current_aarch64, const_assert, const_legacy, call ); - - let test = if test_fn == "load_test" { - gen_load_test(&name, in_t, &out_t, current_tests, type_len(out_t)) - } else { - gen_test( + let test = match fn_type { + Fntype::Normal => gen_test( &name, in_t, &out_t, @@ -1283,10 +1307,13 @@ fn gen_aarch64( [type_len(in_t[0]), type_len(in_t[1]), type_len(in_t[2])], type_len(out_t), para_num, - ) + ), + Fntype::Load => gen_load_test(&name, in_t, &out_t, current_tests, type_len(out_t)), + Fntype::Store => gen_store_test(&name, in_t, &out_t, current_tests, type_len(in_t[1])), }; (function, test) } + fn gen_load_test( name: &str, _in_t: &[&str; 3], @@ -1298,7 +1325,7 @@ fn gen_load_test( Option, Vec, )], - len_out: usize, + type_len: usize, ) -> String { let mut test = format!( r#" @@ -1307,10 +1334,10 @@ fn gen_load_test( name, ); for (a, _, _, _, e) in current_tests { - let a: Vec = a.iter().take(len_out + 1).cloned().collect(); - let e: Vec = e.iter().take(len_out).cloned().collect(); + let a: Vec = a.iter().take(type_len + 1).cloned().collect(); + let e: Vec = e.iter().take(type_len).cloned().collect(); let mut input = String::from("["); - for i in 0..type_len(out_t) + 1 { + for i in 0..type_len + 1 { if i != 0 { input.push_str(", "); } @@ -1322,7 +1349,7 @@ fn gen_load_test( if i != 0 { output.push_str(", "); } - let sub_len = type_len(out_t) / type_sub_len(out_t); + let sub_len = type_len / type_sub_len(out_t); if type_to_global_type(out_t) != "f64" { let mut sub_output = format!("{}::new(", type_to_global_type(out_t)); for j in 0..sub_len { @@ -1346,7 +1373,7 @@ fn gen_load_test( assert_eq!(r, e); "#, type_to_native_type(out_t), - type_len(out_t) + 1, + type_len + 1, input, type_to_global_type(out_t), type_sub_len(out_t), @@ -1361,6 +1388,71 @@ fn gen_load_test( test } +fn gen_store_test( + name: &str, + in_t: &[&str; 3], + _out_t: &str, + current_tests: &[( + Vec, + Vec, + Vec, + Option, + Vec, + )], + type_len: usize, +) -> String { + let mut test = format!( + r#" + #[simd_test(enable = "neon")] + unsafe fn test_{}() {{"#, + name, + ); + for (a, _, _, _, e) in current_tests { + let a: Vec = a.iter().take(type_len + 1).cloned().collect(); + let e: Vec = e.iter().take(type_len).cloned().collect(); + let mut input = String::from("["); + for i in 0..type_len + 1 { + if i != 0 { + input.push_str(", "); + } + input.push_str(&a[i]) + } + input.push_str("]"); + let mut output = String::from("["); + for i in 0..type_len { + if i != 0 { + output.push_str(", "); + } + output.push_str(&e[i]) + } + output.push_str("]"); + let t = format!( + r#" + let a: [{}; {}] = {}; + let e: [{}; {}] = {}; + let mut r: [{}; {}] = [0{}; {}]; + {}(r.as_mut_ptr(), {}(a[1..].as_ptr())); + assert_eq!(r, e); +"#, + type_to_native_type(in_t[1]), + type_len + 1, + input, + type_to_native_type(in_t[1]), + type_len, + output, + type_to_native_type(in_t[1]), + type_len, + type_to_native_type(in_t[1]), + type_len, + name, + name.replace("st", "ld"), + ); + test.push_str(&t); + } + test.push_str(" }\n"); + test +} + fn gen_test( name: &str, in_t: &[&str; 3], @@ -1492,7 +1584,8 @@ fn gen_arm( target: TargetFeature, fixed: &Vec, multi_fn: &Vec, - test_fn: &str, + fn_type: Fntype, + separate: bool, ) -> (String, String) { let name = match suffix { Normal => format!("{}{}", current_name, type_to_suffix(in_t[1])), @@ -1609,19 +1702,37 @@ fn gen_arm( } let ext = type_to_ext(in_t[0]); let ext2 = type_to_ext(out_t); + let ext3 = type_to_ext(in_t[1]); + let ext3_arm = if matches!(fn_type, Fntype::Store) { + let s: Vec<_> = ext3.split('.').collect(); + assert_eq!(s.len(), 2); + format!("{}.{}", s[1], s[0]) + } else { + ext3.clone() + }; let link_arm = if link_arm.starts_with("llvm") { - link_arm.replace("_EXT_", ext).replace("_EXT2_", ext2) + link_arm + .replace("_EXT_", &ext) + .replace("_EXT2_", &ext2) + .replace("_EXT3_", &ext3_arm) } else { let mut link = String::from("llvm.arm.neon."); link.push_str(&link_arm); - link.replace("_EXT_", ext).replace("_EXT2_", ext2) + link.replace("_EXT_", &ext) + .replace("_EXT2_", &ext2) + .replace("_EXT3_", &ext3_arm) }; let link_aarch64 = if link_aarch64.starts_with("llvm") { - link_aarch64.replace("_EXT_", ext).replace("_EXT2_", ext2) + link_aarch64 + .replace("_EXT_", &ext) + .replace("_EXT2_", &ext2) + .replace("_EXT3_", &ext3) } else { let mut link = String::from("llvm.aarch64.neon."); link.push_str(&link_aarch64); - link.replace("_EXT_", ext).replace("_EXT2_", ext2) + link.replace("_EXT_", &ext) + .replace("_EXT2_", &ext2) + .replace("_EXT3_", &ext3) }; if out_t == link_arm_t[3] && out_t == link_aarch64_t[3] { ext_c = format!( @@ -1715,6 +1826,30 @@ fn gen_arm( link_arm_t[3] )); } + if matches!(fn_type, Fntype::Store) { + let sub_type = type_to_sub_type(in_t[1]); + ext_c_arm.push_str(&format!( + r#"#[allow(improper_ctypes)] + extern "unadjusted" {{ + #[cfg_attr(target_arch = "arm", link_name = "{}")] + fn {}(ptr: *mut {}, {}); + }} +"#, + link_arm, + current_fn, + type_to_native_type(in_t[0]), + match type_sub_len(in_t[1]) { + 1 => format!("a: {}", sub_type), + 2 => format!("a: {}, b: {}", sub_type, sub_type,), + 3 => format!("a: {}, b: {}, c: {}", sub_type, sub_type, sub_type,), + 4 => format!( + "a: {}, b: {}, c: {}, d: {}", + sub_type, sub_type, sub_type, sub_type, + ), + _ => panic!("unknown type: {}", in_t[1]), + }, + )); + } if const_aarch64.is_some() { ext_c_aarch64.push_str(&format!( r#"#[allow(improper_ctypes)] @@ -1768,6 +1903,30 @@ fn gen_arm( link_aarch64_t[3] )); } + if matches!(fn_type, Fntype::Store) { + let sub_type = type_to_sub_type(in_t[1]); + ext_c_aarch64.push_str(&format!( + r#"#[allow(improper_ctypes)] + extern "unadjusted" {{ + #[cfg_attr(target_arch = "aarch64", link_name = "{}")] + fn {}({}, ptr: *mut {}); + }} +"#, + link_aarch64, + current_fn, + match type_sub_len(in_t[1]) { + 1 => format!("a: {}", sub_type), + 2 => format!("a: {}, b: {}", sub_type, sub_type,), + 3 => format!("a: {}, b: {}, c: {}", sub_type, sub_type, sub_type,), + 4 => format!( + "a: {}, b: {}, c: {}, d: {}", + sub_type, sub_type, sub_type, sub_type, + ), + _ => panic!("unknown type: {}", in_t[1]), + }, + type_to_native_type(in_t[0]), + )); + } }; let const_declare = if let Some(constn) = constn { format!(r#""#, constn) @@ -1808,72 +1967,82 @@ fn gen_arm( } else { String::new() }; - let trans: [&str; 2] = if out_t == link_arm_t[3] && out_t == link_aarch64_t[3] { - ["", ""] - } else { - ["transmute(", ")"] + let fn_decl = { + let fn_output = if out_t == "void" { + String::new() + } else { + format!("-> {} ", out_t) + }; + let fn_inputs = match para_num { + 1 => format!("(a: {})", in_t[0]), + 2 => format!("(a: {}, b: {})", in_t[0], in_t[1]), + 3 => format!("(a: {}, b: {}, c: {})", in_t[0], in_t[1], in_t[2]), + _ => panic!("unsupported parameter number"), + }; + format!( + "pub unsafe fn {}{}{} {}", + name, const_declare, fn_inputs, fn_output + ) }; let call = match (multi_calls.len(), para_num, fixed.len()) { (0, 1, 0) => format!( - r#"pub unsafe fn {}{}(a: {}) -> {} {{ + r#"{}{{ {}{}(a) }}"#, - name, const_declare, in_t[0], out_t, ext_c, current_fn, + fn_decl, ext_c, current_fn, ), (0, 1, _) => { let fixed: Vec = fixed.iter().take(type_len(in_t[0])).cloned().collect(); format!( - r#"pub unsafe fn {}{}(a: {}) -> {} {{ + r#"{}{{ let b{}; {}{}(a, transmute(b)) }}"#, - name, - const_declare, - in_t[0], - out_t, + fn_decl, values(in_t[0], &fixed), ext_c, current_fn, ) } (0, 2, _) => format!( - r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{ + r#"{}{{ {}{}(a, b) }}"#, - name, const_declare, in_t[0], in_t[1], out_t, ext_c, current_fn, + fn_decl, ext_c, current_fn, ), (0, 3, _) => format!( - r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{ + r#"{}{{ {}{}(a, b, c) }}"#, - name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, current_fn, + fn_decl, ext_c, current_fn, ), (_, 1, _) => format!( - r#"pub unsafe fn {}{}(a: {}) -> {} {{ + r#"{}{{ {}{} }}"#, - name, const_declare, in_t[0], out_t, ext_c, multi_calls, + fn_decl, ext_c, multi_calls, ), (_, 2, _) => format!( - r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{ + r#"{}{{ {}{} }}"#, - name, const_declare, in_t[0], in_t[1], out_t, ext_c, multi_calls, + fn_decl, ext_c, multi_calls, ), (_, 3, _) => format!( - r#"pub unsafe fn {}{}(a: {}, b: {}, c: {}) -> {} {{ + r#"{}{{ {}{} }}"#, - name, const_declare, in_t[0], in_t[1], in_t[2], out_t, ext_c, multi_calls, + fn_decl, ext_c, multi_calls, ), (_, _, _) => String::new(), }; + let call_arm = if let Some(const_arm) = const_arm { let cnt = if const_arm.contains(':') { let consts: Vec<_> = const_arm.split(':').map(|v| v.trim().to_string()).collect(); consts[0].clone() } else { - let const_arm = const_arm.replace("ttn", type_to_native_type(in_t[1])); + let const_arm = const_arm.replace("ttn", &type_to_native_type(in_t[1])); let mut cnt = String::from(in_t[1]); cnt.push_str("("); for i in 0..type_len(in_t[1]) { @@ -1887,57 +2056,60 @@ fn gen_arm( }; match para_num { 1 => format!( - r#"pub unsafe fn {}{}(a: {}) -> {} {{ + r#"{}{{ {}{}{}(a, {}) }}"#, - name, const_declare, in_t[0], out_t, multi_calls, ext_c_arm, current_fn, cnt + fn_decl, multi_calls, ext_c_arm, current_fn, cnt ), 2 => format!( - r#"pub unsafe fn {}{}(a: {}, b:{}) -> {} {{ + r#"{}{{ {}{}{}(a, b, {}) }}"#, - name, - const_declare, - in_t[0], - in_t[1], - out_t, - multi_calls, - ext_c_arm, - current_fn, - cnt + fn_decl, multi_calls, ext_c_arm, current_fn, cnt ), _ => String::new(), } } else if out_t != link_arm_t[3] { match para_num { 1 => format!( - r#"pub unsafe fn {}{}(a: {}) -> {} {{ - {}{}{}{}(a){} + r#"{}{{ + {}{}transmute({}(a)) }}"#, - name, - const_declare, - in_t[0], - out_t, - multi_calls, - ext_c_arm, - trans[0], - current_fn, - trans[1] + fn_decl, multi_calls, ext_c_arm, current_fn, ), 2 => format!( - r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{ - {}{}{}{}(transmute(a), transmute(b)){} + r#"{}{{ + {}{}transmute({}(transmute(a), transmute(b))) }}"#, - name, - const_declare, - in_t[0], - in_t[1], - out_t, - multi_calls, - ext_c_arm, - trans[0], - current_fn, - trans[1], + fn_decl, multi_calls, ext_c_arm, current_fn, + ), + _ => String::new(), + } + } else if matches!(fn_type, Fntype::Store) { + match type_sub_len(in_t[1]) { + 1 => format!( + r#"{}{{ + {}{}{}(a, b) +}}"#, + fn_decl, multi_calls, ext_c_arm, current_fn, + ), + 2 => format!( + r#"{}{{ + {}{}{}(a, b.0, b.1) +}}"#, + fn_decl, multi_calls, ext_c_arm, current_fn, + ), + 3 => format!( + r#"{}{{ + {}{}{}(a, b.0, b.1, b.2) +}}"#, + fn_decl, multi_calls, ext_c_arm, current_fn, + ), + 4 => format!( + r#"{}{{ + {}{}{}(a, b.0, b.1, b.2, b.3) +}}"#, + fn_decl, multi_calls, ext_c_arm, current_fn, ), _ => String::new(), } @@ -1947,74 +2119,67 @@ fn gen_arm( let call_aarch64 = if let Some(const_aarch64) = const_aarch64 { match para_num { 1 => format!( - r#"pub unsafe fn {}{}(a: {}) -> {} {{ + r#"{}{{ {}{}{}(a, {}) }}"#, - name, - const_declare, - in_t[0], - out_t, - multi_calls, - ext_c_aarch64, - current_fn, - const_aarch64 + fn_decl, multi_calls, ext_c_aarch64, current_fn, const_aarch64 ), 2 => format!( - r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{ + r#"{}{{ {}{}{}(a, b, {}) }}"#, - name, - const_declare, - in_t[0], - in_t[1], - out_t, - multi_calls, - ext_c_aarch64, - current_fn, - const_aarch64 + fn_decl, multi_calls, ext_c_aarch64, current_fn, const_aarch64 ), _ => String::new(), } } else if out_t != link_aarch64_t[3] { match para_num { 1 => format!( - r#"pub unsafe fn {}{}(a: {}) -> {} {{ - {}{}{}{}(a){} + r#"{}{{ + {}{}transmute({}(a)) }}"#, - name, - const_declare, - in_t[0], - out_t, - multi_calls, - ext_c_aarch64, - trans[0], - current_fn, - trans[1], + fn_decl, multi_calls, ext_c_aarch64, current_fn, ), 2 => format!( - r#"pub unsafe fn {}{}(a: {}, b: {}) -> {} {{ - {}{}{}{}(a, b){} + r#"{}{{ + {}{}transmute({}(a, b)) }}"#, - name, - const_declare, - in_t[0], - in_t[1], - out_t, - multi_calls, - ext_c_aarch64, - trans[0], - current_fn, - trans[1], + fn_decl, multi_calls, ext_c_aarch64, current_fn, + ), + _ => String::new(), + } + } else if matches!(fn_type, Fntype::Store) { + match type_sub_len(in_t[1]) { + 1 => format!( + r#"{}{{ + {}{}{}(b, a) +}}"#, + fn_decl, multi_calls, ext_c_aarch64, current_fn, + ), + 2 => format!( + r#"{}{{ + {}{}{}(b.0, b.1, a) +}}"#, + fn_decl, multi_calls, ext_c_aarch64, current_fn, + ), + 3 => format!( + r#"{}{{ + {}{}{}(b.0, b.1, b.2, a) +}}"#, + fn_decl, multi_calls, ext_c_aarch64, current_fn, + ), + 4 => format!( + r#"{}{{ + {}{}{}(b.0, b.1, b.2, b.3, a) +}}"#, + fn_decl, multi_calls, ext_c_aarch64, current_fn, ), _ => String::new(), } } else { String::new() }; - let function = if (const_arm.is_some() && const_aarch64.is_some()) - || out_t != link_arm_t[3] - || out_t != link_aarch64_t[3] - { + let function = if separate { format!( r#" {} @@ -2066,10 +2231,8 @@ fn gen_arm( call, ) }; - let test = if test_fn == "load_test" { - gen_load_test(&name, in_t, &out_t, current_tests, type_len(out_t)) - } else { - gen_test( + let test = match fn_type { + Fntype::Normal => gen_test( &name, in_t, &out_t, @@ -2077,9 +2240,10 @@ fn gen_arm( [type_len(in_t[0]), type_len(in_t[1]), type_len(in_t[2])], type_len(out_t), para_num, - ) + ), + Fntype::Load => gen_load_test(&name, in_t, &out_t, current_tests, type_len(out_t)), + Fntype::Store => gen_store_test(&name, in_t, &out_t, current_tests, type_len(in_t[1])), }; - (function, test) } @@ -2543,14 +2707,14 @@ fn get_call( let type1 = if types[0] == "element_t" { type_to_native_type(in_t[1]) } else { - &types[0] + String::from(&types[0]) }; let type2 = if types[1] == "element_t" { type_to_native_type(in_t[1]) } else { - &types[1] + String::from(&types[1]) }; - fn_name.push_str(&format!("::<{}, {}>", type1, type2)); + fn_name.push_str(&format!("::<{}, {}>", &type1, &type2)); } else { fn_name.push_str(&fn_format[2]); } @@ -2602,7 +2766,8 @@ fn main() -> io::Result<()> { )> = Vec::new(); let mut multi_fn: Vec = Vec::new(); let mut target: TargetFeature = Default; - let mut test_fn = "normal"; + let mut fn_type: Fntype = Fntype::Normal; + let mut separate = false; // // THIS FILE IS GENERATED FORM neon.spec DO NOT CHANGE IT MANUALLY @@ -2684,7 +2849,8 @@ mod test { n = None; multi_fn = Vec::new(); target = Default; - test_fn = "normal"; + fn_type = Fntype::Normal; + separate = false; } else if line.starts_with("//") { } else if line.starts_with("name = ") { current_name = Some(String::from(&line[7..])); @@ -2741,14 +2907,12 @@ mod test { link_arm = Some(String::from(&line[11..])); } else if line.starts_with("const-arm = ") { const_arm = Some(String::from(&line[12..])); - } else if line.starts_with("test = ") { - test_fn = if line.contains("load_test") { - "load_test" - } else if line.contains("store_test") { - "store_test" - } else { - "normal" - } + } else if line.starts_with("load_fn") { + fn_type = Fntype::Load; + } else if line.starts_with("store_fn") { + fn_type = Fntype::Store; + } else if line.starts_with("arm-aarch64-separate") { + separate = true; } else if line.starts_with("target = ") { target = match Some(String::from(&line[9..])) { Some(input) => match input.as_str() { @@ -2795,7 +2959,11 @@ mod test { panic!("Bad spec: {}", line) } if b.len() == 0 { - para_num = 1; + if matches!(fn_type, Fntype::Store) { + para_num = 2; + } else { + para_num = 1; + } } else if c.len() != 0 { para_num = 3; } @@ -2820,7 +2988,8 @@ mod test { target, &fixed, &multi_fn, - test_fn, + fn_type, + separate, ); out_arm.push_str(&function); tests_arm.push_str(&test); @@ -2841,7 +3010,7 @@ mod test { target, &fixed, &multi_fn, - test_fn, + fn_type, ); out_aarch64.push_str(&function); tests_aarch64.push_str(&test); diff --git a/library/stdarch/crates/stdarch-test/src/lib.rs b/library/stdarch/crates/stdarch-test/src/lib.rs index 10834c00e7c1..e5ccec216f72 100644 --- a/library/stdarch/crates/stdarch-test/src/lib.rs +++ b/library/stdarch/crates/stdarch-test/src/lib.rs @@ -124,6 +124,9 @@ pub fn assert(shim_addr: usize, fnname: &str, expected: &str) { // vfmaq_n_f32_vfma : #instructions = 26 >= 22 (limit) "usad8" | "vfma" | "vfms" => 27, "qadd8" | "qsub8" | "sadd8" | "sel" | "shadd8" | "shsub8" | "usub8" | "ssub8" => 29, + // core_arch/src/arm_shared/simd32 + // vst1q_s64_x4_vst1 : #instructions = 40 >= 22 (limit) + "vst1" => 41, // Temporary, currently the fptosi.sat and fptoui.sat LLVM // intrinsics emit unnecessary code on arm. This can be