diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs index adc653e31c04..3712c99ba330 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs @@ -1799,6 +1799,19 @@ pub unsafe fn vqtbx4q_p8(a: poly8x16_t, t: poly8x16x4_t, idx: uint8x16_t) -> pol )) } +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ldr))] +pub unsafe fn vld1q_f32(addr: *const f32) -> float32x4_t { + use crate::core_arch::simd::f32x4; + transmute(f32x4::new( + *addr, + *addr.offset(1), + *addr.offset(2), + *addr.offset(3), + )) +} + #[cfg(test)] mod tests { use crate::core_arch::aarch64::test_support::*; @@ -1807,6 +1820,16 @@ mod tests { use std::mem::transmute; use stdarch_test::simd_test; + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_f32() { + let e = f32x4::new(1., 2., 3., 4.); + let f = [0., 1., 2., 3., 4.]; + // do a load that has 4 byte alignment to make sure we're not + // over aligning it + let r: f32x4 = transmute(vld1q_f32(f[1..].as_ptr())); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vpaddq_s16() { let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); diff --git a/library/stdarch/crates/core_arch/src/arm/neon/mod.rs b/library/stdarch/crates/core_arch/src/arm/neon/mod.rs index 43fa753cc331..cb397d817897 100644 --- a/library/stdarch/crates/core_arch/src/arm/neon/mod.rs +++ b/library/stdarch/crates/core_arch/src/arm/neon/mod.rs @@ -217,6 +217,8 @@ extern "C" { d: int8x8_t, e: int8x8_t, ) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f32.p0i8")] + fn vld1q_v4f32(addr: *const u8, align: u32) -> float32x4_t; } /// Absolute value (wrapping). @@ -1767,6 +1769,16 @@ pub unsafe fn vld1q_u8(addr: *const u8) -> uint8x16_t { ptr::read(addr as *const uint8x16_t) } +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon")] +#[target_feature(enable = "v7")] +#[cfg_attr(test, assert_instr("vld1.32"))] +pub unsafe fn vld1q_f32(addr: *const f32) -> float32x4_t { + vld1q_v4f32(addr as *const u8, 4) +} + #[cfg(test)] mod tests { use super::*; @@ -1791,6 +1803,17 @@ mod tests { assert_eq!(r, e); } + #[cfg(target_arch = "arm")] + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_f32() { + let e = f32x4::new(1., 2., 3., 4.); + let f = [0., 1., 2., 3., 4.]; + // do a load that has 4 byte alignment to make sure we're not + // over aligning it + let r: f32x4 = transmute(vld1q_f32(f[1..].as_ptr())); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] unsafe fn test_vget_lane_u8() { let v = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);