Add most single-register load Arm intrinsics. (#941)
This commit is contained in:
parent
7bb92b7809
commit
7ebfd93bb6
5 changed files with 1750 additions and 207 deletions
|
|
@ -8,7 +8,7 @@ pub use self::generated::*;
|
|||
// FIXME: replace neon with asimd
|
||||
|
||||
use crate::{
|
||||
core_arch::{arm::*, simd_llvm::*},
|
||||
core_arch::{arm::*, simd::*, simd_llvm::*},
|
||||
mem::{transmute, zeroed},
|
||||
};
|
||||
#[cfg(test)]
|
||||
|
|
@ -19,14 +19,6 @@ types! {
|
|||
pub struct float64x1_t(f64); // FIXME: check this!
|
||||
/// ARM-specific 128-bit wide vector of two packed `f64`.
|
||||
pub struct float64x2_t(f64, f64);
|
||||
/// ARM-specific 64-bit wide vector of one packed `p64`.
|
||||
pub struct poly64x1_t(i64); // FIXME: check this!
|
||||
/// ARM-specific 64-bit wide vector of one packed `p64`.
|
||||
pub struct poly64_t(i64); // FIXME: check this!
|
||||
/// ARM-specific 64-bit wide vector of two packed `p64`.
|
||||
pub struct poly64x2_t(i64, i64); // FIXME: check this!
|
||||
/// ARM-specific 128-bit wide vector of one packed `p64`.
|
||||
pub struct poly128_t(i128); // FIXME: check this!
|
||||
}
|
||||
|
||||
/// ARM-specific type containing two `int8x16_t` vectors.
|
||||
|
|
@ -360,6 +352,333 @@ extern "C" {
|
|||
fn vsriq_n_s64_(a: int64x2_t, b: int64x2_t, n: i32) -> int64x2_t;
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t {
|
||||
transmute(i8x8::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t {
|
||||
transmute(i8x16::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
*ptr.offset(8),
|
||||
*ptr.offset(9),
|
||||
*ptr.offset(10),
|
||||
*ptr.offset(11),
|
||||
*ptr.offset(12),
|
||||
*ptr.offset(13),
|
||||
*ptr.offset(14),
|
||||
*ptr.offset(15),
|
||||
))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t {
|
||||
transmute(i16x4::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t {
|
||||
transmute(i16x8::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t {
|
||||
transmute(i32x2::new(*ptr, *ptr.offset(1)))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t {
|
||||
transmute(i32x4::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t {
|
||||
transmute(i64x1::new(*ptr))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
|
||||
transmute(i64x2::new(*ptr, *ptr.offset(1)))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
|
||||
transmute(u8x8::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
|
||||
transmute(u8x16::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
*ptr.offset(8),
|
||||
*ptr.offset(9),
|
||||
*ptr.offset(10),
|
||||
*ptr.offset(11),
|
||||
*ptr.offset(12),
|
||||
*ptr.offset(13),
|
||||
*ptr.offset(14),
|
||||
*ptr.offset(15),
|
||||
))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
|
||||
transmute(u16x4::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
|
||||
transmute(u16x8::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
|
||||
transmute(u32x2::new(*ptr, *ptr.offset(1)))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
|
||||
transmute(u32x4::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t {
|
||||
transmute(u64x1::new(*ptr))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
|
||||
transmute(u64x2::new(*ptr, *ptr.offset(1)))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
|
||||
transmute(u8x8::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
|
||||
transmute(u8x16::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
*ptr.offset(8),
|
||||
*ptr.offset(9),
|
||||
*ptr.offset(10),
|
||||
*ptr.offset(11),
|
||||
*ptr.offset(12),
|
||||
*ptr.offset(13),
|
||||
*ptr.offset(14),
|
||||
*ptr.offset(15),
|
||||
))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
|
||||
transmute(u16x4::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
|
||||
transmute(u16x8::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
*ptr.offset(4),
|
||||
*ptr.offset(5),
|
||||
*ptr.offset(6),
|
||||
*ptr.offset(7),
|
||||
))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
|
||||
transmute(f32x2::new(*ptr, *ptr.offset(1)))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
|
||||
transmute(f32x4::new(
|
||||
*ptr,
|
||||
*ptr.offset(1),
|
||||
*ptr.offset(2),
|
||||
*ptr.offset(3),
|
||||
))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t {
|
||||
transmute(f64x1::new(*ptr))
|
||||
}
|
||||
|
||||
/// Load multiple single-element structures to one, two, three, or four registers.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t {
|
||||
transmute(f64x2::new(*ptr, *ptr.offset(1)))
|
||||
}
|
||||
|
||||
/// Absolute Value (wrapping).
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -656,7 +975,7 @@ pub unsafe fn vaddvq_u64(a: uint64x2_t) -> u64 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(pmull))]
|
||||
pub unsafe fn vmull_p64(a: poly64_t, b: poly64_t) -> poly128_t {
|
||||
pub unsafe fn vmull_p64(a: p64, b: p64) -> p128 {
|
||||
transmute(vmull_p64_(transmute(a), transmute(b)))
|
||||
}
|
||||
|
||||
|
|
@ -1338,7 +1657,6 @@ pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(tbx))]
|
||||
pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
|
||||
use crate::core_arch::simd::i8x8;
|
||||
let r = vqtbx1_s8(a, vcombine_s8(b, zeroed()), transmute(c));
|
||||
let m: int8x8_t = simd_lt(c, transmute(i8x8::splat(8)));
|
||||
simd_select(m, r, a)
|
||||
|
|
@ -1350,7 +1668,6 @@ pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(tbx))]
|
||||
pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
|
||||
use crate::core_arch::simd::u8x8;
|
||||
let r = vqtbx1_u8(a, vcombine_u8(b, zeroed()), c);
|
||||
let m: int8x8_t = simd_lt(c, transmute(u8x8::splat(8)));
|
||||
simd_select(m, r, a)
|
||||
|
|
@ -1362,7 +1679,6 @@ pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(tbx))]
|
||||
pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
|
||||
use crate::core_arch::simd::u8x8;
|
||||
let r = vqtbx1_p8(a, vcombine_p8(b, zeroed()), c);
|
||||
let m: int8x8_t = simd_lt(c, transmute(u8x8::splat(8)));
|
||||
simd_select(m, r, a)
|
||||
|
|
@ -1401,7 +1717,6 @@ pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(tbx))]
|
||||
pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
|
||||
use crate::core_arch::simd::i8x8;
|
||||
let r = vqtbx2_s8(
|
||||
a,
|
||||
int8x16x2_t(vcombine_s8(b.0, b.1), vcombine_s8(b.2, zeroed())),
|
||||
|
|
@ -1417,7 +1732,6 @@ pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(tbx))]
|
||||
pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
|
||||
use crate::core_arch::simd::u8x8;
|
||||
let r = vqtbx2_u8(
|
||||
a,
|
||||
uint8x16x2_t(vcombine_u8(b.0, b.1), vcombine_u8(b.2, zeroed())),
|
||||
|
|
@ -1433,7 +1747,6 @@ pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t
|
|||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(tbx))]
|
||||
pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
|
||||
use crate::core_arch::simd::u8x8;
|
||||
let r = vqtbx2_p8(
|
||||
a,
|
||||
poly8x16x2_t(vcombine_p8(b.0, b.1), vcombine_p8(b.2, zeroed())),
|
||||
|
|
@ -1986,45 +2299,6 @@ pub unsafe fn vqtbx4q_p8(a: poly8x16_t, t: poly8x16x4_t, idx: uint8x16_t) -> pol
|
|||
))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_f32(addr: *const f32) -> float32x4_t {
|
||||
use crate::core_arch::simd::f32x4;
|
||||
transmute(f32x4::new(
|
||||
*addr,
|
||||
*addr.offset(1),
|
||||
*addr.offset(2),
|
||||
*addr.offset(3),
|
||||
))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_s32(addr: *const i32) -> int32x4_t {
|
||||
use crate::core_arch::simd::i32x4;
|
||||
transmute(i32x4::new(
|
||||
*addr,
|
||||
*addr.offset(1),
|
||||
*addr.offset(2),
|
||||
*addr.offset(3),
|
||||
))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(ldr))]
|
||||
pub unsafe fn vld1q_u32(addr: *const u32) -> uint32x4_t {
|
||||
use crate::core_arch::simd::u32x4;
|
||||
transmute(u32x4::new(
|
||||
*addr,
|
||||
*addr.offset(1),
|
||||
*addr.offset(2),
|
||||
*addr.offset(3),
|
||||
))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fcvtzs))]
|
||||
|
|
@ -2446,36 +2720,6 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_f32() {
|
||||
let e = f32x4::new(1., 2., 3., 4.);
|
||||
let f = [0., 1., 2., 3., 4.];
|
||||
// do a load that has 4 byte alignment to make sure we're not
|
||||
// over aligning it
|
||||
let r: f32x4 = transmute(vld1q_f32(f[1..].as_ptr()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_s32() {
|
||||
let e = i32x4::new(1, 2, 3, 4);
|
||||
let f = [0, 1, 2, 3, 4];
|
||||
// do a load that has 4 byte alignment to make sure we're not
|
||||
// over aligning it
|
||||
let r: i32x4 = transmute(vld1q_s32(f[1..].as_ptr()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_u32() {
|
||||
let e = u32x4::new(1, 2, 3, 4);
|
||||
let f = [0, 1, 2, 3, 4];
|
||||
// do a load that has 4 byte alignment to make sure we're not
|
||||
// over aligning it
|
||||
let r: u32x4 = transmute(vld1q_u32(f[1..].as_ptr()));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vuqadd_s8() {
|
||||
let a = i8x8::new(i8::MIN, -3, -2, -1, 0, 1, 2, i8::MAX);
|
||||
|
|
@ -3775,3 +4019,7 @@ mod table_lookup_tests;
|
|||
#[cfg(test)]
|
||||
#[path = "../../arm/neon/shift_and_insert_tests.rs"]
|
||||
mod shift_and_insert_tests;
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "../../arm/neon/load_tests.rs"]
|
||||
mod load_tests;
|
||||
|
|
|
|||
208
library/stdarch/crates/core_arch/src/arm/neon/load_tests.rs
Normal file
208
library/stdarch/crates/core_arch/src/arm/neon/load_tests.rs
Normal file
|
|
@ -0,0 +1,208 @@
|
|||
//! Tests for ARM+v7+neon load (vld1) intrinsics.
|
||||
//!
|
||||
//! These are included in `{arm, aarch64}::neon`.
|
||||
|
||||
use super::*;
|
||||
|
||||
#[cfg(target_arch = "arm")]
|
||||
use crate::core_arch::arm::*;
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
use crate::core_arch::aarch64::*;
|
||||
|
||||
use crate::core_arch::simd::*;
|
||||
use std::mem;
|
||||
use stdarch_test::simd_test;
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_s8() {
|
||||
let a: [i8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: i8x8 = transmute(vld1_s8(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_s8() {
|
||||
let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r: i8x16 = transmute(vld1q_s8(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_s16() {
|
||||
let a: [i16; 5] = [0, 1, 2, 3, 4];
|
||||
let e = i16x4::new(1, 2, 3, 4);
|
||||
let r: i16x4 = transmute(vld1_s16(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_s16() {
|
||||
let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: i16x8 = transmute(vld1q_s16(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_s32() {
|
||||
let a: [i32; 3] = [0, 1, 2];
|
||||
let e = i32x2::new(1, 2);
|
||||
let r: i32x2 = transmute(vld1_s32(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_s32() {
|
||||
let a: [i32; 5] = [0, 1, 2, 3, 4];
|
||||
let e = i32x4::new(1, 2, 3, 4);
|
||||
let r: i32x4 = transmute(vld1q_s32(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_s64() {
|
||||
let a: [i64; 2] = [0, 1];
|
||||
let e = i64x1::new(1);
|
||||
let r: i64x1 = transmute(vld1_s64(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_s64() {
|
||||
let a: [i64; 3] = [0, 1, 2];
|
||||
let e = i64x2::new(1, 2);
|
||||
let r: i64x2 = transmute(vld1q_s64(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_u8() {
|
||||
let a: [u8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: u8x8 = transmute(vld1_u8(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_u8() {
|
||||
let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r: u8x16 = transmute(vld1q_u8(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_u16() {
|
||||
let a: [u16; 5] = [0, 1, 2, 3, 4];
|
||||
let e = u16x4::new(1, 2, 3, 4);
|
||||
let r: u16x4 = transmute(vld1_u16(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_u16() {
|
||||
let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: u16x8 = transmute(vld1q_u16(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_u32() {
|
||||
let a: [u32; 3] = [0, 1, 2];
|
||||
let e = u32x2::new(1, 2);
|
||||
let r: u32x2 = transmute(vld1_u32(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_u32() {
|
||||
let a: [u32; 5] = [0, 1, 2, 3, 4];
|
||||
let e = u32x4::new(1, 2, 3, 4);
|
||||
let r: u32x4 = transmute(vld1q_u32(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_u64() {
|
||||
let a: [u64; 2] = [0, 1];
|
||||
let e = u64x1::new(1);
|
||||
let r: u64x1 = transmute(vld1_u64(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_u64() {
|
||||
let a: [u64; 3] = [0, 1, 2];
|
||||
let e = u64x2::new(1, 2);
|
||||
let r: u64x2 = transmute(vld1q_u64(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_p8() {
|
||||
let a: [p8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: u8x8 = transmute(vld1_p8(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_p8() {
|
||||
let a: [p8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r: u8x16 = transmute(vld1q_p8(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_p16() {
|
||||
let a: [p16; 5] = [0, 1, 2, 3, 4];
|
||||
let e = u16x4::new(1, 2, 3, 4);
|
||||
let r: u16x4 = transmute(vld1_p16(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_p16() {
|
||||
let a: [p16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
|
||||
let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
|
||||
let r: u16x8 = transmute(vld1q_p16(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_f32() {
|
||||
let a: [f32; 3] = [0., 1., 2.];
|
||||
let e = f32x2::new(1., 2.);
|
||||
let r: f32x2 = transmute(vld1_f32(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_f32() {
|
||||
let a: [f32; 5] = [0., 1., 2., 3., 4.];
|
||||
let e = f32x4::new(1., 2., 3., 4.);
|
||||
let r: f32x4 = transmute(vld1q_f32(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1_f64() {
|
||||
let a: [f64; 2] = [0., 1.];
|
||||
let e = f64x1::new(1.);
|
||||
let r: f64x1 = transmute(vld1_f64(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vld1q_f64() {
|
||||
let a: [f64; 3] = [0., 1., 2.];
|
||||
let e = f64x2::new(1., 2.);
|
||||
let r: f64x2 = transmute(vld1q_f64(a[1..].as_ptr()));
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -133,6 +133,7 @@ simd_ty!(i32x2[i32]: i32, i32 | x0, x1);
|
|||
simd_ty!(i64x1[i64]: i64 | x1);
|
||||
|
||||
simd_ty!(f32x2[f32]: f32, f32 | x0, x1);
|
||||
simd_ty!(f64x1[f64]: f64 | x1);
|
||||
|
||||
// 128-bit wide types:
|
||||
|
||||
|
|
|
|||
|
|
@ -164,6 +164,8 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
|
|||
"u64" => quote! { &U64 },
|
||||
"u128" => quote! { &U128 },
|
||||
"u8" => quote! { &U8 },
|
||||
"p8" => quote! { &P8 },
|
||||
"p16" => quote! { &P16 },
|
||||
"Ordering" => quote! { &ORDERING },
|
||||
"CpuidResult" => quote! { &CPUID },
|
||||
|
||||
|
|
@ -209,13 +211,13 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
|
|||
"poly8x16x2_t" => quote! { &POLY8X16X2 },
|
||||
"poly8x16x3_t" => quote! { &POLY8X16X3 },
|
||||
"poly8x16x4_t" => quote! { &POLY8X16X4 },
|
||||
"poly64_t" => quote! { &P64 },
|
||||
"p64" => quote! { &P64 },
|
||||
"poly64x1_t" => quote! { &POLY64X1 },
|
||||
"poly64x2_t" => quote! { &POLY64X2 },
|
||||
"poly8x16_t" => quote! { &POLY8X16 },
|
||||
"poly16x4_t" => quote! { &POLY16X4 },
|
||||
"poly16x8_t" => quote! { &POLY16X8 },
|
||||
"poly128_t" => quote! { &P128 },
|
||||
"p128" => quote! { &P128 },
|
||||
|
||||
"v16i8" => quote! { &v16i8 },
|
||||
"v8i16" => quote! { &v8i16 },
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue