Add most single-register load Arm intrinsics. (#941)

This commit is contained in:
Adam Hillier 2020-11-07 23:22:07 +00:00 committed by GitHub
parent 7bb92b7809
commit 7ebfd93bb6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 1750 additions and 207 deletions

View file

@ -8,7 +8,7 @@ pub use self::generated::*;
// FIXME: replace neon with asimd
use crate::{
core_arch::{arm::*, simd_llvm::*},
core_arch::{arm::*, simd::*, simd_llvm::*},
mem::{transmute, zeroed},
};
#[cfg(test)]
@ -19,14 +19,6 @@ types! {
pub struct float64x1_t(f64); // FIXME: check this!
/// ARM-specific 128-bit wide vector of two packed `f64`.
pub struct float64x2_t(f64, f64);
/// ARM-specific 64-bit wide vector of one packed `p64`.
pub struct poly64x1_t(i64); // FIXME: check this!
/// ARM-specific 64-bit wide vector of one packed `p64`.
pub struct poly64_t(i64); // FIXME: check this!
/// ARM-specific 64-bit wide vector of two packed `p64`.
pub struct poly64x2_t(i64, i64); // FIXME: check this!
/// ARM-specific 128-bit wide vector of one packed `p64`.
pub struct poly128_t(i128); // FIXME: check this!
}
/// ARM-specific type containing two `int8x16_t` vectors.
@ -360,6 +352,333 @@ extern "C" {
fn vsriq_n_s64_(a: int64x2_t, b: int64x2_t, n: i32) -> int64x2_t;
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t {
transmute(i8x8::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t {
transmute(i8x16::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
*ptr.offset(8),
*ptr.offset(9),
*ptr.offset(10),
*ptr.offset(11),
*ptr.offset(12),
*ptr.offset(13),
*ptr.offset(14),
*ptr.offset(15),
))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t {
transmute(i16x4::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t {
transmute(i16x8::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t {
transmute(i32x2::new(*ptr, *ptr.offset(1)))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t {
transmute(i32x4::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t {
transmute(i64x1::new(*ptr))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t {
transmute(i64x2::new(*ptr, *ptr.offset(1)))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t {
transmute(u8x8::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t {
transmute(u8x16::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
*ptr.offset(8),
*ptr.offset(9),
*ptr.offset(10),
*ptr.offset(11),
*ptr.offset(12),
*ptr.offset(13),
*ptr.offset(14),
*ptr.offset(15),
))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t {
transmute(u16x4::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t {
transmute(u16x8::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t {
transmute(u32x2::new(*ptr, *ptr.offset(1)))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t {
transmute(u32x4::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t {
transmute(u64x1::new(*ptr))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t {
transmute(u64x2::new(*ptr, *ptr.offset(1)))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t {
transmute(u8x8::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t {
transmute(u8x16::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
*ptr.offset(8),
*ptr.offset(9),
*ptr.offset(10),
*ptr.offset(11),
*ptr.offset(12),
*ptr.offset(13),
*ptr.offset(14),
*ptr.offset(15),
))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t {
transmute(u16x4::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t {
transmute(u16x8::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
*ptr.offset(4),
*ptr.offset(5),
*ptr.offset(6),
*ptr.offset(7),
))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t {
transmute(f32x2::new(*ptr, *ptr.offset(1)))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t {
transmute(f32x4::new(
*ptr,
*ptr.offset(1),
*ptr.offset(2),
*ptr.offset(3),
))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1_f64(ptr: *const f64) -> float64x1_t {
transmute(f64x1::new(*ptr))
}
/// Load multiple single-element structures to one, two, three, or four registers.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_f64(ptr: *const f64) -> float64x2_t {
transmute(f64x2::new(*ptr, *ptr.offset(1)))
}
/// Absolute Value (wrapping).
#[inline]
#[target_feature(enable = "neon")]
@ -656,7 +975,7 @@ pub unsafe fn vaddvq_u64(a: uint64x2_t) -> u64 {
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(pmull))]
pub unsafe fn vmull_p64(a: poly64_t, b: poly64_t) -> poly128_t {
pub unsafe fn vmull_p64(a: p64, b: p64) -> p128 {
transmute(vmull_p64_(transmute(a), transmute(b)))
}
@ -1338,7 +1657,6 @@ pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(tbx))]
pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
use crate::core_arch::simd::i8x8;
let r = vqtbx1_s8(a, vcombine_s8(b, zeroed()), transmute(c));
let m: int8x8_t = simd_lt(c, transmute(i8x8::splat(8)));
simd_select(m, r, a)
@ -1350,7 +1668,6 @@ pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(tbx))]
pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
use crate::core_arch::simd::u8x8;
let r = vqtbx1_u8(a, vcombine_u8(b, zeroed()), c);
let m: int8x8_t = simd_lt(c, transmute(u8x8::splat(8)));
simd_select(m, r, a)
@ -1362,7 +1679,6 @@ pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(tbx))]
pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t {
use crate::core_arch::simd::u8x8;
let r = vqtbx1_p8(a, vcombine_p8(b, zeroed()), c);
let m: int8x8_t = simd_lt(c, transmute(u8x8::splat(8)));
simd_select(m, r, a)
@ -1401,7 +1717,6 @@ pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(tbx))]
pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
use crate::core_arch::simd::i8x8;
let r = vqtbx2_s8(
a,
int8x16x2_t(vcombine_s8(b.0, b.1), vcombine_s8(b.2, zeroed())),
@ -1417,7 +1732,6 @@ pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t {
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(tbx))]
pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t {
use crate::core_arch::simd::u8x8;
let r = vqtbx2_u8(
a,
uint8x16x2_t(vcombine_u8(b.0, b.1), vcombine_u8(b.2, zeroed())),
@ -1433,7 +1747,6 @@ pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(tbx))]
pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t {
use crate::core_arch::simd::u8x8;
let r = vqtbx2_p8(
a,
poly8x16x2_t(vcombine_p8(b.0, b.1), vcombine_p8(b.2, zeroed())),
@ -1986,45 +2299,6 @@ pub unsafe fn vqtbx4q_p8(a: poly8x16_t, t: poly8x16x4_t, idx: uint8x16_t) -> pol
))
}
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_f32(addr: *const f32) -> float32x4_t {
use crate::core_arch::simd::f32x4;
transmute(f32x4::new(
*addr,
*addr.offset(1),
*addr.offset(2),
*addr.offset(3),
))
}
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_s32(addr: *const i32) -> int32x4_t {
use crate::core_arch::simd::i32x4;
transmute(i32x4::new(
*addr,
*addr.offset(1),
*addr.offset(2),
*addr.offset(3),
))
}
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(ldr))]
pub unsafe fn vld1q_u32(addr: *const u32) -> uint32x4_t {
use crate::core_arch::simd::u32x4;
transmute(u32x4::new(
*addr,
*addr.offset(1),
*addr.offset(2),
*addr.offset(3),
))
}
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtzs))]
@ -2446,36 +2720,6 @@ mod tests {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_f32() {
let e = f32x4::new(1., 2., 3., 4.);
let f = [0., 1., 2., 3., 4.];
// do a load that has 4 byte alignment to make sure we're not
// over aligning it
let r: f32x4 = transmute(vld1q_f32(f[1..].as_ptr()));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_s32() {
let e = i32x4::new(1, 2, 3, 4);
let f = [0, 1, 2, 3, 4];
// do a load that has 4 byte alignment to make sure we're not
// over aligning it
let r: i32x4 = transmute(vld1q_s32(f[1..].as_ptr()));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_u32() {
let e = u32x4::new(1, 2, 3, 4);
let f = [0, 1, 2, 3, 4];
// do a load that has 4 byte alignment to make sure we're not
// over aligning it
let r: u32x4 = transmute(vld1q_u32(f[1..].as_ptr()));
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vuqadd_s8() {
let a = i8x8::new(i8::MIN, -3, -2, -1, 0, 1, 2, i8::MAX);
@ -3775,3 +4019,7 @@ mod table_lookup_tests;
#[cfg(test)]
#[path = "../../arm/neon/shift_and_insert_tests.rs"]
mod shift_and_insert_tests;
#[cfg(test)]
#[path = "../../arm/neon/load_tests.rs"]
mod load_tests;

View file

@ -0,0 +1,208 @@
//! Tests for ARM+v7+neon load (vld1) intrinsics.
//!
//! These are included in `{arm, aarch64}::neon`.
use super::*;
#[cfg(target_arch = "arm")]
use crate::core_arch::arm::*;
#[cfg(target_arch = "aarch64")]
use crate::core_arch::aarch64::*;
use crate::core_arch::simd::*;
use std::mem;
use stdarch_test::simd_test;
#[simd_test(enable = "neon")]
unsafe fn test_vld1_s8() {
let a: [i8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let r: i8x8 = transmute(vld1_s8(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_s8() {
let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r: i8x16 = transmute(vld1q_s8(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_s16() {
let a: [i16; 5] = [0, 1, 2, 3, 4];
let e = i16x4::new(1, 2, 3, 4);
let r: i16x4 = transmute(vld1_s16(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_s16() {
let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let r: i16x8 = transmute(vld1q_s16(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_s32() {
let a: [i32; 3] = [0, 1, 2];
let e = i32x2::new(1, 2);
let r: i32x2 = transmute(vld1_s32(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_s32() {
let a: [i32; 5] = [0, 1, 2, 3, 4];
let e = i32x4::new(1, 2, 3, 4);
let r: i32x4 = transmute(vld1q_s32(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_s64() {
let a: [i64; 2] = [0, 1];
let e = i64x1::new(1);
let r: i64x1 = transmute(vld1_s64(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_s64() {
let a: [i64; 3] = [0, 1, 2];
let e = i64x2::new(1, 2);
let r: i64x2 = transmute(vld1q_s64(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_u8() {
let a: [u8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let r: u8x8 = transmute(vld1_u8(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_u8() {
let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r: u8x16 = transmute(vld1q_u8(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_u16() {
let a: [u16; 5] = [0, 1, 2, 3, 4];
let e = u16x4::new(1, 2, 3, 4);
let r: u16x4 = transmute(vld1_u16(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_u16() {
let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let r: u16x8 = transmute(vld1q_u16(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_u32() {
let a: [u32; 3] = [0, 1, 2];
let e = u32x2::new(1, 2);
let r: u32x2 = transmute(vld1_u32(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_u32() {
let a: [u32; 5] = [0, 1, 2, 3, 4];
let e = u32x4::new(1, 2, 3, 4);
let r: u32x4 = transmute(vld1q_u32(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_u64() {
let a: [u64; 2] = [0, 1];
let e = u64x1::new(1);
let r: u64x1 = transmute(vld1_u64(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_u64() {
let a: [u64; 3] = [0, 1, 2];
let e = u64x2::new(1, 2);
let r: u64x2 = transmute(vld1q_u64(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_p8() {
let a: [p8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let r: u8x8 = transmute(vld1_p8(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_p8() {
let a: [p8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r: u8x16 = transmute(vld1q_p8(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_p16() {
let a: [p16; 5] = [0, 1, 2, 3, 4];
let e = u16x4::new(1, 2, 3, 4);
let r: u16x4 = transmute(vld1_p16(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_p16() {
let a: [p16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let r: u16x8 = transmute(vld1q_p16(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1_f32() {
let a: [f32; 3] = [0., 1., 2.];
let e = f32x2::new(1., 2.);
let r: f32x2 = transmute(vld1_f32(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_f32() {
let a: [f32; 5] = [0., 1., 2., 3., 4.];
let e = f32x4::new(1., 2., 3., 4.);
let r: f32x4 = transmute(vld1q_f32(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[cfg(target_arch = "aarch64")]
#[simd_test(enable = "neon")]
unsafe fn test_vld1_f64() {
let a: [f64; 2] = [0., 1.];
let e = f64x1::new(1.);
let r: f64x1 = transmute(vld1_f64(a[1..].as_ptr()));
assert_eq!(r, e)
}
#[cfg(target_arch = "aarch64")]
#[simd_test(enable = "neon")]
unsafe fn test_vld1q_f64() {
let a: [f64; 3] = [0., 1., 2.];
let e = f64x2::new(1., 2.);
let r: f64x2 = transmute(vld1q_f64(a[1..].as_ptr()));
assert_eq!(r, e)
}

File diff suppressed because it is too large Load diff

View file

@ -133,6 +133,7 @@ simd_ty!(i32x2[i32]: i32, i32 | x0, x1);
simd_ty!(i64x1[i64]: i64 | x1);
simd_ty!(f32x2[f32]: f32, f32 | x0, x1);
simd_ty!(f64x1[f64]: f64 | x1);
// 128-bit wide types:

View file

@ -164,6 +164,8 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
"u64" => quote! { &U64 },
"u128" => quote! { &U128 },
"u8" => quote! { &U8 },
"p8" => quote! { &P8 },
"p16" => quote! { &P16 },
"Ordering" => quote! { &ORDERING },
"CpuidResult" => quote! { &CPUID },
@ -209,13 +211,13 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
"poly8x16x2_t" => quote! { &POLY8X16X2 },
"poly8x16x3_t" => quote! { &POLY8X16X3 },
"poly8x16x4_t" => quote! { &POLY8X16X4 },
"poly64_t" => quote! { &P64 },
"p64" => quote! { &P64 },
"poly64x1_t" => quote! { &POLY64X1 },
"poly64x2_t" => quote! { &POLY64X2 },
"poly8x16_t" => quote! { &POLY8X16 },
"poly16x4_t" => quote! { &POLY16X4 },
"poly16x8_t" => quote! { &POLY16X8 },
"poly128_t" => quote! { &P128 },
"p128" => quote! { &P128 },
"v16i8" => quote! { &v16i8 },
"v8i16" => quote! { &v8i16 },