add vldx neon instructions (#1200)

This commit is contained in:
Sparrow Li 2021-08-25 02:51:30 +08:00 committed by GitHub
parent b10d00cae0
commit 4baf95fddd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 2730 additions and 703 deletions

View file

@ -1,4 +1,4 @@
extern "C" {
extern "unadjusted" {
#[link_name = "llvm.aarch64.crc32x"]
fn crc32x_(crc: u32, data: u64) -> u32;

View file

@ -25,48 +25,38 @@ types! {
pub struct float64x2_t(f64, f64);
}
/// ARM-specific type containing two `int8x16_t` vectors.
/// ARM-specific type containing two `float64x1_t` vectors.
#[derive(Copy, Clone)]
pub struct int8x16x2_t(pub int8x16_t, pub int8x16_t);
/// ARM-specific type containing three `int8x16_t` vectors.
pub struct float64x1x2_t(pub float64x1_t, pub float64x1_t);
/// ARM-specific type containing three `float64x1_t` vectors.
#[derive(Copy, Clone)]
pub struct int8x16x3_t(pub int8x16_t, pub int8x16_t, pub int8x16_t);
/// ARM-specific type containing four `int8x16_t` vectors.
pub struct float64x1x3_t(pub float64x1_t, pub float64x1_t, pub float64x1_t);
/// ARM-specific type containing four `float64x1_t` vectors.
#[derive(Copy, Clone)]
pub struct int8x16x4_t(pub int8x16_t, pub int8x16_t, pub int8x16_t, pub int8x16_t);
/// ARM-specific type containing two `uint8x16_t` vectors.
#[derive(Copy, Clone)]
pub struct uint8x16x2_t(pub uint8x16_t, pub uint8x16_t);
/// ARM-specific type containing three `uint8x16_t` vectors.
#[derive(Copy, Clone)]
pub struct uint8x16x3_t(pub uint8x16_t, pub uint8x16_t, pub uint8x16_t);
/// ARM-specific type containing four `uint8x16_t` vectors.
#[derive(Copy, Clone)]
pub struct uint8x16x4_t(
pub uint8x16_t,
pub uint8x16_t,
pub uint8x16_t,
pub uint8x16_t,
pub struct float64x1x4_t(
pub float64x1_t,
pub float64x1_t,
pub float64x1_t,
pub float64x1_t,
);
/// ARM-specific type containing two `poly8x16_t` vectors.
/// ARM-specific type containing two `float64x2_t` vectors.
#[derive(Copy, Clone)]
pub struct poly8x16x2_t(pub poly8x16_t, pub poly8x16_t);
/// ARM-specific type containing three `poly8x16_t` vectors.
pub struct float64x2x2_t(pub float64x2_t, pub float64x2_t);
/// ARM-specific type containing three `float64x2_t` vectors.
#[derive(Copy, Clone)]
pub struct poly8x16x3_t(pub poly8x16_t, pub poly8x16_t, pub poly8x16_t);
/// ARM-specific type containing four `poly8x16_t` vectors.
pub struct float64x2x3_t(pub float64x2_t, pub float64x2_t, pub float64x2_t);
/// ARM-specific type containing four `float64x2_t` vectors.
#[derive(Copy, Clone)]
pub struct poly8x16x4_t(
pub poly8x16_t,
pub poly8x16_t,
pub poly8x16_t,
pub poly8x16_t,
pub struct float64x2x4_t(
pub float64x2_t,
pub float64x2_t,
pub float64x2_t,
pub float64x2_t,
);
#[allow(improper_ctypes)]
extern "C" {
extern "unadjusted" {
// absolute value
#[link_name = "llvm.aarch64.neon.abs.i64"]
fn vabsd_s64_(a: i64) -> i64;

View file

@ -1,7 +1,7 @@
#[cfg(test)]
use stdarch_test::assert_instr;
extern "C" {
extern "unadjusted" {
#[link_name = "llvm.prefetch"]
fn prefetch(p: *const i8, rw: i32, loc: i32, ty: i32);
}

View file

@ -17,7 +17,7 @@
#[cfg(test)]
use stdarch_test::assert_instr;
extern "C" {
extern "unadjusted" {
#[link_name = "llvm.aarch64.tstart"]
fn aarch64_tstart() -> u64;
#[link_name = "llvm.aarch64.tcommit"]

View file

@ -32,7 +32,7 @@ types! {
pub struct uint16x2_t(u16, u16);
}
extern "C" {
extern "unadjusted" {
#[link_name = "llvm.arm.smulbb"]
fn arm_smulbb(a: i32, b: i32) -> i32;

View file

@ -11,7 +11,7 @@
doc
))]
pub unsafe fn __clrex() {
extern "C" {
extern "unadjusted" {
#[link_name = "llvm.arm.clrex"]
fn clrex();
}
@ -27,7 +27,7 @@ pub unsafe fn __clrex() {
doc
))]
pub unsafe fn __ldrexb(p: *const u8) -> u8 {
extern "C" {
extern "unadjusted" {
#[link_name = "llvm.arm.ldrex.p0i8"]
fn ldrex8(p: *const u8) -> u32;
}
@ -43,7 +43,7 @@ pub unsafe fn __ldrexb(p: *const u8) -> u8 {
doc
))]
pub unsafe fn __ldrexh(p: *const u16) -> u16 {
extern "C" {
extern "unadjusted" {
#[link_name = "llvm.arm.ldrex.p0i16"]
fn ldrex16(p: *const u16) -> u32;
}
@ -60,7 +60,7 @@ pub unsafe fn __ldrexh(p: *const u16) -> u16 {
doc
))]
pub unsafe fn __ldrex(p: *const u32) -> u32 {
extern "C" {
extern "unadjusted" {
#[link_name = "llvm.arm.ldrex.p0i32"]
fn ldrex32(p: *const u32) -> u32;
}
@ -78,7 +78,7 @@ pub unsafe fn __ldrex(p: *const u32) -> u32 {
doc
))]
pub unsafe fn __strexb(value: u32, addr: *mut u8) -> u32 {
extern "C" {
extern "unadjusted" {
#[link_name = "llvm.arm.strex.p0i8"]
fn strex8(value: u32, addr: *mut u8) -> u32;
}
@ -97,7 +97,7 @@ pub unsafe fn __strexb(value: u32, addr: *mut u8) -> u32 {
doc
))]
pub unsafe fn __strexh(value: u16, addr: *mut u16) -> u32 {
extern "C" {
extern "unadjusted" {
#[link_name = "llvm.arm.strex.p0i16"]
fn strex16(value: u32, addr: *mut u16) -> u32;
}
@ -116,7 +116,7 @@ pub unsafe fn __strexh(value: u16, addr: *mut u16) -> u32 {
doc
))]
pub unsafe fn __strex(value: u32, addr: *mut u32) -> u32 {
extern "C" {
extern "unadjusted" {
#[link_name = "llvm.arm.strex.p0i32"]
fn strex32(value: u32, addr: *mut u32) -> u32;
}

View file

@ -107,7 +107,7 @@ pub unsafe fn __dbg<const IMM4: i32>() {
dbg(IMM4);
}
extern "C" {
extern "unadjusted" {
#[link_name = "llvm.arm.dbg"]
fn dbg(_: i32);
}

View file

@ -12,7 +12,7 @@ pub(crate) type p8 = u8;
pub(crate) type p16 = u16;
#[allow(improper_ctypes)]
extern "C" {
extern "unadjusted" {
#[link_name = "llvm.arm.neon.vbsl.v8i8"]
fn vbsl_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t;
#[link_name = "llvm.arm.neon.vbsl.v16i8"]

View file

@ -80,7 +80,7 @@ macro_rules! dsp_call {
};
}
extern "C" {
extern "unadjusted" {
#[link_name = "llvm.arm.qadd8"]
fn arm_qadd8(a: i32, b: i32) -> i32;

View file

@ -122,7 +122,7 @@ where
arg.__isb()
}
extern "C" {
extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.dmb")]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.dmb")]
fn dmb(_: i32);

View file

@ -1,4 +1,4 @@
extern "C" {
extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crc32b")]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.crc32b")]
fn crc32b_(crc: u32, data: u32) -> u32;

View file

@ -1,7 +1,7 @@
use crate::core_arch::arm_shared::{uint32x4_t, uint8x16_t};
#[allow(improper_ctypes)]
extern "C" {
extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.crypto.aese")]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.aese")]
fn vaeseq_u8_(data: uint8x16_t, key: uint8x16_t) -> uint8x16_t;

View file

@ -80,7 +80,7 @@ pub unsafe fn __nop() {
asm!("nop", options(nomem, nostack, preserves_flags));
}
extern "C" {
extern "unadjusted" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.hint")]
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.hint")]
fn hint(_: i32);

View file

@ -92,6 +92,16 @@ pub struct int8x8x3_t(pub int8x8_t, pub int8x8_t, pub int8x8_t);
#[derive(Copy, Clone)]
pub struct int8x8x4_t(pub int8x8_t, pub int8x8_t, pub int8x8_t, pub int8x8_t);
/// ARM-specific type containing two `int8x16_t` vectors.
#[derive(Copy, Clone)]
pub struct int8x16x2_t(pub int8x16_t, pub int8x16_t);
/// ARM-specific type containing three `int8x16_t` vectors.
#[derive(Copy, Clone)]
pub struct int8x16x3_t(pub int8x16_t, pub int8x16_t, pub int8x16_t);
/// ARM-specific type containing four `int8x16_t` vectors.
#[derive(Copy, Clone)]
pub struct int8x16x4_t(pub int8x16_t, pub int8x16_t, pub int8x16_t, pub int8x16_t);
/// ARM-specific type containing two `uint8x8_t` vectors.
#[derive(Copy, Clone)]
pub struct uint8x8x2_t(pub uint8x8_t, pub uint8x8_t);
@ -102,6 +112,21 @@ pub struct uint8x8x3_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t);
#[derive(Copy, Clone)]
pub struct uint8x8x4_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t, pub uint8x8_t);
/// ARM-specific type containing two `uint8x16_t` vectors.
#[derive(Copy, Clone)]
pub struct uint8x16x2_t(pub uint8x16_t, pub uint8x16_t);
/// ARM-specific type containing three `uint8x16_t` vectors.
#[derive(Copy, Clone)]
pub struct uint8x16x3_t(pub uint8x16_t, pub uint8x16_t, pub uint8x16_t);
/// ARM-specific type containing four `uint8x16_t` vectors.
#[derive(Copy, Clone)]
pub struct uint8x16x4_t(
pub uint8x16_t,
pub uint8x16_t,
pub uint8x16_t,
pub uint8x16_t,
);
/// ARM-specific type containing two `poly8x8_t` vectors.
#[derive(Copy, Clone)]
pub struct poly8x8x2_t(pub poly8x8_t, pub poly8x8_t);
@ -112,8 +137,233 @@ pub struct poly8x8x3_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t);
#[derive(Copy, Clone)]
pub struct poly8x8x4_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t, pub poly8x8_t);
/// ARM-specific type containing two `poly8x16_t` vectors.
#[derive(Copy, Clone)]
pub struct poly8x16x2_t(pub poly8x16_t, pub poly8x16_t);
/// ARM-specific type containing three `poly8x16_t` vectors.
#[derive(Copy, Clone)]
pub struct poly8x16x3_t(pub poly8x16_t, pub poly8x16_t, pub poly8x16_t);
/// ARM-specific type containing four `poly8x16_t` vectors.
#[derive(Copy, Clone)]
pub struct poly8x16x4_t(
pub poly8x16_t,
pub poly8x16_t,
pub poly8x16_t,
pub poly8x16_t,
);
/// ARM-specific type containing two `int16x4_t` vectors.
#[derive(Copy, Clone)]
pub struct int16x4x2_t(pub int16x4_t, pub int16x4_t);
/// ARM-specific type containing three `int16x4_t` vectors.
#[derive(Copy, Clone)]
pub struct int16x4x3_t(pub int16x4_t, pub int16x4_t, pub int16x4_t);
/// ARM-specific type containing four `int16x4_t` vectors.
#[derive(Copy, Clone)]
pub struct int16x4x4_t(pub int16x4_t, pub int16x4_t, pub int16x4_t, pub int16x4_t);
/// ARM-specific type containing two `int16x8_t` vectors.
#[derive(Copy, Clone)]
pub struct int16x8x2_t(pub int16x8_t, pub int16x8_t);
/// ARM-specific type containing three `int16x8_t` vectors.
#[derive(Copy, Clone)]
pub struct int16x8x3_t(pub int16x8_t, pub int16x8_t, pub int16x8_t);
/// ARM-specific type containing four `int16x8_t` vectors.
#[derive(Copy, Clone)]
pub struct int16x8x4_t(pub int16x8_t, pub int16x8_t, pub int16x8_t, pub int16x8_t);
/// ARM-specific type containing two `uint16x4_t` vectors.
#[derive(Copy, Clone)]
pub struct uint16x4x2_t(pub uint16x4_t, pub uint16x4_t);
/// ARM-specific type containing three `uint16x4_t` vectors.
#[derive(Copy, Clone)]
pub struct uint16x4x3_t(pub uint16x4_t, pub uint16x4_t, pub uint16x4_t);
/// ARM-specific type containing four `uint16x4_t` vectors.
#[derive(Copy, Clone)]
pub struct uint16x4x4_t(
pub uint16x4_t,
pub uint16x4_t,
pub uint16x4_t,
pub uint16x4_t,
);
/// ARM-specific type containing two `uint16x8_t` vectors.
#[derive(Copy, Clone)]
pub struct uint16x8x2_t(pub uint16x8_t, pub uint16x8_t);
/// ARM-specific type containing three `uint16x8_t` vectors.
#[derive(Copy, Clone)]
pub struct uint16x8x3_t(pub uint16x8_t, pub uint16x8_t, pub uint16x8_t);
/// ARM-specific type containing four `uint16x8_t` vectors.
#[derive(Copy, Clone)]
pub struct uint16x8x4_t(
pub uint16x8_t,
pub uint16x8_t,
pub uint16x8_t,
pub uint16x8_t,
);
/// ARM-specific type containing two `poly16x4_t` vectors.
#[derive(Copy, Clone)]
pub struct poly16x4x2_t(pub poly16x4_t, pub poly16x4_t);
/// ARM-specific type containing three `poly16x4_t` vectors.
#[derive(Copy, Clone)]
pub struct poly16x4x3_t(pub poly16x4_t, pub poly16x4_t, pub poly16x4_t);
/// ARM-specific type containing four `poly16x4_t` vectors.
#[derive(Copy, Clone)]
pub struct poly16x4x4_t(
pub poly16x4_t,
pub poly16x4_t,
pub poly16x4_t,
pub poly16x4_t,
);
/// ARM-specific type containing two `poly16x8_t` vectors.
#[derive(Copy, Clone)]
pub struct poly16x8x2_t(pub poly16x8_t, pub poly16x8_t);
/// ARM-specific type containing three `poly16x8_t` vectors.
#[derive(Copy, Clone)]
pub struct poly16x8x3_t(pub poly16x8_t, pub poly16x8_t, pub poly16x8_t);
/// ARM-specific type containing four `poly16x8_t` vectors.
#[derive(Copy, Clone)]
pub struct poly16x8x4_t(
pub poly16x8_t,
pub poly16x8_t,
pub poly16x8_t,
pub poly16x8_t,
);
/// ARM-specific type containing two `int32x2_t` vectors.
#[derive(Copy, Clone)]
pub struct int32x2x2_t(pub int32x2_t, pub int32x2_t);
/// ARM-specific type containing three `int32x2_t` vectors.
#[derive(Copy, Clone)]
pub struct int32x2x3_t(pub int32x2_t, pub int32x2_t, pub int32x2_t);
/// ARM-specific type containing four `int32x2_t` vectors.
#[derive(Copy, Clone)]
pub struct int32x2x4_t(pub int32x2_t, pub int32x2_t, pub int32x2_t, pub int32x2_t);
/// ARM-specific type containing two `int32x4_t` vectors.
#[derive(Copy, Clone)]
pub struct int32x4x2_t(pub int32x4_t, pub int32x4_t);
/// ARM-specific type containing three `int32x4_t` vectors.
#[derive(Copy, Clone)]
pub struct int32x4x3_t(pub int32x4_t, pub int32x4_t, pub int32x4_t);
/// ARM-specific type containing four `int32x4_t` vectors.
#[derive(Copy, Clone)]
pub struct int32x4x4_t(pub int32x4_t, pub int32x4_t, pub int32x4_t, pub int32x4_t);
/// ARM-specific type containing two `uint32x2_t` vectors.
#[derive(Copy, Clone)]
pub struct uint32x2x2_t(pub uint32x2_t, pub uint32x2_t);
/// ARM-specific type containing three `uint32x2_t` vectors.
#[derive(Copy, Clone)]
pub struct uint32x2x3_t(pub uint32x2_t, pub uint32x2_t, pub uint32x2_t);
/// ARM-specific type containing four `uint32x2_t` vectors.
#[derive(Copy, Clone)]
pub struct uint32x2x4_t(
pub uint32x2_t,
pub uint32x2_t,
pub uint32x2_t,
pub uint32x2_t,
);
/// ARM-specific type containing two `uint32x4_t` vectors.
#[derive(Copy, Clone)]
pub struct uint32x4x2_t(pub uint32x4_t, pub uint32x4_t);
/// ARM-specific type containing three `uint32x4_t` vectors.
#[derive(Copy, Clone)]
pub struct uint32x4x3_t(pub uint32x4_t, pub uint32x4_t, pub uint32x4_t);
/// ARM-specific type containing four `uint32x4_t` vectors.
#[derive(Copy, Clone)]
pub struct uint32x4x4_t(
pub uint32x4_t,
pub uint32x4_t,
pub uint32x4_t,
pub uint32x4_t,
);
/// ARM-specific type containing two `float32x2_t` vectors.
#[derive(Copy, Clone)]
pub struct float32x2x2_t(pub float32x2_t, pub float32x2_t);
/// ARM-specific type containing three `float32x2_t` vectors.
#[derive(Copy, Clone)]
pub struct float32x2x3_t(pub float32x2_t, pub float32x2_t, pub float32x2_t);
/// ARM-specific type containing four `float32x2_t` vectors.
#[derive(Copy, Clone)]
pub struct float32x2x4_t(
pub float32x2_t,
pub float32x2_t,
pub float32x2_t,
pub float32x2_t,
);
/// ARM-specific type containing two `float32x4_t` vectors.
#[derive(Copy, Clone)]
pub struct float32x4x2_t(pub float32x4_t, pub float32x4_t);
/// ARM-specific type containing three `float32x4_t` vectors.
#[derive(Copy, Clone)]
pub struct float32x4x3_t(pub float32x4_t, pub float32x4_t, pub float32x4_t);
/// ARM-specific type containing four `float32x4_t` vectors.
#[derive(Copy, Clone)]
pub struct float32x4x4_t(
pub float32x4_t,
pub float32x4_t,
pub float32x4_t,
pub float32x4_t,
);
/// ARM-specific type containing two `int64x1_t` vectors.
#[derive(Copy, Clone)]
pub struct int64x1x2_t(pub int64x1_t, pub int64x1_t);
/// ARM-specific type containing three `int64x1_t` vectors.
#[derive(Copy, Clone)]
pub struct int64x1x3_t(pub int64x1_t, pub int64x1_t, pub int64x1_t);
/// ARM-specific type containing four `int64x1_t` vectors.
#[derive(Copy, Clone)]
pub struct int64x1x4_t(pub int64x1_t, pub int64x1_t, pub int64x1_t, pub int64x1_t);
/// ARM-specific type containing two `int64x2_t` vectors.
#[derive(Copy, Clone)]
pub struct int64x2x2_t(pub int64x2_t, pub int64x2_t);
/// ARM-specific type containing three `int64x2_t` vectors.
#[derive(Copy, Clone)]
pub struct int64x2x3_t(pub int64x2_t, pub int64x2_t, pub int64x2_t);
/// ARM-specific type containing four `int64x2_t` vectors.
#[derive(Copy, Clone)]
pub struct int64x2x4_t(pub int64x2_t, pub int64x2_t, pub int64x2_t, pub int64x2_t);
/// ARM-specific type containing two `uint64x1_t` vectors.
#[derive(Copy, Clone)]
pub struct uint64x1x2_t(pub uint64x1_t, pub uint64x1_t);
/// ARM-specific type containing three `uint64x1_t` vectors.
#[derive(Copy, Clone)]
pub struct uint64x1x3_t(pub uint64x1_t, pub uint64x1_t, pub uint64x1_t);
/// ARM-specific type containing four `uint64x1_t` vectors.
#[derive(Copy, Clone)]
pub struct uint64x1x4_t(
pub uint64x1_t,
pub uint64x1_t,
pub uint64x1_t,
pub uint64x1_t,
);
/// ARM-specific type containing two `uint64x2_t` vectors.
#[derive(Copy, Clone)]
pub struct uint64x2x2_t(pub uint64x2_t, pub uint64x2_t);
/// ARM-specific type containing three `uint64x2_t` vectors.
#[derive(Copy, Clone)]
pub struct uint64x2x3_t(pub uint64x2_t, pub uint64x2_t, pub uint64x2_t);
/// ARM-specific type containing four `uint64x2_t` vectors.
#[derive(Copy, Clone)]
pub struct uint64x2x4_t(
pub uint64x2_t,
pub uint64x2_t,
pub uint64x2_t,
pub uint64x2_t,
);
#[allow(improper_ctypes)]
extern "C" {
extern "unadjusted" {
// absolute value (64-bit)
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v8i8")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.abs.v8i8")]
@ -2867,11 +3117,7 @@ pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, IMM5 = 1))]
// Based on the discussion in https://github.com/rust-lang/stdarch/pull/792
// `mov` seems to be an acceptable intrinsic to compile to
// #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(vmov, IMM5 = 1))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
pub unsafe fn vgetq_lane_u64<const IMM5: i32>(v: uint64x2_t) -> u64 {
static_assert_imm1!(IMM5);
simd_extract(v, IMM5 as u32)
@ -2882,10 +3128,7 @@ pub unsafe fn vgetq_lane_u64<const IMM5: i32>(v: uint64x2_t) -> u64 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov, IMM5 = 0))]
// FIXME: no 32bit this seems to be turned into two vmov.32 instructions
// validate correctness
#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
pub unsafe fn vget_lane_u64<const IMM5: i32>(v: uint64x1_t) -> u64 {
static_assert!(IMM5 : i32 where IMM5 == 0);
simd_extract(v, 0)
@ -2896,8 +3139,7 @@ pub unsafe fn vget_lane_u64<const IMM5: i32>(v: uint64x1_t) -> u64 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u16", IMM5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, IMM5 = 2))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
pub unsafe fn vget_lane_u16<const IMM5: i32>(v: uint16x4_t) -> u16 {
static_assert_imm2!(IMM5);
simd_extract(v, IMM5 as u32)
@ -2908,8 +3150,7 @@ pub unsafe fn vget_lane_u16<const IMM5: i32>(v: uint16x4_t) -> u16 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.s16", IMM5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smov, IMM5 = 2))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
pub unsafe fn vget_lane_s16<const IMM5: i32>(v: int16x4_t) -> i16 {
static_assert_imm2!(IMM5);
simd_extract(v, IMM5 as u32)
@ -2920,8 +3161,7 @@ pub unsafe fn vget_lane_s16<const IMM5: i32>(v: int16x4_t) -> i16 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u16", IMM5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, IMM5 = 2))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
pub unsafe fn vget_lane_p16<const IMM5: i32>(v: poly16x4_t) -> p16 {
static_assert_imm2!(IMM5);
simd_extract(v, IMM5 as u32)
@ -2932,8 +3172,7 @@ pub unsafe fn vget_lane_p16<const IMM5: i32>(v: poly16x4_t) -> p16 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, IMM5 = 1))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
pub unsafe fn vget_lane_u32<const IMM5: i32>(v: uint32x2_t) -> u32 {
static_assert_imm1!(IMM5);
simd_extract(v, IMM5 as u32)
@ -2944,8 +3183,7 @@ pub unsafe fn vget_lane_u32<const IMM5: i32>(v: uint32x2_t) -> u32 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, IMM5 = 1))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
pub unsafe fn vget_lane_s32<const IMM5: i32>(v: int32x2_t) -> i32 {
static_assert_imm1!(IMM5);
simd_extract(v, IMM5 as u32)
@ -2956,8 +3194,7 @@ pub unsafe fn vget_lane_s32<const IMM5: i32>(v: int32x2_t) -> i32 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.f32", IMM5 = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, IMM5 = 1))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
pub unsafe fn vget_lane_f32<const IMM5: i32>(v: float32x2_t) -> f32 {
static_assert_imm1!(IMM5);
simd_extract(v, IMM5 as u32)
@ -2968,8 +3205,7 @@ pub unsafe fn vget_lane_f32<const IMM5: i32>(v: float32x2_t) -> f32 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.f32", IMM5 = 1))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, IMM5 = 1))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 1))]
pub unsafe fn vgetq_lane_f32<const IMM5: i32>(v: float32x4_t) -> f32 {
static_assert_imm2!(IMM5);
simd_extract(v, IMM5 as u32)
@ -2980,8 +3216,7 @@ pub unsafe fn vgetq_lane_f32<const IMM5: i32>(v: float32x4_t) -> f32 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov, IMM5 = 0))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
pub unsafe fn vget_lane_p64<const IMM5: i32>(v: poly64x1_t) -> p64 {
static_assert!(IMM5 : i32 where IMM5 == 0);
simd_extract(v, IMM5 as u32)
@ -2992,8 +3227,7 @@ pub unsafe fn vget_lane_p64<const IMM5: i32>(v: poly64x1_t) -> p64 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov, IMM5 = 0))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
pub unsafe fn vgetq_lane_p64<const IMM5: i32>(v: poly64x2_t) -> p64 {
static_assert_imm1!(IMM5);
simd_extract(v, IMM5 as u32)
@ -3004,8 +3238,7 @@ pub unsafe fn vgetq_lane_p64<const IMM5: i32>(v: poly64x2_t) -> p64 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov, IMM5 = 0))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
pub unsafe fn vget_lane_s64<const IMM5: i32>(v: int64x1_t) -> i64 {
static_assert!(IMM5 : i32 where IMM5 == 0);
simd_extract(v, IMM5 as u32)
@ -3016,8 +3249,7 @@ pub unsafe fn vget_lane_s64<const IMM5: i32>(v: int64x1_t) -> i64 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 0))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov, IMM5 = 0))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
pub unsafe fn vgetq_lane_s64<const IMM5: i32>(v: int64x2_t) -> i64 {
static_assert_imm1!(IMM5);
simd_extract(v, IMM5 as u32)
@ -3028,8 +3260,7 @@ pub unsafe fn vgetq_lane_s64<const IMM5: i32>(v: int64x2_t) -> i64 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u16", IMM5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, IMM5 = 2))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
pub unsafe fn vgetq_lane_u16<const IMM5: i32>(v: uint16x8_t) -> u16 {
static_assert_imm3!(IMM5);
simd_extract(v, IMM5 as u32)
@ -3040,8 +3271,7 @@ pub unsafe fn vgetq_lane_u16<const IMM5: i32>(v: uint16x8_t) -> u16 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, IMM5 = 2))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
pub unsafe fn vgetq_lane_u32<const IMM5: i32>(v: uint32x4_t) -> u32 {
static_assert_imm2!(IMM5);
simd_extract(v, IMM5 as u32)
@ -3052,8 +3282,7 @@ pub unsafe fn vgetq_lane_u32<const IMM5: i32>(v: uint32x4_t) -> u32 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.s16", IMM5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smov, IMM5 = 2))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
pub unsafe fn vgetq_lane_s16<const IMM5: i32>(v: int16x8_t) -> i16 {
static_assert_imm3!(IMM5);
simd_extract(v, IMM5 as u32)
@ -3064,8 +3293,7 @@ pub unsafe fn vgetq_lane_s16<const IMM5: i32>(v: int16x8_t) -> i16 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u16", IMM5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, IMM5 = 2))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
pub unsafe fn vgetq_lane_p16<const IMM5: i32>(v: poly16x8_t) -> p16 {
static_assert_imm3!(IMM5);
simd_extract(v, IMM5 as u32)
@ -3076,8 +3304,7 @@ pub unsafe fn vgetq_lane_p16<const IMM5: i32>(v: poly16x8_t) -> p16 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", IMM5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, IMM5 = 2))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
pub unsafe fn vgetq_lane_s32<const IMM5: i32>(v: int32x4_t) -> i32 {
static_assert_imm2!(IMM5);
simd_extract(v, IMM5 as u32)
@ -3088,8 +3315,7 @@ pub unsafe fn vgetq_lane_s32<const IMM5: i32>(v: int32x4_t) -> i32 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u8", IMM5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, IMM5 = 2))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
pub unsafe fn vget_lane_u8<const IMM5: i32>(v: uint8x8_t) -> u8 {
static_assert_imm3!(IMM5);
simd_extract(v, IMM5 as u32)
@ -3100,8 +3326,7 @@ pub unsafe fn vget_lane_u8<const IMM5: i32>(v: uint8x8_t) -> u8 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.s8", IMM5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smov, IMM5 = 2))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
pub unsafe fn vget_lane_s8<const IMM5: i32>(v: int8x8_t) -> i8 {
static_assert_imm3!(IMM5);
simd_extract(v, IMM5 as u32)
@ -3112,8 +3337,7 @@ pub unsafe fn vget_lane_s8<const IMM5: i32>(v: int8x8_t) -> i8 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u8", IMM5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, IMM5 = 2))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
pub unsafe fn vget_lane_p8<const IMM5: i32>(v: poly8x8_t) -> p8 {
static_assert_imm3!(IMM5);
simd_extract(v, IMM5 as u32)
@ -3124,8 +3348,7 @@ pub unsafe fn vget_lane_p8<const IMM5: i32>(v: poly8x8_t) -> p8 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u8", IMM5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, IMM5 = 2))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
pub unsafe fn vgetq_lane_u8<const IMM5: i32>(v: uint8x16_t) -> u8 {
static_assert_imm4!(IMM5);
simd_extract(v, IMM5 as u32)
@ -3136,8 +3359,7 @@ pub unsafe fn vgetq_lane_u8<const IMM5: i32>(v: uint8x16_t) -> u8 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.s8", IMM5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smov, IMM5 = 2))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
pub unsafe fn vgetq_lane_s8<const IMM5: i32>(v: int8x16_t) -> i8 {
static_assert_imm4!(IMM5);
simd_extract(v, IMM5 as u32)
@ -3148,8 +3370,7 @@ pub unsafe fn vgetq_lane_s8<const IMM5: i32>(v: int8x16_t) -> i8 {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[rustc_legacy_const_generics(1)]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u8", IMM5 = 2))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, IMM5 = 2))]
#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
pub unsafe fn vgetq_lane_p8<const IMM5: i32>(v: poly8x16_t) -> p8 {
static_assert_imm4!(IMM5);
simd_extract(v, IMM5 as u32)
@ -3269,8 +3490,7 @@ pub unsafe fn vget_high_f32(a: float32x4_t) -> float32x2_t {
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t {
simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
}
@ -3279,8 +3499,7 @@ pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t {
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vget_low_s16(a: int16x8_t) -> int16x4_t {
simd_shuffle4!(a, a, [0, 1, 2, 3])
}
@ -3289,8 +3508,7 @@ pub unsafe fn vget_low_s16(a: int16x8_t) -> int16x4_t {
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vget_low_s32(a: int32x4_t) -> int32x2_t {
simd_shuffle2!(a, a, [0, 1])
}
@ -3299,8 +3517,7 @@ pub unsafe fn vget_low_s32(a: int32x4_t) -> int32x2_t {
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr("ldr"))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))]
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vget_low_s64(a: int64x2_t) -> int64x1_t {
int64x1_t(simd_extract(a, 0))
}
@ -3309,8 +3526,7 @@ pub unsafe fn vget_low_s64(a: int64x2_t) -> int64x1_t {
/// Duplicate vector element to vector or scalar.
///
/// Returns the low half (lanes 0..7) of the 128-bit vector `a`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
// Compiles to no instruction (register aliasing), so the test expects `nop`;
// the duplicate stale `assert_instr(ldr)` attributes were removed.
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vget_low_u8(a: uint8x16_t) -> uint8x8_t {
    simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
}
@ -3319,8 +3535,7 @@ pub unsafe fn vget_low_u8(a: uint8x16_t) -> uint8x8_t {
/// Duplicate vector element to vector or scalar.
///
/// Returns the low half (lanes 0..3) of the 128-bit vector `a`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
// Compiles to no instruction (register aliasing), so the test expects `nop`;
// the duplicate stale `assert_instr(ldr)` attributes were removed.
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vget_low_u16(a: uint16x8_t) -> uint16x4_t {
    simd_shuffle4!(a, a, [0, 1, 2, 3])
}
@ -3329,8 +3544,7 @@ pub unsafe fn vget_low_u16(a: uint16x8_t) -> uint16x4_t {
/// Duplicate vector element to vector or scalar.
///
/// Returns the low half (lanes 0..1) of the 128-bit vector `a`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
// Compiles to no instruction (register aliasing), so the test expects `nop`;
// the duplicate stale `assert_instr(ldr)` attributes were removed.
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vget_low_u32(a: uint32x4_t) -> uint32x2_t {
    simd_shuffle2!(a, a, [0, 1])
}
@ -3339,8 +3553,7 @@ pub unsafe fn vget_low_u32(a: uint32x4_t) -> uint32x2_t {
/// Duplicate vector element to vector or scalar.
///
/// Returns lane 0 of the 128-bit vector `a` as a one-element vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
// Compiles to no instruction (register aliasing), so the test expects `nop`;
// the duplicate stale `assert_instr(ldr)` attributes were removed.
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vget_low_u64(a: uint64x2_t) -> uint64x1_t {
    uint64x1_t(simd_extract(a, 0))
}
@ -3349,8 +3562,7 @@ pub unsafe fn vget_low_u64(a: uint64x2_t) -> uint64x1_t {
/// Duplicate vector element to vector or scalar.
///
/// Returns the low half (lanes 0..7) of the 128-bit polynomial vector `a`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
// Compiles to no instruction (register aliasing), so the test expects `nop`;
// the duplicate stale `assert_instr(ldr)` attributes were removed.
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vget_low_p8(a: poly8x16_t) -> poly8x8_t {
    simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
}
@ -3359,8 +3571,7 @@ pub unsafe fn vget_low_p8(a: poly8x16_t) -> poly8x8_t {
/// Duplicate vector element to vector or scalar.
///
/// Returns the low half (lanes 0..3) of the 128-bit polynomial vector `a`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
// Compiles to no instruction (register aliasing), so the test expects `nop`;
// the duplicate stale `assert_instr(ldr)` attributes were removed.
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vget_low_p16(a: poly16x8_t) -> poly16x4_t {
    simd_shuffle4!(a, a, [0, 1, 2, 3])
}
@ -3369,8 +3580,7 @@ pub unsafe fn vget_low_p16(a: poly16x8_t) -> poly16x4_t {
/// Duplicate vector element to vector or scalar.
///
/// Returns the low half (lanes 0..1) of the 128-bit float vector `a`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
// Compiles to no instruction (register aliasing), so the test expects `nop`;
// the duplicate stale `assert_instr(ldr)` attributes were removed.
#[cfg_attr(test, assert_instr(nop))]
pub unsafe fn vget_low_f32(a: float32x4_t) -> float32x2_t {
    simd_shuffle2!(a, a, [0, 1])
}

View file

@ -2033,6 +2033,81 @@ aarch64 = sqadd
link-aarch64 = sqadd._EXT_
generate i32, i64
/// Load multiple single-element structures to one, two, three, or four registers
name = vld1
out-suffix
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
test = load_test
aarch64 = ld1
link-aarch64 = ld1x2._EXT2_
arm = vld1
link-arm = vld1x2._EXT2_
generate *const i8:int8x8x2_t, *const i16:int16x4x2_t, *const i32:int32x2x2_t, *const i64:int64x1x2_t
generate *const i8:int8x16x2_t, *const i16:int16x8x2_t, *const i32:int32x4x2_t, *const i64:int64x2x2_t
link-aarch64 = ld1x3._EXT2_
link-arm = vld1x3._EXT2_
generate *const i8:int8x8x3_t, *const i16:int16x4x3_t, *const i32:int32x2x3_t, *const i64:int64x1x3_t
generate *const i8:int8x16x3_t, *const i16:int16x8x3_t, *const i32:int32x4x3_t, *const i64:int64x2x3_t
link-aarch64 = ld1x4._EXT2_
link-arm = vld1x4._EXT2_
generate *const i8:int8x8x4_t, *const i16:int16x4x4_t, *const i32:int32x2x4_t, *const i64:int64x1x4_t
generate *const i8:int8x16x4_t, *const i16:int16x8x4_t, *const i32:int32x4x4_t, *const i64:int64x2x4_t
/// Load multiple single-element structures to one, two, three, or four registers
name = vld1
out-suffix
multi_fn = transmute, {vld1-outsigned-noext, transmute(a)}
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
validate 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32
test = load_test
aarch64 = ld1
arm = vld1
generate *const u8:uint8x8x2_t, *const u16:uint16x4x2_t, *const u32:uint32x2x2_t, *const u64:uint64x1x2_t
generate *const u8:uint8x16x2_t, *const u16:uint16x8x2_t, *const u32:uint32x4x2_t, *const u64:uint64x2x2_t
generate *const u8:uint8x8x3_t, *const u16:uint16x4x3_t, *const u32:uint32x2x3_t, *const u64:uint64x1x3_t
generate *const u8:uint8x16x3_t, *const u16:uint16x8x3_t, *const u32:uint32x4x3_t, *const u64:uint64x2x3_t
generate *const u8:uint8x8x4_t, *const u16:uint16x4x4_t, *const u32:uint32x2x4_t, *const u64:uint64x1x4_t
generate *const u8:uint8x16x4_t, *const u16:uint16x8x4_t, *const u32:uint32x4x4_t, *const u64:uint64x2x4_t
generate *const p8:poly8x8x2_t, *const p8:poly8x8x3_t, *const p8:poly8x8x4_t
generate *const p8:poly8x16x2_t, *const p8:poly8x16x3_t, *const p8:poly8x16x4_t
generate *const p16:poly16x4x2_t, *const p16:poly16x4x3_t, *const p16:poly16x4x4_t
generate *const p16:poly16x8x2_t, *const p16:poly16x8x3_t, *const p16:poly16x8x4_t
/// Load multiple single-element structures to one, two, three, or four registers
name = vld1
out-suffix
a = 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
validate 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.
test = load_test
aarch64 = ld1
link-aarch64 = ld1x2._EXT2_
generate *const f64:float64x1x2_t, *const f64:float64x2x2_t
link-aarch64 = ld1x3._EXT2_
generate *const f64:float64x1x3_t, *const f64:float64x2x3_t
link-aarch64 = ld1x4._EXT2_
generate *const f64:float64x1x4_t, *const f64:float64x2x4_t
arm = vld1
link-aarch64 = ld1x2._EXT2_
link-arm = vld1x2._EXT2_
generate *const f32:float32x2x2_t, *const f32:float32x4x2_t
link-aarch64 = ld1x3._EXT2_
link-arm = vld1x3._EXT2_
generate *const f32:float32x2x3_t, *const f32:float32x4x3_t
link-aarch64 = ld1x4._EXT2_
link-arm = vld1x4._EXT2_
generate *const f32:float32x2x4_t, *const f32:float32x4x4_t
/// Multiply
name = vmul
a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2

View file

@ -51,38 +51,34 @@ const FLOAT_TYPES_64: [&str; 2] = [
];
fn type_len(t: &str) -> usize {
match t {
"int8x8_t" => 8,
"int8x16_t" => 16,
"int16x4_t" => 4,
"int16x8_t" => 8,
"int32x2_t" => 2,
"int32x4_t" => 4,
"int64x1_t" => 1,
"int64x2_t" => 2,
"uint8x8_t" => 8,
"uint8x16_t" => 16,
"uint16x4_t" => 4,
"uint16x8_t" => 8,
"uint32x2_t" => 2,
"uint32x4_t" => 4,
"uint64x1_t" => 1,
"uint64x2_t" => 2,
"float16x4_t" => 4,
"float16x8_t" => 8,
"float32x2_t" => 2,
"float32x4_t" => 4,
"float64x1_t" => 1,
"float64x2_t" => 2,
"poly8x8_t" => 8,
"poly8x16_t" => 16,
"poly16x4_t" => 4,
"poly16x8_t" => 8,
"poly64x1_t" => 1,
"poly64x2_t" => 2,
"i8" | "i16" | "i32" | "i64" | "u8" | "u16" | "u32" | "u64" | "f32" | "f64" | "p8"
| "p16" | "p64" | "p128" => 1,
_ => panic!("unknown type: {}", t),
let s: Vec<_> = t.split("x").collect();
if s.len() == 2 {
match &s[1][0..2] {
"1_" => 1,
"2_" => 2,
"4_" => 4,
"8_" => 8,
"16" => 16,
_ => panic!("unknown type: {}", t),
}
} else if s.len() == 3 {
s[1].parse::<usize>().unwrap() * type_sub_len(t)
} else {
1
}
}
/// Number of vectors in a NEON tuple type name.
///
/// Only three-part names (`<elem>x<lanes>x<count>_t`, e.g. "int8x8x2_t")
/// carry a vector count; every other name counts as a single vector.
/// Panics if a three-part name has a count other than 2, 3 or 4.
fn type_sub_len(t: &str) -> usize {
    let segments: Vec<&str> = t.split('x').collect();
    // Slice patterns: dispatch on the shape of the split name directly.
    match segments.as_slice() {
        [_, _, "2_t"] => 2,
        [_, _, "3_t"] => 3,
        [_, _, "4_t"] => 4,
        [_, _, _] => panic!("unknown type len: {}", t),
        _ => 1,
    }
}
@ -177,6 +173,84 @@ fn type_to_suffix(t: &str) -> &str {
"poly16x8_t" => "q_p16",
"poly64x1_t" => "_p64",
"poly64x2_t" => "q_p64",
"int8x8x2_t" => "_s8_x2",
"int8x8x3_t" => "_s8_x3",
"int8x8x4_t" => "_s8_x4",
"int16x4x2_t" => "_s16_x2",
"int16x4x3_t" => "_s16_x3",
"int16x4x4_t" => "_s16_x4",
"int32x2x2_t" => "_s32_x2",
"int32x2x3_t" => "_s32_x3",
"int32x2x4_t" => "_s32_x4",
"int64x1x2_t" => "_s64_x2",
"int64x1x3_t" => "_s64_x3",
"int64x1x4_t" => "_s64_x4",
"uint8x8x2_t" => "_u8_x2",
"uint8x8x3_t" => "_u8_x3",
"uint8x8x4_t" => "_u8_x4",
"uint16x4x2_t" => "_u16_x2",
"uint16x4x3_t" => "_u16_x3",
"uint16x4x4_t" => "_u16_x4",
"uint32x2x2_t" => "_u32_x2",
"uint32x2x3_t" => "_u32_x3",
"uint32x2x4_t" => "_u32_x4",
"uint64x1x2_t" => "_u64_x2",
"uint64x1x3_t" => "_u64_x3",
"uint64x1x4_t" => "_u64_x4",
"poly8x8x2_t" => "_p8_x2",
"poly8x8x3_t" => "_p8_x3",
"poly8x8x4_t" => "_p8_x4",
"poly16x4x2_t" => "_p16_x2",
"poly16x4x3_t" => "_p16_x3",
"poly16x4x4_t" => "_p16_x4",
"poly64x1x2_t" => "_p64_x2",
"poly64x1x3_t" => "_p64_x3",
"poly64x1x4_t" => "_p64_x4",
"float32x2x2_t" => "_f32_x2",
"float32x2x3_t" => "_f32_x3",
"float32x2x4_t" => "_f32_x4",
"float64x1x2_t" => "_f64_x2",
"float64x1x3_t" => "_f64_x3",
"float64x1x4_t" => "_f64_x4",
"int8x16x2_t" => "q_s8_x2",
"int8x16x3_t" => "q_s8_x3",
"int8x16x4_t" => "q_s8_x4",
"int16x8x2_t" => "q_s16_x2",
"int16x8x3_t" => "q_s16_x3",
"int16x8x4_t" => "q_s16_x4",
"int32x4x2_t" => "q_s32_x2",
"int32x4x3_t" => "q_s32_x3",
"int32x4x4_t" => "q_s32_x4",
"int64x2x2_t" => "q_s64_x2",
"int64x2x3_t" => "q_s64_x3",
"int64x2x4_t" => "q_s64_x4",
"uint8x16x2_t" => "q_u8_x2",
"uint8x16x3_t" => "q_u8_x3",
"uint8x16x4_t" => "q_u8_x4",
"uint16x8x2_t" => "q_u16_x2",
"uint16x8x3_t" => "q_u16_x3",
"uint16x8x4_t" => "q_u16_x4",
"uint32x4x2_t" => "q_u32_x2",
"uint32x4x3_t" => "q_u32_x3",
"uint32x4x4_t" => "q_u32_x4",
"uint64x2x2_t" => "q_u64_x2",
"uint64x2x3_t" => "q_u64_x3",
"uint64x2x4_t" => "q_u64_x4",
"poly8x16x2_t" => "q_p8_x2",
"poly8x16x3_t" => "q_p8_x3",
"poly8x16x4_t" => "q_p8_x4",
"poly16x8x2_t" => "q_p16_x2",
"poly16x8x3_t" => "q_p16_x3",
"poly16x8x4_t" => "q_p16_x4",
"poly64x2x2_t" => "q_p64_x2",
"poly64x2x3_t" => "q_p64_x3",
"poly64x2x4_t" => "q_p64_x4",
"float32x4x2_t" => "q_f32_x2",
"float32x4x3_t" => "q_f32_x3",
"float32x4x4_t" => "q_f32_x4",
"float64x2x2_t" => "q_f64_x2",
"float64x2x3_t" => "q_f64_x3",
"float64x2x4_t" => "q_f64_x4",
"i8" => "b_s8",
"i16" => "h_s16",
"i32" => "s_s32",
@ -274,18 +348,10 @@ fn type_to_lane_suffixes<'a>(out_t: &'a str, in_t: &'a str) -> String {
str
}
/// Map a NEON type name to its signed-integer counterpart by rewriting the
/// "uint"/"poly" prefix to "int"; names that are already signed (or float)
/// pass through unchanged.
///
/// Unlike the old hand-written table this works for every lane count,
/// including the x2/x3/x4 tuple types. Taking `&str` (rather than `&String`)
/// is both idiomatic and backward compatible: call sites passing
/// `&String::from(..)` deref-coerce automatically.
/// (Merge residue that left both the old and new definitions in place has
/// been removed.)
fn type_to_signed(t: &str) -> String {
    t.replace("uint", "int").replace("poly", "int")
}
fn type_to_unsigned(t: &str) -> &str {
@ -384,34 +450,34 @@ enum TargetFeature {
fn type_to_global_type(t: &str) -> &str {
match t {
"int8x8_t" => "i8x8",
"int8x16_t" => "i8x16",
"int16x4_t" => "i16x4",
"int16x8_t" => "i16x8",
"int32x2_t" => "i32x2",
"int32x4_t" => "i32x4",
"int64x1_t" => "i64x1",
"int64x2_t" => "i64x2",
"uint8x8_t" => "u8x8",
"uint8x16_t" => "u8x16",
"uint16x4_t" => "u16x4",
"uint16x8_t" => "u16x8",
"uint32x2_t" => "u32x2",
"uint32x4_t" => "u32x4",
"uint64x1_t" => "u64x1",
"uint64x2_t" => "u64x2",
"int8x8_t" | "int8x8x2_t" | "int8x8x3_t" | "int8x8x4_t" => "i8x8",
"int8x16_t" | "int8x16x2_t" | "int8x16x3_t" | "int8x16x4_t" => "i8x16",
"int16x4_t" | "int16x4x2_t" | "int16x4x3_t" | "int16x4x4_t" => "i16x4",
"int16x8_t" | "int16x8x2_t" | "int16x8x3_t" | "int16x8x4_t" => "i16x8",
"int32x2_t" | "int32x2x2_t" | "int32x2x3_t" | "int32x2x4_t" => "i32x2",
"int32x4_t" | "int32x4x2_t" | "int32x4x3_t" | "int32x4x4_t" => "i32x4",
"int64x1_t" | "int64x1x2_t" | "int64x1x3_t" | "int64x1x4_t" => "i64x1",
"int64x2_t" | "int64x2x2_t" | "int64x2x3_t" | "int64x2x4_t" => "i64x2",
"uint8x8_t" | "uint8x8x2_t" | "uint8x8x3_t" | "uint8x8x4_t" => "u8x8",
"uint8x16_t" | "uint8x16x2_t" | "uint8x16x3_t" | "uint8x16x4_t" => "u8x16",
"uint16x4_t" | "uint16x4x2_t" | "uint16x4x3_t" | "uint16x4x4_t" => "u16x4",
"uint16x8_t" | "uint16x8x2_t" | "uint16x8x3_t" | "uint16x8x4_t" => "u16x8",
"uint32x2_t" | "uint32x2x2_t" | "uint32x2x3_t" | "uint32x2x4_t" => "u32x2",
"uint32x4_t" | "uint32x4x2_t" | "uint32x4x3_t" | "uint32x4x4_t" => "u32x4",
"uint64x1_t" | "uint64x1x2_t" | "uint64x1x3_t" | "uint64x1x4_t" => "u64x1",
"uint64x2_t" | "uint64x2x2_t" | "uint64x2x3_t" | "uint64x2x4_t" => "u64x2",
"float16x4_t" => "f16x4",
"float16x8_t" => "f16x8",
"float32x2_t" => "f32x2",
"float32x4_t" => "f32x4",
"float64x1_t" => "f64",
"float64x2_t" => "f64x2",
"poly8x8_t" => "i8x8",
"poly8x16_t" => "i8x16",
"poly16x4_t" => "i16x4",
"poly16x8_t" => "i16x8",
"poly64x1_t" => "i64x1",
"poly64x2_t" => "i64x2",
"float32x2_t" | "float32x2x2_t" | "float32x2x3_t" | "float32x2x4_t" => "f32x2",
"float32x4_t" | "float32x4x2_t" | "float32x4x3_t" | "float32x4x4_t" => "f32x4",
"float64x1_t" | "float64x1x2_t" | "float64x1x3_t" | "float64x1x4_t" => "f64",
"float64x2_t" | "float64x2x2_t" | "float64x2x3_t" | "float64x2x4_t" => "f64x2",
"poly8x8_t" | "poly8x8x2_t" | "poly8x8x3_t" | "poly8x8x4_t" => "i8x8",
"poly8x16_t" | "poly8x16x2_t" | "poly8x16x3_t" | "poly8x16x4_t" => "i8x16",
"poly16x4_t" | "poly16x4x2_t" | "poly16x4x3_t" | "poly16x4x4_t" => "i16x4",
"poly16x8_t" | "poly16x8x2_t" | "poly16x8x3_t" | "poly16x8x4_t" => "i16x8",
"poly64x1_t" | "poly64x1x2_t" | "poly64x1x3_t" | "poly64x1x4_t" => "i64x1",
"poly64x2_t" | "poly64x2x2_t" | "poly64x2x3_t" | "poly64x2x4_t" => "i64x2",
"i8" => "i8",
"i16" => "i16",
"i32" => "i32",
@ -432,18 +498,33 @@ fn type_to_global_type(t: &str) -> &str {
/// Scalar element type backing a NEON type name (vector, tuple or scalar),
/// e.g. "int8x16x2_t" -> "i8", "poly16x4_t" -> "u16".
///
/// Polynomial types are backed by unsigned integers of the same width.
/// Panics on an unrecognized name so spec typos fail the generator loudly.
/// (The superseded pre-rewrite arms left in by the flattened merge — which
/// produced duplicate/unreachable patterns — have been removed.)
fn type_to_native_type(t: &str) -> &str {
    match t {
        "int8x8_t" | "int8x16_t" | "i8" | "int8x8x2_t" | "int8x8x3_t" | "int8x8x4_t"
        | "int8x16x2_t" | "int8x16x3_t" | "int8x16x4_t" => "i8",
        "int16x4_t" | "int16x8_t" | "i16" | "int16x4x2_t" | "int16x4x3_t" | "int16x4x4_t"
        | "int16x8x2_t" | "int16x8x3_t" | "int16x8x4_t" => "i16",
        "int32x2_t" | "int32x4_t" | "i32" | "int32x2x2_t" | "int32x2x3_t" | "int32x2x4_t"
        | "int32x4x2_t" | "int32x4x3_t" | "int32x4x4_t" => "i32",
        "int64x1_t" | "int64x2_t" | "i64" | "int64x1x2_t" | "int64x1x3_t" | "int64x1x4_t"
        | "int64x2x2_t" | "int64x2x3_t" | "int64x2x4_t" => "i64",
        "uint8x8_t" | "uint8x16_t" | "u8" | "uint8x8x2_t" | "uint8x8x3_t" | "uint8x8x4_t"
        | "uint8x16x2_t" | "uint8x16x3_t" | "uint8x16x4_t" => "u8",
        "uint16x4_t" | "uint16x8_t" | "u16" | "uint16x4x2_t" | "uint16x4x3_t" | "uint16x4x4_t"
        | "uint16x8x2_t" | "uint16x8x3_t" | "uint16x8x4_t" => "u16",
        "uint32x2_t" | "uint32x4_t" | "u32" | "uint32x2x2_t" | "uint32x2x3_t" | "uint32x2x4_t"
        | "uint32x4x2_t" | "uint32x4x3_t" | "uint32x4x4_t" => "u32",
        "uint64x1_t" | "uint64x2_t" | "u64" | "uint64x1x2_t" | "uint64x1x3_t" | "uint64x1x4_t"
        | "uint64x2x2_t" | "uint64x2x3_t" | "uint64x2x4_t" => "u64",
        "float16x4_t" | "float16x8_t" => "f16",
        "float32x2_t" | "float32x4_t" | "float32x2x2_t" | "float32x2x3_t" | "float32x2x4_t"
        | "float32x4x2_t" | "float32x4x3_t" | "float32x4x4_t" => "f32",
        "float64x1_t" | "float64x2_t" | "float64x1x2_t" | "float64x1x3_t" | "float64x1x4_t"
        | "float64x2x2_t" | "float64x2x3_t" | "float64x2x4_t" => "f64",
        // Polynomial vectors are represented natively as unsigned integers.
        "poly8x8_t" | "poly8x16_t" | "poly8x8x2_t" | "poly8x8x3_t" | "poly8x8x4_t"
        | "poly8x16x2_t" | "poly8x16x3_t" | "poly8x16x4_t" => "u8",
        "poly16x4_t" | "poly16x8_t" | "poly16x4x2_t" | "poly16x4x3_t" | "poly16x4x4_t"
        | "poly16x8x2_t" | "poly16x8x3_t" | "poly16x8x4_t" => "u16",
        "poly64x1_t" | "poly64x2_t" | "poly64x1x2_t" | "poly64x1x3_t" | "poly64x1x4_t"
        | "poly64x2x2_t" | "poly64x2x3_t" | "poly64x2x4_t" => "u64",
        _ => panic!("unknown type: {}", t),
    }
}
@ -510,6 +591,26 @@ fn type_to_ext(t: &str) -> &str {
"poly8x16_t" => "v16i8",
"poly16x4_t" => "v4i16",
"poly16x8_t" => "v8i16",
"int8x8x2_t" | "int8x8x3_t" | "int8x8x4_t" => "v8i8.p0i8",
"int16x4x2_t" | "int16x4x3_t" | "int16x4x4_t" => "v4i16.p0i16",
"int32x2x2_t" | "int32x2x3_t" | "int32x2x4_t" => "v2i32.p0i32",
"int64x1x2_t" | "int64x1x3_t" | "int64x1x4_t" => "v1i64.p0i64",
"uint8x8x2_t" | "uint8x8x3_t" | "uint8x8x4_t" => "v8i8.p0i8",
"uint16x4x2_t" | "uint16x4x3_t" | "uint16x4x4_t" => "v4i16.p0i16",
"uint32x2x2_t" | "uint32x2x3_t" | "uint32x2x4_t" => "v2i32.p0i32",
"uint64x1x2_t" | "uint64x1x3_t" | "uint64x1x4_t" => "v1i64.p0i64",
"float32x2x2_t" | "float32x2x3_t" | "float32x2x4_t" => "v2f32.p0f32",
"float64x1x2_t" | "float64x1x3_t" | "float64x1x4_t" => "v1f64.p0f64",
"int8x16x2_t" | "int8x16x3_t" | "int8x16x4_t" => "v16i8.p0i8",
"int16x8x2_t" | "int16x8x3_t" | "int16x8x4_t" => "v8i16.p0i16",
"int32x4x2_t" | "int32x4x3_t" | "int32x4x4_t" => "v4i32.p0i32",
"int64x2x2_t" | "int64x2x3_t" | "int64x2x4_t" => "v2i64.p0i64",
"uint8x16x2_t" | "uint8x16x3_t" | "uint8x16x4_t" => "v16i8.p0i8",
"uint16x8x2_t" | "uint16x8x3_t" | "uint16x8x4_t" => "v8i16.p0i16",
"uint32x4x2_t" | "uint32x4x3_t" | "uint32x4x4_t" => "v4i32.p0i32",
"uint64x2x2_t" | "uint64x2x3_t" | "uint64x2x4_t" => "v2i64.p0i64",
"float32x4x2_t" | "float32x4x3_t" | "float32x4x4_t" => "v4f32.p0f32",
"float64x2x2_t" | "float64x2x3_t" | "float64x2x4_t" => "v2f64.p0f64",
"i8" => "i8",
"i16" => "i16",
"i32" => "i32",
@ -522,6 +623,16 @@ fn type_to_ext(t: &str) -> &str {
"f64" => "f64",
"p64" => "p64",
"p128" => "p128",
"*const i8" => "i8",
"*const i16" => "i16",
"*const i32" => "i32",
"*const i64" => "i64",
"*const u8" => "i8",
"*const u16" => "i16",
"*const u32" => "i32",
"*const u64" => "i64",
"*const f32" => "f32",
"*const f64" => "f64",
/*
"poly64x1_t" => "i64x1",
"poly64x2_t" => "i64x2",
@ -858,9 +969,8 @@ fn gen_aarch64(
target: TargetFeature,
fixed: &Vec<String>,
multi_fn: &Vec<String>,
test_fn: &str,
) -> (String, String) {
let _global_t = type_to_global_type(in_t[0]);
let _global_ret_t = type_to_global_type(out_t);
let name = match suffix {
Normal => format!("{}{}", current_name, type_to_suffix(in_t[1])),
NoQ => format!("{}{}", current_name, type_to_noq_suffix(in_t[1])),
@ -941,7 +1051,7 @@ fn gen_aarch64(
};
ext_c = format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
extern "unadjusted" {{
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
fn {}({}) -> {};
}}
@ -965,7 +1075,7 @@ fn gen_aarch64(
if const_aarch64.is_some() {
ext_c_const = format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
extern "unadjusted" {{
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
fn {}({}) -> {};
}}
@ -1162,17 +1272,94 @@ fn gen_aarch64(
current_comment, current_target, current_aarch64, const_assert, const_legacy, call
);
let test = gen_test(
&name,
in_t,
&out_t,
current_tests,
[type_len(in_t[0]), type_len(in_t[1]), type_len(in_t[2])],
type_len(out_t),
para_num,
);
let test = if test_fn == "load_test" {
gen_load_test(&name, in_t, &out_t, current_tests, type_len(out_t))
} else {
gen_test(
&name,
in_t,
&out_t,
current_tests,
[type_len(in_t[0]), type_len(in_t[1]), type_len(in_t[2])],
type_len(out_t),
para_num,
)
};
(function, test)
}
/// Generate the body of a `#[simd_test]` unit test for a vld1*-style load
/// intrinsic.
///
/// `name` is the intrinsic under test, `out_t` its tuple-of-vectors return
/// type, and `len_out` the total lane count of that type (== type_len(out_t),
/// supplied by the caller). For each spec test case, only the input column
/// `a` and the expected column `e` are used; the emitted test builds a flat
/// native-typed input array, calls the intrinsic, transmutes the returned
/// tuple into an array of per-vector global types, and compares.
fn gen_load_test(
    name: &str,
    _in_t: &[&str; 3],
    out_t: &str,
    current_tests: &[(
        Vec<String>,
        Vec<String>,
        Vec<String>,
        Option<String>,
        Vec<String>,
    )],
    len_out: usize,
) -> String {
    // Test header; `{{` is format!'s escape for a literal `{`.
    let mut test = format!(
        r#"
#[simd_test(enable = "neon")]
unsafe fn test_{}() {{"#,
        name,
    );
    for (a, _, _, _, e) in current_tests {
        // One extra input element: the intrinsic is called on `a[1..]`
        // (presumably so the load starts at a non-zero offset — TODO confirm).
        let a: Vec<String> = a.iter().take(len_out + 1).cloned().collect();
        let e: Vec<String> = e.iter().take(len_out).cloned().collect();
        // Render the input array literal, e.g. "[0, 1, 2, ...]".
        let mut input = String::from("[");
        for i in 0..type_len(out_t) + 1 {
            if i != 0 {
                input.push_str(", ");
            }
            input.push_str(&a[i])
        }
        input.push_str("]");
        // Render the expected array: one entry per sub-vector of the tuple.
        let mut output = String::from("[");
        for i in 0..type_sub_len(out_t) {
            if i != 0 {
                output.push_str(", ");
            }
            // Lanes per sub-vector.
            let sub_len = type_len(out_t) / type_sub_len(out_t);
            if type_to_global_type(out_t) != "f64" {
                // Vector-valued sub-type: emit a `u8x8::new(..)`-style
                // constructor listing its lanes.
                let mut sub_output = format!("{}::new(", type_to_global_type(out_t));
                for j in 0..sub_len {
                    if j != 0 {
                        sub_output.push_str(", ");
                    }
                    sub_output.push_str(&e[i * sub_len + j]);
                }
                sub_output.push_str(")");
                output.push_str(&sub_output);
            } else {
                // float64x1* maps to the scalar global type "f64", which has
                // no `::new` constructor: emit the bare lane value.
                output.push_str(&e[i]);
            }
        }
        output.push_str("]");
        // Assemble one `a`/`e`/`r` check for this test case.
        let t = format!(
            r#"
let a: [{}; {}] = {};
let e: [{}; {}] = {};
let r: [{}; {}] = transmute({}(a[1..].as_ptr()));
assert_eq!(r, e);
"#,
            type_to_native_type(out_t),
            type_len(out_t) + 1,
            input,
            type_to_global_type(out_t),
            type_sub_len(out_t),
            output,
            type_to_global_type(out_t),
            type_sub_len(out_t),
            name,
        );
        test.push_str(&t);
    }
    // Close the generated test function.
    test.push_str(" }\n");
    test
}
fn gen_test(
name: &str,
@ -1305,9 +1492,8 @@ fn gen_arm(
target: TargetFeature,
fixed: &Vec<String>,
multi_fn: &Vec<String>,
test_fn: &str,
) -> (String, String) {
let _global_t = type_to_global_type(in_t[0]);
let _global_ret_t = type_to_global_type(out_t);
let name = match suffix {
Normal => format!("{}{}", current_name, type_to_suffix(in_t[1])),
NoQ => format!("{}{}", current_name, type_to_noq_suffix(in_t[1])),
@ -1440,7 +1626,7 @@ fn gen_arm(
if out_t == link_arm_t[3] && out_t == link_aarch64_t[3] {
ext_c = format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
extern "unadjusted" {{
#[cfg_attr(target_arch = "arm", link_name = "{}")]
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
fn {}({}) -> {};
@ -1476,7 +1662,7 @@ fn gen_arm(
};
ext_c_arm.push_str(&format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
extern "unadjusted" {{
#[cfg_attr(target_arch = "arm", link_name = "{}")]
fn {}({}) -> {};
}}
@ -1504,7 +1690,7 @@ fn gen_arm(
if out_t != link_arm_t[3] {
ext_c_arm.push_str(&format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
extern "unadjusted" {{
#[cfg_attr(target_arch = "arm", link_name = "{}")]
fn {}({}) -> {};
}}
@ -1532,7 +1718,7 @@ fn gen_arm(
if const_aarch64.is_some() {
ext_c_aarch64.push_str(&format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
extern "unadjusted" {{
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
fn {}({}) -> {};
}}
@ -1557,7 +1743,7 @@ fn gen_arm(
if out_t != link_aarch64_t[3] {
ext_c_aarch64.push_str(&format!(
r#"#[allow(improper_ctypes)]
extern "C" {{
extern "unadjusted" {{
#[cfg_attr(target_arch = "aarch64", link_name = "{}")]
fn {}({}) -> {};
}}
@ -1880,15 +2066,19 @@ fn gen_arm(
call,
)
};
let test = gen_test(
&name,
in_t,
&out_t,
current_tests,
[type_len(in_t[0]), type_len(in_t[1]), type_len(in_t[2])],
type_len(out_t),
para_num,
);
let test = if test_fn == "load_test" {
gen_load_test(&name, in_t, &out_t, current_tests, type_len(out_t))
} else {
gen_test(
&name,
in_t,
&out_t,
current_tests,
[type_len(in_t[0]), type_len(in_t[1]), type_len(in_t[2])],
type_len(out_t),
para_num,
)
};
(function, test)
}
@ -2305,7 +2495,9 @@ fn get_call(
} else if fn_format[1] == "in2lane" {
fn_name.push_str(&type_to_lane_suffixes(out_t, in_t[2]));
} else if fn_format[1] == "signed" {
fn_name.push_str(type_to_suffix(type_to_signed(in_t[1])));
fn_name.push_str(type_to_suffix(&type_to_signed(&String::from(in_t[1]))));
} else if fn_format[1] == "outsigned" {
fn_name.push_str(type_to_suffix(&type_to_signed(&String::from(out_t))));
} else if fn_format[1] == "unsigned" {
fn_name.push_str(type_to_suffix(type_to_unsigned(in_t[1])));
} else if fn_format[1] == "doubleself" {
@ -2315,7 +2507,7 @@ fn get_call(
} else if fn_format[1] == "noqself" {
fn_name.push_str(type_to_noq_suffix(in_t[1]));
} else if fn_format[1] == "noqsigned" {
fn_name.push_str(type_to_noq_suffix(type_to_signed(in_t[1])));
fn_name.push_str(type_to_noq_suffix(&type_to_signed(&String::from(in_t[1]))));
} else if fn_format[1] == "nosuffix" {
} else if fn_format[1] == "in_len" {
fn_name.push_str(&type_len(in_t[1]).to_string());
@ -2330,7 +2522,7 @@ fn get_call(
} else if fn_format[1] == "nin0" {
fn_name.push_str(type_to_n_suffix(in_t[0]));
} else if fn_format[1] == "nsigned" {
fn_name.push_str(type_to_n_suffix(type_to_signed(in_t[1])));
fn_name.push_str(type_to_n_suffix(&type_to_signed(&String::from(in_t[1]))));
} else if fn_format[1] == "in_ntt" {
fn_name.push_str(type_to_suffix(native_type_to_type(in_t[1])));
} else if fn_format[1] == "out_ntt" {
@ -2410,6 +2602,7 @@ fn main() -> io::Result<()> {
)> = Vec::new();
let mut multi_fn: Vec<String> = Vec::new();
let mut target: TargetFeature = Default;
let mut test_fn = "normal";
//
// THIS FILE IS GENERATED FORM neon.spec DO NOT CHANGE IT MANUALLY
@ -2491,6 +2684,7 @@ mod test {
n = None;
multi_fn = Vec::new();
target = Default;
test_fn = "normal";
} else if line.starts_with("//") {
} else if line.starts_with("name = ") {
current_name = Some(String::from(&line[7..]));
@ -2547,6 +2741,14 @@ mod test {
link_arm = Some(String::from(&line[11..]));
} else if line.starts_with("const-arm = ") {
const_arm = Some(String::from(&line[12..]));
} else if line.starts_with("test = ") {
test_fn = if line.contains("load_test") {
"load_test"
} else if line.contains("store_test") {
"store_test"
} else {
"normal"
}
} else if line.starts_with("target = ") {
target = match Some(String::from(&line[9..])) {
Some(input) => match input.as_str() {
@ -2618,6 +2820,7 @@ mod test {
target,
&fixed,
&multi_fn,
test_fn,
);
out_arm.push_str(&function);
tests_arm.push_str(&test);
@ -2638,6 +2841,7 @@ mod test {
target,
&fixed,
&multi_fn,
test_fn,
);
out_aarch64.push_str(&function);
tests_aarch64.push_str(&test);

View file

@ -218,11 +218,29 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
"int8x16_t" => quote! { &I8X16 },
"int16x2_t" => quote! { &I16X2 },
"int16x4_t" => quote! { &I16X4 },
"int16x4x2_t" => quote! { &I16X4X2 },
"int16x4x3_t" => quote! { &I16x4x3 },
"int16x4x4_t" => quote! { &I16x4x4 },
"int16x8_t" => quote! { &I16X8 },
"int16x8x2_t" => quote! { &I16X8X2 },
"int16x8x3_t" => quote! { &I16x8x3 },
"int16x8x4_t" => quote! { &I16x8x4 },
"int32x2_t" => quote! { &I32X2 },
"int32x2x2_t" => quote! { &I32X2X2 },
"int32x2x3_t" => quote! { &I32X2X3 },
"int32x2x4_t" => quote! { &I32X2X4 },
"int32x4_t" => quote! { &I32X4 },
"int32x4x2_t" => quote! { &I32X4X2 },
"int32x4x3_t" => quote! { &I32X4X3 },
"int32x4x4_t" => quote! { &I32X4X4 },
"int64x1_t" => quote! { &I64X1 },
"int64x1x2_t" => quote! { &I64X1X2 },
"int64x1x3_t" => quote! { &I64X1X3 },
"int64x1x4_t" => quote! { &I64X1X4 },
"int64x2_t" => quote! { &I64X2 },
"int64x2x2_t" => quote! { &I64X2X2 },
"int64x2x3_t" => quote! { &I64X2X3 },
"int64x2x4_t" => quote! { &I64X2X4 },
"uint8x8_t" => quote! { &U8X8 },
"uint8x4_t" => quote! { &U8X4 },
"uint8x8x2_t" => quote! { &U8X8X2 },
@ -233,15 +251,45 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
"uint8x8x4_t" => quote! { &U8X8X4 },
"uint8x16_t" => quote! { &U8X16 },
"uint16x4_t" => quote! { &U16X4 },
"uint16x4x2_t" => quote! { &U16X4X2 },
"uint16x4x3_t" => quote! { &U16x4x3 },
"uint16x4x4_t" => quote! { &U16x4x4 },
"uint16x8_t" => quote! { &U16X8 },
"uint16x8x2_t" => quote! { &U16X8X2 },
"uint16x8x3_t" => quote! { &U16x8x3 },
"uint16x8x4_t" => quote! { &U16x8x4 },
"uint32x2_t" => quote! { &U32X2 },
"uint32x2x2_t" => quote! { &U32X2X2 },
"uint32x2x3_t" => quote! { &U32X2X3 },
"uint32x2x4_t" => quote! { &U32X2X4 },
"uint32x4_t" => quote! { &U32X4 },
"uint32x4x2_t" => quote! { &U32X4X2 },
"uint32x4x3_t" => quote! { &U32X4X3 },
"uint32x4x4_t" => quote! { &U32X4X4 },
"uint64x1_t" => quote! { &U64X1 },
"uint64x1x2_t" => quote! { &U64X1X2 },
"uint64x1x3_t" => quote! { &U64X1X3 },
"uint64x1x4_t" => quote! { &U64X1X4 },
"uint64x2_t" => quote! { &U64X2 },
"uint64x2x2_t" => quote! { &U64X2X2 },
"uint64x2x3_t" => quote! { &U64X2X3 },
"uint64x2x4_t" => quote! { &U64X2X4 },
"float32x2_t" => quote! { &F32X2 },
"float32x2x2_t" => quote! { &F32X2X2 },
"float32x2x3_t" => quote! { &F32X2X3 },
"float32x2x4_t" => quote! { &F32X2X4 },
"float32x4_t" => quote! { &F32X4 },
"float32x4x2_t" => quote! { &F32X4X2 },
"float32x4x3_t" => quote! { &F32X4X3 },
"float32x4x4_t" => quote! { &F32X4X4 },
"float64x1_t" => quote! { &F64X1 },
"float64x1x2_t" => quote! { &F64X1X2 },
"float64x1x3_t" => quote! { &F64X1X3 },
"float64x1x4_t" => quote! { &F64X1X4 },
"float64x2_t" => quote! { &F64X2 },
"float64x2x2_t" => quote! { &F64X2X2 },
"float64x2x3_t" => quote! { &F64X2X3 },
"float64x2x4_t" => quote! { &F64X2X4 },
"poly8x8_t" => quote! { &POLY8X8 },
"poly8x8x2_t" => quote! { &POLY8X8X2 },
"poly8x8x3_t" => quote! { &POLY8X8X3 },
@ -254,7 +302,13 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
"poly64x2_t" => quote! { &POLY64X2 },
"poly8x16_t" => quote! { &POLY8X16 },
"poly16x4_t" => quote! { &POLY16X4 },
"poly16x4x2_t" => quote! { &POLY16X4X2 },
"poly16x4x3_t" => quote! { &POLY16X4X3 },
"poly16x4x4_t" => quote! { &POLY16X4X4 },
"poly16x8_t" => quote! { &POLY16X8 },
"poly16x8x2_t" => quote! { &POLY16X8X2 },
"poly16x8x3_t" => quote! { &POLY16X8X3 },
"poly16x8x4_t" => quote! { &POLY16X8X4 },
"p128" => quote! { &P128 },
"v16i8" => quote! { &v16i8 },