add arm neon vector types (#384)
This commit is contained in:
parent
5f77210b34
commit
ff53ec6cb2
23 changed files with 1228 additions and 655 deletions
|
|
@ -12,6 +12,16 @@ export RUST_TEST_THREADS=1
|
|||
|
||||
FEATURES="strict,$FEATURES"
|
||||
|
||||
# FIXME: on armv7 neon intrinsics require the neon target-feature to be
|
||||
# unconditionally enabled.
|
||||
case ${TARGET} in
|
||||
armv7*)
|
||||
export RUSTFLAGS="${RUSTFLAGS} -C target-feature=+neon"
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
|
||||
echo "RUSTFLAGS=${RUSTFLAGS}"
|
||||
echo "FEATURES=${FEATURES}"
|
||||
echo "OBJDUMP=${OBJDUMP}"
|
||||
|
|
|
|||
|
|
@ -11,7 +11,5 @@
|
|||
mod v8;
|
||||
pub use self::v8::*;
|
||||
|
||||
#[cfg(target_feature = "neon")]
|
||||
mod neon;
|
||||
#[cfg(target_feature = "neon")]
|
||||
pub use self::neon::*;
|
||||
|
|
|
|||
|
|
@ -6,20 +6,51 @@
|
|||
use stdsimd_test::assert_instr;
|
||||
use coresimd::simd_llvm::simd_add;
|
||||
use coresimd::simd::*;
|
||||
use coresimd::arm::*;
|
||||
|
||||
types! {
|
||||
/// ARM-specific 64-bit wide vector of one packed `f64`.
|
||||
pub struct float64x1_t(f64); // FIXME: check this!
|
||||
/// ARM-specific 128-bit wide vector of two packed `f64`.
|
||||
pub struct float64x2_t(f64, f64);
|
||||
}
|
||||
impl_from_bits_!(
|
||||
float64x1_t: u32x2,
|
||||
i32x2,
|
||||
f32x2,
|
||||
u16x4,
|
||||
i16x4,
|
||||
u8x8,
|
||||
i8x8,
|
||||
b8x8
|
||||
);
|
||||
impl_from_bits_!(
|
||||
float64x2_t: u64x2,
|
||||
i64x2,
|
||||
f64x2,
|
||||
u32x4,
|
||||
i32x4,
|
||||
f32x4,
|
||||
u16x8,
|
||||
i16x8,
|
||||
u8x16,
|
||||
i8x16,
|
||||
b8x16
|
||||
);
|
||||
|
||||
/// Vector add.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fadd))]
|
||||
pub unsafe fn vadd_f64(a: f64, b: f64) -> f64 {
|
||||
a + b
|
||||
pub unsafe fn vadd_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t {
|
||||
simd_add(a, b)
|
||||
}
|
||||
|
||||
/// Vector add.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fadd))]
|
||||
pub unsafe fn vaddq_f64(a: f64x2, b: f64x2) -> f64x2 {
|
||||
pub unsafe fn vaddq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
|
||||
simd_add(a, b)
|
||||
}
|
||||
|
||||
|
|
@ -27,85 +58,85 @@ pub unsafe fn vaddq_f64(a: f64x2, b: f64x2) -> f64x2 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(add))]
|
||||
pub unsafe fn vaddd_s64(a: i64, b: i64) -> i64 {
|
||||
a + b
|
||||
pub unsafe fn vaddd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
|
||||
simd_add(a, b)
|
||||
}
|
||||
|
||||
/// Vector add.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(add))]
|
||||
pub unsafe fn vaddd_u64(a: u64, b: u64) -> u64 {
|
||||
a + b
|
||||
pub unsafe fn vaddd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t {
|
||||
simd_add(a, b)
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.aarch64.neon.smaxv.i8.v8i8"]
|
||||
fn vmaxv_s8_(a: i8x8) -> i8;
|
||||
fn vmaxv_s8_(a: int8x8_t) -> i8;
|
||||
#[link_name = "llvm.aarch64.neon.smaxv.i8.6i8"]
|
||||
fn vmaxvq_s8_(a: i8x16) -> i8;
|
||||
fn vmaxvq_s8_(a: int8x16_t) -> i8;
|
||||
#[link_name = "llvm.aarch64.neon.smaxv.i16.v4i16"]
|
||||
fn vmaxv_s16_(a: i16x4) -> i16;
|
||||
fn vmaxv_s16_(a: int16x4_t) -> i16;
|
||||
#[link_name = "llvm.aarch64.neon.smaxv.i16.v8i16"]
|
||||
fn vmaxvq_s16_(a: i16x8) -> i16;
|
||||
fn vmaxvq_s16_(a: int16x8_t) -> i16;
|
||||
#[link_name = "llvm.aarch64.neon.smaxv.i32.v2i32"]
|
||||
fn vmaxv_s32_(a: i32x2) -> i32;
|
||||
fn vmaxv_s32_(a: int32x2_t) -> i32;
|
||||
#[link_name = "llvm.aarch64.neon.smaxv.i32.v4i32"]
|
||||
fn vmaxvq_s32_(a: i32x4) -> i32;
|
||||
fn vmaxvq_s32_(a: int32x4_t) -> i32;
|
||||
|
||||
#[link_name = "llvm.aarch64.neon.umaxv.i8.v8i8"]
|
||||
fn vmaxv_u8_(a: u8x8) -> u8;
|
||||
fn vmaxv_u8_(a: uint8x8_t) -> u8;
|
||||
#[link_name = "llvm.aarch64.neon.umaxv.i8.6i8"]
|
||||
fn vmaxvq_u8_(a: u8x16) -> u8;
|
||||
fn vmaxvq_u8_(a: uint8x16_t) -> u8;
|
||||
#[link_name = "llvm.aarch64.neon.umaxv.i16.v4i16"]
|
||||
fn vmaxv_u16_(a: u16x4) -> u16;
|
||||
fn vmaxv_u16_(a: uint16x4_t) -> u16;
|
||||
#[link_name = "llvm.aarch64.neon.umaxv.i16.v8i16"]
|
||||
fn vmaxvq_u16_(a: u16x8) -> u16;
|
||||
fn vmaxvq_u16_(a: uint16x8_t) -> u16;
|
||||
#[link_name = "llvm.aarch64.neon.umaxv.i32.v2i32"]
|
||||
fn vmaxv_u32_(a: u32x2) -> u32;
|
||||
fn vmaxv_u32_(a: uint32x2_t) -> u32;
|
||||
#[link_name = "llvm.aarch64.neon.umaxv.i32.v4i32"]
|
||||
fn vmaxvq_u32_(a: u32x4) -> u32;
|
||||
fn vmaxvq_u32_(a: uint32x4_t) -> u32;
|
||||
|
||||
#[link_name = "llvm.aarch64.neon.fmaxv.f32.v2f32"]
|
||||
fn vmaxv_f32_(a: f32x2) -> f32;
|
||||
fn vmaxv_f32_(a: float32x2_t) -> f32;
|
||||
#[link_name = "llvm.aarch64.neon.fmaxv.f32.v4f32"]
|
||||
fn vmaxvq_f32_(a: f32x4) -> f32;
|
||||
fn vmaxvq_f32_(a: float32x4_t) -> f32;
|
||||
#[link_name = "llvm.aarch64.neon.fmaxv.f64.v2f64"]
|
||||
fn vmaxvq_f64_(a: f64x2) -> f64;
|
||||
fn vmaxvq_f64_(a: float64x2_t) -> f64;
|
||||
|
||||
#[link_name = "llvm.aarch64.neon.sminv.i8.v8i8"]
|
||||
fn vminv_s8_(a: i8x8) -> i8;
|
||||
fn vminv_s8_(a: int8x8_t) -> i8;
|
||||
#[link_name = "llvm.aarch64.neon.sminv.i8.6i8"]
|
||||
fn vminvq_s8_(a: i8x16) -> i8;
|
||||
fn vminvq_s8_(a: int8x16_t) -> i8;
|
||||
#[link_name = "llvm.aarch64.neon.sminv.i16.v4i16"]
|
||||
fn vminv_s16_(a: i16x4) -> i16;
|
||||
fn vminv_s16_(a: int16x4_t) -> i16;
|
||||
#[link_name = "llvm.aarch64.neon.sminv.i16.v8i16"]
|
||||
fn vminvq_s16_(a: i16x8) -> i16;
|
||||
fn vminvq_s16_(a: int16x8_t) -> i16;
|
||||
#[link_name = "llvm.aarch64.neon.sminv.i32.v2i32"]
|
||||
fn vminv_s32_(a: i32x2) -> i32;
|
||||
fn vminv_s32_(a: int32x2_t) -> i32;
|
||||
#[link_name = "llvm.aarch64.neon.sminv.i32.v4i32"]
|
||||
fn vminvq_s32_(a: i32x4) -> i32;
|
||||
fn vminvq_s32_(a: int32x4_t) -> i32;
|
||||
|
||||
#[link_name = "llvm.aarch64.neon.uminv.i8.v8i8"]
|
||||
fn vminv_u8_(a: u8x8) -> u8;
|
||||
fn vminv_u8_(a: uint8x8_t) -> u8;
|
||||
#[link_name = "llvm.aarch64.neon.uminv.i8.6i8"]
|
||||
fn vminvq_u8_(a: u8x16) -> u8;
|
||||
fn vminvq_u8_(a: uint8x16_t) -> u8;
|
||||
#[link_name = "llvm.aarch64.neon.uminv.i16.v4i16"]
|
||||
fn vminv_u16_(a: u16x4) -> u16;
|
||||
fn vminv_u16_(a: uint16x4_t) -> u16;
|
||||
#[link_name = "llvm.aarch64.neon.uminv.i16.v8i16"]
|
||||
fn vminvq_u16_(a: u16x8) -> u16;
|
||||
fn vminvq_u16_(a: uint16x8_t) -> u16;
|
||||
#[link_name = "llvm.aarch64.neon.uminv.i32.v2i32"]
|
||||
fn vminv_u32_(a: u32x2) -> u32;
|
||||
fn vminv_u32_(a: uint32x2_t) -> u32;
|
||||
#[link_name = "llvm.aarch64.neon.uminv.i32.v4i32"]
|
||||
fn vminvq_u32_(a: u32x4) -> u32;
|
||||
fn vminvq_u32_(a: uint32x4_t) -> u32;
|
||||
|
||||
#[link_name = "llvm.aarch64.neon.fminv.f32.v2f32"]
|
||||
fn vminv_f32_(a: f32x2) -> f32;
|
||||
fn vminv_f32_(a: float32x2_t) -> f32;
|
||||
#[link_name = "llvm.aarch64.neon.fminv.f32.v4f32"]
|
||||
fn vminvq_f32_(a: f32x4) -> f32;
|
||||
fn vminvq_f32_(a: float32x4_t) -> f32;
|
||||
#[link_name = "llvm.aarch64.neon.fminv.f64.v2f64"]
|
||||
fn vminvq_f64_(a: f64x2) -> f64;
|
||||
fn vminvq_f64_(a: float64x2_t) -> f64;
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -113,7 +144,7 @@ extern "C" {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smaxv))]
|
||||
pub unsafe fn vmaxv_s8(a: i8x8) -> i8 {
|
||||
pub unsafe fn vmaxv_s8(a: int8x8_t) -> i8 {
|
||||
vmaxv_s8_(a)
|
||||
}
|
||||
|
||||
|
|
@ -121,7 +152,7 @@ pub unsafe fn vmaxv_s8(a: i8x8) -> i8 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smaxv))]
|
||||
pub unsafe fn vmaxvq_s8(a: i8x16) -> i8 {
|
||||
pub unsafe fn vmaxvq_s8(a: int8x16_t) -> i8 {
|
||||
vmaxvq_s8_(a)
|
||||
}
|
||||
|
||||
|
|
@ -129,7 +160,7 @@ pub unsafe fn vmaxvq_s8(a: i8x16) -> i8 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smaxv))]
|
||||
pub unsafe fn vmaxv_s16(a: i16x4) -> i16 {
|
||||
pub unsafe fn vmaxv_s16(a: int16x4_t) -> i16 {
|
||||
vmaxv_s16_(a)
|
||||
}
|
||||
|
||||
|
|
@ -137,7 +168,7 @@ pub unsafe fn vmaxv_s16(a: i16x4) -> i16 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smaxv))]
|
||||
pub unsafe fn vmaxvq_s16(a: i16x8) -> i16 {
|
||||
pub unsafe fn vmaxvq_s16(a: int16x8_t) -> i16 {
|
||||
vmaxvq_s16_(a)
|
||||
}
|
||||
|
||||
|
|
@ -145,7 +176,7 @@ pub unsafe fn vmaxvq_s16(a: i16x8) -> i16 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smaxp))]
|
||||
pub unsafe fn vmaxv_s32(a: i32x2) -> i32 {
|
||||
pub unsafe fn vmaxv_s32(a: int32x2_t) -> i32 {
|
||||
vmaxv_s32_(a)
|
||||
}
|
||||
|
||||
|
|
@ -153,7 +184,7 @@ pub unsafe fn vmaxv_s32(a: i32x2) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(smaxv))]
|
||||
pub unsafe fn vmaxvq_s32(a: i32x4) -> i32 {
|
||||
pub unsafe fn vmaxvq_s32(a: int32x4_t) -> i32 {
|
||||
vmaxvq_s32_(a)
|
||||
}
|
||||
|
||||
|
|
@ -161,7 +192,7 @@ pub unsafe fn vmaxvq_s32(a: i32x4) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umaxv))]
|
||||
pub unsafe fn vmaxv_u8(a: u8x8) -> u8 {
|
||||
pub unsafe fn vmaxv_u8(a: uint8x8_t) -> u8 {
|
||||
vmaxv_u8_(a)
|
||||
}
|
||||
|
||||
|
|
@ -169,7 +200,7 @@ pub unsafe fn vmaxv_u8(a: u8x8) -> u8 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umaxv))]
|
||||
pub unsafe fn vmaxvq_u8(a: u8x16) -> u8 {
|
||||
pub unsafe fn vmaxvq_u8(a: uint8x16_t) -> u8 {
|
||||
vmaxvq_u8_(a)
|
||||
}
|
||||
|
||||
|
|
@ -177,7 +208,7 @@ pub unsafe fn vmaxvq_u8(a: u8x16) -> u8 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umaxv))]
|
||||
pub unsafe fn vmaxv_u16(a: u16x4) -> u16 {
|
||||
pub unsafe fn vmaxv_u16(a: uint16x4_t) -> u16 {
|
||||
vmaxv_u16_(a)
|
||||
}
|
||||
|
||||
|
|
@ -185,7 +216,7 @@ pub unsafe fn vmaxv_u16(a: u16x4) -> u16 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umaxv))]
|
||||
pub unsafe fn vmaxvq_u16(a: u16x8) -> u16 {
|
||||
pub unsafe fn vmaxvq_u16(a: uint16x8_t) -> u16 {
|
||||
vmaxvq_u16_(a)
|
||||
}
|
||||
|
||||
|
|
@ -193,7 +224,7 @@ pub unsafe fn vmaxvq_u16(a: u16x8) -> u16 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umaxp))]
|
||||
pub unsafe fn vmaxv_u32(a: u32x2) -> u32 {
|
||||
pub unsafe fn vmaxv_u32(a: uint32x2_t) -> u32 {
|
||||
vmaxv_u32_(a)
|
||||
}
|
||||
|
||||
|
|
@ -201,7 +232,7 @@ pub unsafe fn vmaxv_u32(a: u32x2) -> u32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(umaxv))]
|
||||
pub unsafe fn vmaxvq_u32(a: u32x4) -> u32 {
|
||||
pub unsafe fn vmaxvq_u32(a: uint32x4_t) -> u32 {
|
||||
vmaxvq_u32_(a)
|
||||
}
|
||||
|
||||
|
|
@ -209,7 +240,7 @@ pub unsafe fn vmaxvq_u32(a: u32x4) -> u32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmaxp))]
|
||||
pub unsafe fn vmaxv_f32(a: f32x2) -> f32 {
|
||||
pub unsafe fn vmaxv_f32(a: float32x2_t) -> f32 {
|
||||
vmaxv_f32_(a)
|
||||
}
|
||||
|
||||
|
|
@ -217,7 +248,7 @@ pub unsafe fn vmaxv_f32(a: f32x2) -> f32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmaxv))]
|
||||
pub unsafe fn vmaxvq_f32(a: f32x4) -> f32 {
|
||||
pub unsafe fn vmaxvq_f32(a: float32x4_t) -> f32 {
|
||||
vmaxvq_f32_(a)
|
||||
}
|
||||
|
||||
|
|
@ -225,7 +256,7 @@ pub unsafe fn vmaxvq_f32(a: f32x4) -> f32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fmaxp))]
|
||||
pub unsafe fn vmaxvq_f64(a: f64x2) -> f64 {
|
||||
pub unsafe fn vmaxvq_f64(a: float64x2_t) -> f64 {
|
||||
vmaxvq_f64_(a)
|
||||
}
|
||||
|
||||
|
|
@ -233,7 +264,7 @@ pub unsafe fn vmaxvq_f64(a: f64x2) -> f64 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sminv))]
|
||||
pub unsafe fn vminv_s8(a: i8x8) -> i8 {
|
||||
pub unsafe fn vminv_s8(a: int8x8_t) -> i8 {
|
||||
vminv_s8_(a)
|
||||
}
|
||||
|
||||
|
|
@ -241,7 +272,7 @@ pub unsafe fn vminv_s8(a: i8x8) -> i8 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sminv))]
|
||||
pub unsafe fn vminvq_s8(a: i8x16) -> i8 {
|
||||
pub unsafe fn vminvq_s8(a: int8x16_t) -> i8 {
|
||||
vminvq_s8_(a)
|
||||
}
|
||||
|
||||
|
|
@ -249,7 +280,7 @@ pub unsafe fn vminvq_s8(a: i8x16) -> i8 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sminv))]
|
||||
pub unsafe fn vminv_s16(a: i16x4) -> i16 {
|
||||
pub unsafe fn vminv_s16(a: int16x4_t) -> i16 {
|
||||
vminv_s16_(a)
|
||||
}
|
||||
|
||||
|
|
@ -257,7 +288,7 @@ pub unsafe fn vminv_s16(a: i16x4) -> i16 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sminv))]
|
||||
pub unsafe fn vminvq_s16(a: i16x8) -> i16 {
|
||||
pub unsafe fn vminvq_s16(a: int16x8_t) -> i16 {
|
||||
vminvq_s16_(a)
|
||||
}
|
||||
|
||||
|
|
@ -265,7 +296,7 @@ pub unsafe fn vminvq_s16(a: i16x8) -> i16 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sminp))]
|
||||
pub unsafe fn vminv_s32(a: i32x2) -> i32 {
|
||||
pub unsafe fn vminv_s32(a: int32x2_t) -> i32 {
|
||||
vminv_s32_(a)
|
||||
}
|
||||
|
||||
|
|
@ -273,7 +304,7 @@ pub unsafe fn vminv_s32(a: i32x2) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(sminv))]
|
||||
pub unsafe fn vminvq_s32(a: i32x4) -> i32 {
|
||||
pub unsafe fn vminvq_s32(a: int32x4_t) -> i32 {
|
||||
vminvq_s32_(a)
|
||||
}
|
||||
|
||||
|
|
@ -281,7 +312,7 @@ pub unsafe fn vminvq_s32(a: i32x4) -> i32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(uminv))]
|
||||
pub unsafe fn vminv_u8(a: u8x8) -> u8 {
|
||||
pub unsafe fn vminv_u8(a: uint8x8_t) -> u8 {
|
||||
vminv_u8_(a)
|
||||
}
|
||||
|
||||
|
|
@ -289,7 +320,7 @@ pub unsafe fn vminv_u8(a: u8x8) -> u8 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(uminv))]
|
||||
pub unsafe fn vminvq_u8(a: u8x16) -> u8 {
|
||||
pub unsafe fn vminvq_u8(a: uint8x16_t) -> u8 {
|
||||
vminvq_u8_(a)
|
||||
}
|
||||
|
||||
|
|
@ -297,7 +328,7 @@ pub unsafe fn vminvq_u8(a: u8x16) -> u8 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(uminv))]
|
||||
pub unsafe fn vminv_u16(a: u16x4) -> u16 {
|
||||
pub unsafe fn vminv_u16(a: uint16x4_t) -> u16 {
|
||||
vminv_u16_(a)
|
||||
}
|
||||
|
||||
|
|
@ -305,7 +336,7 @@ pub unsafe fn vminv_u16(a: u16x4) -> u16 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(uminv))]
|
||||
pub unsafe fn vminvq_u16(a: u16x8) -> u16 {
|
||||
pub unsafe fn vminvq_u16(a: uint16x8_t) -> u16 {
|
||||
vminvq_u16_(a)
|
||||
}
|
||||
|
||||
|
|
@ -313,7 +344,7 @@ pub unsafe fn vminvq_u16(a: u16x8) -> u16 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(uminp))]
|
||||
pub unsafe fn vminv_u32(a: u32x2) -> u32 {
|
||||
pub unsafe fn vminv_u32(a: uint32x2_t) -> u32 {
|
||||
vminv_u32_(a)
|
||||
}
|
||||
|
||||
|
|
@ -321,7 +352,7 @@ pub unsafe fn vminv_u32(a: u32x2) -> u32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(uminv))]
|
||||
pub unsafe fn vminvq_u32(a: u32x4) -> u32 {
|
||||
pub unsafe fn vminvq_u32(a: uint32x4_t) -> u32 {
|
||||
vminvq_u32_(a)
|
||||
}
|
||||
|
||||
|
|
@ -329,7 +360,7 @@ pub unsafe fn vminvq_u32(a: u32x4) -> u32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fminp))]
|
||||
pub unsafe fn vminv_f32(a: f32x2) -> f32 {
|
||||
pub unsafe fn vminv_f32(a: float32x2_t) -> f32 {
|
||||
vminv_f32_(a)
|
||||
}
|
||||
|
||||
|
|
@ -337,7 +368,7 @@ pub unsafe fn vminv_f32(a: f32x2) -> f32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fminv))]
|
||||
pub unsafe fn vminvq_f32(a: f32x4) -> f32 {
|
||||
pub unsafe fn vminvq_f32(a: float32x4_t) -> f32 {
|
||||
vminvq_f32_(a)
|
||||
}
|
||||
|
||||
|
|
@ -345,253 +376,257 @@ pub unsafe fn vminvq_f32(a: f32x4) -> f32 {
|
|||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(test, assert_instr(fminp))]
|
||||
pub unsafe fn vminvq_f64(a: f64x2) -> f64 {
|
||||
pub unsafe fn vminvq_f64(a: float64x2_t) -> f64 {
|
||||
vminvq_f64_(a)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use simd::*;
|
||||
use coresimd::aarch64::neon;
|
||||
use stdsimd_test::simd_test;
|
||||
use simd::*;
|
||||
use coresimd::aarch64::*;
|
||||
use std::mem;
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vadd_f64() {
|
||||
unsafe fn test_vadd_f64() {
|
||||
let a = 1.;
|
||||
let b = 8.;
|
||||
let e = 9.;
|
||||
let r = neon::vadd_f64(a, b);
|
||||
let r: f64 =
|
||||
mem::transmute(vadd_f64(mem::transmute(a), mem::transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vaddq_f64() {
|
||||
unsafe fn test_vaddq_f64() {
|
||||
let a = f64x2::new(1., 2.);
|
||||
let b = f64x2::new(8., 7.);
|
||||
let e = f64x2::new(9., 9.);
|
||||
let r = neon::vaddq_f64(a, b);
|
||||
let r: f64x2 = vaddq_f64(a.into_bits(), b.into_bits()).into_bits();
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vaddd_s64() {
|
||||
let a = 1;
|
||||
let b = 8;
|
||||
let e = 9;
|
||||
let r = neon::vaddd_s64(a, b);
|
||||
unsafe fn test_vaddd_s64() {
|
||||
let a = 1_i64;
|
||||
let b = 8_i64;
|
||||
let e = 9_i64;
|
||||
let r: i64 =
|
||||
mem::transmute(vaddd_s64(mem::transmute(a), mem::transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vaddd_u64() {
|
||||
let a = 1;
|
||||
let b = 8;
|
||||
let e = 9;
|
||||
let r = neon::vaddd_u64(a, b);
|
||||
unsafe fn test_vaddd_u64() {
|
||||
let a = 1_u64;
|
||||
let b = 8_u64;
|
||||
let e = 9_u64;
|
||||
let r: u64 =
|
||||
mem::transmute(vaddd_u64(mem::transmute(a), mem::transmute(b)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vmaxv_s8() {
|
||||
let r = neon::vmaxv_s8(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5));
|
||||
unsafe fn test_vmaxv_s8() {
|
||||
let r = vmaxv_s8(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5).into_bits());
|
||||
assert_eq!(r, 7_i8);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vmaxvq_s8() {
|
||||
unsafe fn test_vmaxvq_s8() {
|
||||
#[cfg_attr(rustfmt, rustfmt_skip)]
|
||||
let r = neon::vmaxvq_s8(i8x16::new(
|
||||
let r = vmaxvq_s8(i8x16::new(
|
||||
1, 2, 3, 4,
|
||||
-16, 6, 7, 5,
|
||||
8, 1, 1, 1,
|
||||
1, 1, 1, 1,
|
||||
));
|
||||
).into_bits());
|
||||
assert_eq!(r, 8_i8);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vmaxv_s16() {
|
||||
let r = neon::vmaxv_s16(i16x4::new(1, 2, -4, 3));
|
||||
unsafe fn test_vmaxv_s16() {
|
||||
let r = vmaxv_s16(i16x4::new(1, 2, -4, 3).into_bits());
|
||||
assert_eq!(r, 3_i16);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vmaxvq_s16() {
|
||||
let r = neon::vmaxvq_s16(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5));
|
||||
unsafe fn test_vmaxvq_s16() {
|
||||
let r = vmaxvq_s16(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5).into_bits());
|
||||
assert_eq!(r, 7_i16);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vmaxv_s32() {
|
||||
let r = neon::vmaxv_s32(i32x2::new(1, -4));
|
||||
unsafe fn test_vmaxv_s32() {
|
||||
let r = vmaxv_s32(i32x2::new(1, -4).into_bits());
|
||||
assert_eq!(r, 1_i32);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vmaxvq_s32() {
|
||||
let r = neon::vmaxvq_s32(i32x4::new(1, 2, -32, 4));
|
||||
unsafe fn test_vmaxvq_s32() {
|
||||
let r = vmaxvq_s32(i32x4::new(1, 2, -32, 4).into_bits());
|
||||
assert_eq!(r, 4_i32);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vmaxv_u8() {
|
||||
let r = neon::vmaxv_u8(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5));
|
||||
unsafe fn test_vmaxv_u8() {
|
||||
let r = vmaxv_u8(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5).into_bits());
|
||||
assert_eq!(r, 8_u8);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vmaxvq_u8() {
|
||||
unsafe fn test_vmaxvq_u8() {
|
||||
#[cfg_attr(rustfmt, rustfmt_skip)]
|
||||
let r = neon::vmaxvq_u8(u8x16::new(
|
||||
let r = vmaxvq_u8(u8x16::new(
|
||||
1, 2, 3, 4,
|
||||
16, 6, 7, 5,
|
||||
8, 1, 1, 1,
|
||||
1, 1, 1, 1,
|
||||
));
|
||||
).into_bits());
|
||||
assert_eq!(r, 16_u8);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vmaxv_u16() {
|
||||
let r = neon::vmaxv_u16(u16x4::new(1, 2, 4, 3));
|
||||
unsafe fn test_vmaxv_u16() {
|
||||
let r = vmaxv_u16(u16x4::new(1, 2, 4, 3).into_bits());
|
||||
assert_eq!(r, 4_u16);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vmaxvq_u16() {
|
||||
let r = neon::vmaxvq_u16(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5));
|
||||
unsafe fn test_vmaxvq_u16() {
|
||||
let r = vmaxvq_u16(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5).into_bits());
|
||||
assert_eq!(r, 16_u16);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vmaxv_u32() {
|
||||
let r = neon::vmaxv_u32(u32x2::new(1, 4));
|
||||
unsafe fn test_vmaxv_u32() {
|
||||
let r = vmaxv_u32(u32x2::new(1, 4).into_bits());
|
||||
assert_eq!(r, 4_u32);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vmaxvq_u32() {
|
||||
let r = neon::vmaxvq_u32(u32x4::new(1, 2, 32, 4));
|
||||
unsafe fn test_vmaxvq_u32() {
|
||||
let r = vmaxvq_u32(u32x4::new(1, 2, 32, 4).into_bits());
|
||||
assert_eq!(r, 32_u32);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vmaxv_f32() {
|
||||
let r = neon::vmaxv_f32(f32x2::new(1., 4.));
|
||||
unsafe fn test_vmaxv_f32() {
|
||||
let r = vmaxv_f32(f32x2::new(1., 4.).into_bits());
|
||||
assert_eq!(r, 4_f32);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vmaxvq_f32() {
|
||||
let r = neon::vmaxvq_f32(f32x4::new(1., 2., 32., 4.));
|
||||
unsafe fn test_vmaxvq_f32() {
|
||||
let r = vmaxvq_f32(f32x4::new(1., 2., 32., 4.).into_bits());
|
||||
assert_eq!(r, 32_f32);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vmaxvq_f64() {
|
||||
let r = neon::vmaxvq_f64(f64x2::new(1., 4.));
|
||||
unsafe fn test_vmaxvq_f64() {
|
||||
let r = vmaxvq_f64(f64x2::new(1., 4.).into_bits());
|
||||
assert_eq!(r, 4_f64);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vminv_s8() {
|
||||
let r = neon::vminv_s8(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5));
|
||||
unsafe fn test_vminv_s8() {
|
||||
let r = vminv_s8(i8x8::new(1, 2, 3, 4, -8, 6, 7, 5).into_bits());
|
||||
assert_eq!(r, -8_i8);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vminvq_s8() {
|
||||
unsafe fn test_vminvq_s8() {
|
||||
#[cfg_attr(rustfmt, rustfmt_skip)]
|
||||
let r = neon::vminvq_s8(i8x16::new(
|
||||
let r = vminvq_s8(i8x16::new(
|
||||
1, 2, 3, 4,
|
||||
-16, 6, 7, 5,
|
||||
8, 1, 1, 1,
|
||||
1, 1, 1, 1,
|
||||
));
|
||||
).into_bits());
|
||||
assert_eq!(r, -16_i8);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vminv_s16() {
|
||||
let r = neon::vminv_s16(i16x4::new(1, 2, -4, 3));
|
||||
unsafe fn test_vminv_s16() {
|
||||
let r = vminv_s16(i16x4::new(1, 2, -4, 3).into_bits());
|
||||
assert_eq!(r, -4_i16);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vminvq_s16() {
|
||||
let r = neon::vminvq_s16(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5));
|
||||
unsafe fn test_vminvq_s16() {
|
||||
let r = vminvq_s16(i16x8::new(1, 2, 7, 4, -16, 6, 7, 5).into_bits());
|
||||
assert_eq!(r, -16_i16);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vminv_s32() {
|
||||
let r = neon::vminv_s32(i32x2::new(1, -4));
|
||||
unsafe fn test_vminv_s32() {
|
||||
let r = vminv_s32(i32x2::new(1, -4).into_bits());
|
||||
assert_eq!(r, -4_i32);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vminvq_s32() {
|
||||
let r = neon::vminvq_s32(i32x4::new(1, 2, -32, 4));
|
||||
unsafe fn test_vminvq_s32() {
|
||||
let r = vminvq_s32(i32x4::new(1, 2, -32, 4).into_bits());
|
||||
assert_eq!(r, -32_i32);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vminv_u8() {
|
||||
let r = neon::vminv_u8(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5));
|
||||
unsafe fn test_vminv_u8() {
|
||||
let r = vminv_u8(u8x8::new(1, 2, 3, 4, 8, 6, 7, 5).into_bits());
|
||||
assert_eq!(r, 1_u8);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vminvq_u8() {
|
||||
unsafe fn test_vminvq_u8() {
|
||||
#[cfg_attr(rustfmt, rustfmt_skip)]
|
||||
let r = neon::vminvq_u8(u8x16::new(
|
||||
let r = vminvq_u8(u8x16::new(
|
||||
1, 2, 3, 4,
|
||||
16, 6, 7, 5,
|
||||
8, 1, 1, 1,
|
||||
1, 1, 1, 1,
|
||||
));
|
||||
).into_bits());
|
||||
assert_eq!(r, 1_u8);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vminv_u16() {
|
||||
let r = neon::vminv_u16(u16x4::new(1, 2, 4, 3));
|
||||
unsafe fn test_vminv_u16() {
|
||||
let r = vminv_u16(u16x4::new(1, 2, 4, 3).into_bits());
|
||||
assert_eq!(r, 1_u16);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vminvq_u16() {
|
||||
let r = neon::vminvq_u16(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5));
|
||||
unsafe fn test_vminvq_u16() {
|
||||
let r = vminvq_u16(u16x8::new(1, 2, 7, 4, 16, 6, 7, 5).into_bits());
|
||||
assert_eq!(r, 1_u16);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vminv_u32() {
|
||||
let r = neon::vminv_u32(u32x2::new(1, 4));
|
||||
unsafe fn test_vminv_u32() {
|
||||
let r = vminv_u32(u32x2::new(1, 4).into_bits());
|
||||
assert_eq!(r, 1_u32);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vminvq_u32() {
|
||||
let r = neon::vminvq_u32(u32x4::new(1, 2, 32, 4));
|
||||
unsafe fn test_vminvq_u32() {
|
||||
let r = vminvq_u32(u32x4::new(1, 2, 32, 4).into_bits());
|
||||
assert_eq!(r, 1_u32);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vminv_f32() {
|
||||
let r = neon::vminv_f32(f32x2::new(1., 4.));
|
||||
unsafe fn test_vminv_f32() {
|
||||
let r = vminv_f32(f32x2::new(1., 4.).into_bits());
|
||||
assert_eq!(r, 1_f32);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vminvq_f32() {
|
||||
let r = neon::vminvq_f32(f32x4::new(1., 2., 32., 4.));
|
||||
unsafe fn test_vminvq_f32() {
|
||||
let r = vminvq_f32(f32x4::new(1., 2., 32., 4.).into_bits());
|
||||
assert_eq!(r, 1_f32);
|
||||
}
|
||||
|
||||
#[simd_test = "neon"]
|
||||
unsafe fn vminvq_f64() {
|
||||
let r = neon::vminvq_f64(f64x2::new(1., 4.));
|
||||
unsafe fn test_vminvq_f64() {
|
||||
let r = vminvq_f64(f64x2::new(1., 4.).into_bits());
|
||||
assert_eq!(r, 1_f64);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,17 +22,12 @@ pub unsafe fn _clz_u64(x: u64) -> u64 {
|
|||
x.leading_zeros() as u64
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.bitreverse.i64"]
|
||||
fn rbit_u64(i: i64) -> i64;
|
||||
}
|
||||
|
||||
/// Reverse the bit order.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(rbit))]
|
||||
pub unsafe fn _rbit_u64(x: u64) -> u64 {
|
||||
rbit_u64(x as i64) as u64
|
||||
use intrinsics::bitreverse;
|
||||
bitreverse(x)
|
||||
}
|
||||
|
||||
/// Counts the leading most significant bits set.
|
||||
|
|
|
|||
|
|
@ -7,14 +7,21 @@
|
|||
//! http://infocenter.arm.com/help/topic/com.arm.doc.
|
||||
//! ihi0073a/IHI0073A_arm_neon_intrinsics_ref.pdf
|
||||
//! [arm_dat]: https://developer.arm.com/technologies/neon/intrinsics
|
||||
#![allow(non_camel_case_types)]
|
||||
|
||||
mod v6;
|
||||
pub use self::v6::*;
|
||||
|
||||
#[cfg(any(target_arch = "aarch64", target_feature = "v7"))]
|
||||
mod v7;
|
||||
#[cfg(any(target_arch = "aarch64", target_feature = "v7"))]
|
||||
pub use self::v7::*;
|
||||
|
||||
#[cfg(target_feature = "neon")]
|
||||
// NEON is supported on AArch64, and on ARM when built with the v7 and neon
|
||||
// features. Building ARM without neon produces incorrect codegen.
|
||||
#[cfg(any(target_arch = "aarch64",
|
||||
all(target_feature = "v7", target_feature = "neon")))]
|
||||
mod neon;
|
||||
#[cfg(target_feature = "neon")]
|
||||
#[cfg(any(target_arch = "aarch64",
|
||||
all(target_feature = "v7", target_feature = "neon")))]
|
||||
pub use self::neon::*;
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -14,38 +14,41 @@ use stdsimd_test::assert_instr;
|
|||
|
||||
/// Count Leading Zeros.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(clz))]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
|
||||
// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/382
|
||||
// #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))]
|
||||
pub unsafe fn _clz_u8(x: u8) -> u8 {
|
||||
x.leading_zeros() as u8
|
||||
}
|
||||
|
||||
/// Count Leading Zeros.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(clz))]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
|
||||
// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/382
|
||||
// #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))]
|
||||
pub unsafe fn _clz_u16(x: u16) -> u16 {
|
||||
x.leading_zeros() as u16
|
||||
}
|
||||
|
||||
/// Count Leading Zeros.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(clz))]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))]
|
||||
// FIXME: https://github.com/rust-lang-nursery/stdsimd/issues/382
|
||||
// #[cfg_attr(all(test, target_arch = "arm"), assert_instr(clz))]
|
||||
pub unsafe fn _clz_u32(x: u32) -> u32 {
|
||||
x.leading_zeros() as u32
|
||||
}
|
||||
|
||||
/// Reverse the bit order.
|
||||
#[inline]
|
||||
#[cfg_attr(test, assert_instr(rbit))]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg(dont_compile_me)] // FIXME need to add `v7` upstream in rustc
|
||||
#[cfg_attr(test, assert_instr(rbit))]
|
||||
pub unsafe fn _rbit_u32(x: u32) -> u32 {
|
||||
rbit_u32(x as i32) as u32
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.bitreverse.i32"]
|
||||
fn rbit_u32(i: i32) -> i32;
|
||||
use intrinsics::bitreverse;
|
||||
bitreverse(x)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
23
library/stdarch/coresimd/macros.rs
Normal file
23
library/stdarch/coresimd/macros.rs
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
//! Utility macros.
|
||||
|
||||
#[allow(unused)]
|
||||
macro_rules! types {
|
||||
($(
|
||||
$(#[$doc:meta])*
|
||||
pub struct $name:ident($($fields:tt)*);
|
||||
)*) => ($(
|
||||
$(#[$doc])*
|
||||
#[derive(Copy, Debug)]
|
||||
#[allow(non_camel_case_types)]
|
||||
#[repr(simd)]
|
||||
pub struct $name($($fields)*);
|
||||
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(expl_impl_clone_on_copy))]
|
||||
impl ::clone::Clone for $name {
|
||||
#[inline] // currently needed for correctness
|
||||
fn clone(&self) -> $name {
|
||||
*self
|
||||
}
|
||||
}
|
||||
)*)
|
||||
}
|
||||
|
|
@ -1,5 +1,8 @@
|
|||
//! `coresimd`
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
|
||||
#[macro_use]
|
||||
mod ppsv;
|
||||
|
||||
|
|
|
|||
|
|
@ -59,18 +59,19 @@ pub trait IntoBits<T>: ::marker::Sized {
|
|||
fn into_bits(self) -> T;
|
||||
}
|
||||
|
||||
// FromBits implies IntoBits
|
||||
// FromBits implies IntoBits.
|
||||
impl<T, U> IntoBits<U> for T
|
||||
where
|
||||
U: FromBits<T>,
|
||||
{
|
||||
#[inline]
|
||||
fn into_bits(self) -> U {
|
||||
debug_assert!(::mem::size_of::<Self>() == ::mem::size_of::<U>());
|
||||
U::from_bits(self)
|
||||
}
|
||||
}
|
||||
|
||||
// FromBits (and thus IntoBits) is reflexive
|
||||
// FromBits (and thus IntoBits) is reflexive.
|
||||
impl<T> FromBits<T> for T {
|
||||
#[inline]
|
||||
fn from_bits(t: Self) -> Self {
|
||||
|
|
|
|||
|
|
@ -78,6 +78,72 @@ simd_f_ty! {
|
|||
/// A 128-bit vector with 2 `f64` lanes.
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86")]
|
||||
use coresimd::arch::x86::{__m128, __m128d, __m128i};
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use coresimd::arch::x86_64::{__m128, __m128d, __m128i};
|
||||
|
||||
macro_rules! from_bits_x86 {
|
||||
($id:ident, $elem_ty:ident, $test_mod:ident) => {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!($id: __m128, __m128i, __m128d);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(all(target_arch = "arm", target_feature = "v7"))]
|
||||
use coresimd::arch::arm::{// FIXME: float16x8_t,
|
||||
float32x4_t,
|
||||
int16x8_t,
|
||||
int32x4_t,
|
||||
int64x2_t,
|
||||
int8x16_t,
|
||||
poly16x8_t,
|
||||
poly8x16_t,
|
||||
uint16x8_t,
|
||||
uint32x4_t,
|
||||
uint64x2_t,
|
||||
uint8x16_t};
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
use coresimd::arch::aarch64::{// FIXME: float16x8_t,
|
||||
float32x4_t,
|
||||
float64x2_t,
|
||||
int16x8_t,
|
||||
int32x4_t,
|
||||
int64x2_t,
|
||||
int8x16_t,
|
||||
poly16x8_t,
|
||||
poly8x16_t,
|
||||
uint16x8_t,
|
||||
uint32x4_t,
|
||||
uint64x2_t,
|
||||
uint8x16_t};
|
||||
|
||||
macro_rules! from_bits_arm {
|
||||
($id:ident, $elem_ty:ident, $test_mod_arm:ident, $test_mod_a64:ident) => {
|
||||
#[cfg(any(all(target_arch = "arm", target_feature = "v7"), target_arch = "aarch64"))]
|
||||
impl_from_bits_!(
|
||||
$id:
|
||||
int8x16_t,
|
||||
uint8x16_t,
|
||||
int16x8_t,
|
||||
uint16x8_t,
|
||||
int32x4_t,
|
||||
uint32x4_t,
|
||||
int64x2_t,
|
||||
uint64x2_t,
|
||||
// FIXME: float16x8_t,
|
||||
float32x4_t,
|
||||
poly8x16_t,
|
||||
poly16x8_t
|
||||
);
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
impl_from_bits_!(
|
||||
$id: float64x2_t
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
impl_from_bits!(
|
||||
u64x2: u64,
|
||||
u64x2_from_bits,
|
||||
|
|
@ -92,6 +158,9 @@ impl_from_bits!(
|
|||
i8x16,
|
||||
b8x16
|
||||
);
|
||||
from_bits_x86!(u64x2, u64, u64x2_from_bits_x86);
|
||||
from_bits_arm!(u64x2, u64, u64x2_from_bits_arm, u64x2_from_bits_aarch64);
|
||||
|
||||
impl_from_bits!(
|
||||
i64x2: i64,
|
||||
i64x2_from_bits,
|
||||
|
|
@ -106,6 +175,9 @@ impl_from_bits!(
|
|||
i8x16,
|
||||
b8x16
|
||||
);
|
||||
from_bits_x86!(i64x2, i64, i64x2_from_bits_x86);
|
||||
from_bits_arm!(i64x2, i64, i64x2_from_bits_arm, i64x2_from_bits_aarch64);
|
||||
|
||||
impl_from_bits!(
|
||||
f64x2: f64,
|
||||
f64x2_from_bits,
|
||||
|
|
@ -120,6 +192,9 @@ impl_from_bits!(
|
|||
i8x16,
|
||||
b8x16
|
||||
);
|
||||
from_bits_x86!(f64x2, f64, f64x2_from_bits_x86);
|
||||
from_bits_arm!(f64x2, f64, f64x2_from_bits_arm, f64x2_from_bits_aarch64);
|
||||
|
||||
impl_from_bits!(
|
||||
u32x4: u32,
|
||||
u32x4_from_bits,
|
||||
|
|
@ -134,6 +209,9 @@ impl_from_bits!(
|
|||
i8x16,
|
||||
b8x16
|
||||
);
|
||||
from_bits_x86!(u32x4, u32, u32x4_from_bits_x86);
|
||||
from_bits_arm!(u32x4, u32, u32x4_from_bits_arm, u32x4_from_bits_aarch64);
|
||||
|
||||
impl_from_bits!(
|
||||
i32x4: i32,
|
||||
i32x4_from_bits,
|
||||
|
|
@ -148,6 +226,9 @@ impl_from_bits!(
|
|||
i8x16,
|
||||
b8x16
|
||||
);
|
||||
from_bits_x86!(i32x4, i32, i32x4_from_bits_x86);
|
||||
from_bits_arm!(i32x4, i32, i32x4_from_bits_arm, i32x4_from_bits_aarch64);
|
||||
|
||||
impl_from_bits!(
|
||||
f32x4: f32,
|
||||
f32x4_from_bits,
|
||||
|
|
@ -162,6 +243,9 @@ impl_from_bits!(
|
|||
i8x16,
|
||||
b8x16
|
||||
);
|
||||
from_bits_x86!(f32x4, f32, f32x4_from_bits_x86);
|
||||
from_bits_arm!(f32x4, f32, f32x4_from_bits_arm, f32x4_from_bits_aarch64);
|
||||
|
||||
impl_from_bits!(
|
||||
u16x8: u16,
|
||||
u16x8_from_bits,
|
||||
|
|
@ -176,6 +260,9 @@ impl_from_bits!(
|
|||
i8x16,
|
||||
b8x16
|
||||
);
|
||||
from_bits_x86!(u16x8, u16, u16x8_from_bits_x86);
|
||||
from_bits_arm!(u16x8, u16, u16x8_from_bits_arm, u16x8_from_bits_aarch64);
|
||||
|
||||
impl_from_bits!(
|
||||
i16x8: i16,
|
||||
i16x8_from_bits,
|
||||
|
|
@ -190,6 +277,9 @@ impl_from_bits!(
|
|||
i8x16,
|
||||
b8x16
|
||||
);
|
||||
from_bits_x86!(i16x8, i16, i16x8_from_bits_x86);
|
||||
from_bits_arm!(i16x8, i16, i16x8_from_bits_arm, i16x8_from_bits_aarch64);
|
||||
|
||||
impl_from_bits!(
|
||||
u8x16: u8,
|
||||
u8x16_from_bits,
|
||||
|
|
@ -204,6 +294,9 @@ impl_from_bits!(
|
|||
i8x16,
|
||||
b8x16
|
||||
);
|
||||
from_bits_x86!(u8x16, u8, u8x16_from_bits_x86);
|
||||
from_bits_arm!(u8x16, u8, u8x16_from_bits_arm, u8x16_from_bits_aarch64);
|
||||
|
||||
impl_from_bits!(
|
||||
i8x16: i8,
|
||||
i8x16_from_bits,
|
||||
|
|
@ -218,32 +311,8 @@ impl_from_bits!(
|
|||
u8x16,
|
||||
b8x16
|
||||
);
|
||||
|
||||
#[cfg(target_arch = "x86")]
|
||||
use coresimd::arch::x86::{__m128, __m128d, __m128i};
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use coresimd::arch::x86_64::{__m128, __m128d, __m128i};
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(f64x2: __m128, __m128i, __m128d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(u64x2: __m128, __m128i, __m128d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(i64x2: __m128, __m128i, __m128d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(f32x4: __m128, __m128i, __m128d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(u32x4: __m128, __m128i, __m128d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(i32x4: __m128, __m128i, __m128d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(u16x8: __m128, __m128i, __m128d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(i16x8: __m128, __m128i, __m128d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(u8x16: __m128, __m128i, __m128d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(i8x16: __m128, __m128i, __m128d);
|
||||
from_bits_x86!(i8x16, i8, i8x16_from_bits_x86);
|
||||
from_bits_arm!(i8x16, i8, i8x16_from_bits_arm, i8x16_from_bits_aarch64);
|
||||
|
||||
impl_from!(
|
||||
f64x2: f64,
|
||||
|
|
|
|||
|
|
@ -94,6 +94,18 @@ simd_f_ty! {
|
|||
/// A 256-bit vector with 4 `f64` lanes.
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86")]
|
||||
use coresimd::arch::x86::{__m256, __m256d, __m256i};
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use coresimd::arch::x86_64::{__m256, __m256d, __m256i};
|
||||
|
||||
macro_rules! from_bits_x86 {
|
||||
($id:ident, $elem_ty:ident, $test_mod:ident) => {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!($id: __m256, __m256i, __m256d);
|
||||
}
|
||||
}
|
||||
|
||||
impl_from_bits!(
|
||||
i8x32: i8,
|
||||
i8x32_from_bits,
|
||||
|
|
@ -108,6 +120,8 @@ impl_from_bits!(
|
|||
u8x32,
|
||||
b8x32
|
||||
);
|
||||
from_bits_x86!(i8x32, i8, i8x32_from_bits_x86);
|
||||
|
||||
impl_from_bits!(
|
||||
u8x32: u8,
|
||||
u8x32_from_bits,
|
||||
|
|
@ -122,6 +136,8 @@ impl_from_bits!(
|
|||
i8x32,
|
||||
b8x32
|
||||
);
|
||||
from_bits_x86!(u8x32, u8, u8x32_from_bits_x86);
|
||||
|
||||
impl_from_bits!(
|
||||
i16x16: i16,
|
||||
i16x16_from_bits,
|
||||
|
|
@ -136,6 +152,8 @@ impl_from_bits!(
|
|||
i8x32,
|
||||
b8x32
|
||||
);
|
||||
from_bits_x86!(i16x16, i16, i16x16_from_bits_x86);
|
||||
|
||||
impl_from_bits!(
|
||||
u16x16: u16,
|
||||
u16x16_from_bits,
|
||||
|
|
@ -150,6 +168,8 @@ impl_from_bits!(
|
|||
i8x32,
|
||||
b8x32
|
||||
);
|
||||
from_bits_x86!(u16x16, u16, u16x16_from_bits_x86);
|
||||
|
||||
impl_from_bits!(
|
||||
i32x8: i32,
|
||||
i32x8_from_bits,
|
||||
|
|
@ -164,6 +184,8 @@ impl_from_bits!(
|
|||
i8x32,
|
||||
b8x32
|
||||
);
|
||||
from_bits_x86!(i32x8, i32, i32x8_from_bits_x86);
|
||||
|
||||
impl_from_bits!(
|
||||
u32x8: u32,
|
||||
u32x8_from_bits,
|
||||
|
|
@ -178,6 +200,8 @@ impl_from_bits!(
|
|||
i8x32,
|
||||
b8x32
|
||||
);
|
||||
from_bits_x86!(u32x8, u32, u32x8_from_bits_x86);
|
||||
|
||||
impl_from_bits!(
|
||||
f32x8: f32,
|
||||
f32x8_from_bits,
|
||||
|
|
@ -192,6 +216,8 @@ impl_from_bits!(
|
|||
i8x32,
|
||||
b8x32
|
||||
);
|
||||
from_bits_x86!(f32x8, f32, f32x8_from_bits_x86);
|
||||
|
||||
impl_from_bits!(
|
||||
i64x4: i64,
|
||||
i64x4_from_bits,
|
||||
|
|
@ -206,6 +232,8 @@ impl_from_bits!(
|
|||
i8x32,
|
||||
b8x32
|
||||
);
|
||||
from_bits_x86!(i64x4, i64, i64x4_from_bits_x86);
|
||||
|
||||
impl_from_bits!(
|
||||
u64x4: u64,
|
||||
u64x4_from_bits,
|
||||
|
|
@ -220,6 +248,8 @@ impl_from_bits!(
|
|||
i8x32,
|
||||
b8x32
|
||||
);
|
||||
from_bits_x86!(u64x4, u64, u64x4_from_bits_x86);
|
||||
|
||||
impl_from_bits!(
|
||||
f64x4: f64,
|
||||
f64x4_from_bits,
|
||||
|
|
@ -234,32 +264,7 @@ impl_from_bits!(
|
|||
i8x32,
|
||||
b8x32
|
||||
);
|
||||
|
||||
#[cfg(target_arch = "x86")]
|
||||
use coresimd::arch::x86::{__m256, __m256d, __m256i};
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use coresimd::arch::x86_64::{__m256, __m256d, __m256i};
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(f64x4: __m256, __m256i, __m256d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(u64x4: __m256, __m256i, __m256d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(i64x4: __m256, __m256i, __m256d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(f32x8: __m256, __m256i, __m256d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(u32x8: __m256, __m256i, __m256d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(i32x8: __m256, __m256i, __m256d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(u16x16: __m256, __m256i, __m256d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(i16x16: __m256, __m256i, __m256d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(u8x32: __m256, __m256i, __m256d);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(i8x32: __m256, __m256i, __m256d);
|
||||
from_bits_x86!(f64x4, f64, f64x4_from_bits_x86);
|
||||
|
||||
impl_from!(
|
||||
f64x4: f64,
|
||||
|
|
|
|||
|
|
@ -57,6 +57,73 @@ simd_f_ty! {
|
|||
/// A 64-bit vector with 2 `f32` lanes.
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86")]
|
||||
use coresimd::arch::x86::__m64;
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use coresimd::arch::x86_64::__m64;
|
||||
|
||||
macro_rules! from_bits_x86 {
|
||||
($id:ident, $elem_ty:ident, $test_mod:ident) => {
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!($id: __m64);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(all(target_arch = "arm", target_feature = "v7"))]
|
||||
use coresimd::arch::arm::{// FIXME: float16x4_t,
|
||||
float32x2_t,
|
||||
int16x4_t,
|
||||
int32x2_t,
|
||||
int64x1_t,
|
||||
int8x8_t,
|
||||
poly16x4_t,
|
||||
poly8x8_t,
|
||||
uint16x4_t,
|
||||
uint32x2_t,
|
||||
uint64x1_t,
|
||||
uint8x8_t};
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
use coresimd::arch::aarch64::{// FIXME: float16x4_t,
|
||||
float32x2_t,
|
||||
float64x1_t,
|
||||
int16x4_t,
|
||||
int32x2_t,
|
||||
int64x1_t,
|
||||
int8x8_t,
|
||||
poly16x4_t,
|
||||
poly8x8_t,
|
||||
uint16x4_t,
|
||||
uint32x2_t,
|
||||
uint64x1_t,
|
||||
uint8x8_t};
|
||||
|
||||
macro_rules! from_bits_arm {
|
||||
($id:ident, $elem_ty:ident, $test_mod_arm:ident, $test_mod_a64:ident) => {
|
||||
#[cfg(any(all(target_arch = "arm", target_feature = "v7"), target_arch = "aarch64"))]
|
||||
impl_from_bits_!(
|
||||
$id:
|
||||
int64x1_t,
|
||||
uint64x1_t,
|
||||
uint32x2_t,
|
||||
int32x2_t,
|
||||
float32x2_t,
|
||||
uint16x4_t,
|
||||
int16x4_t,
|
||||
// FIXME: float16x4_t
|
||||
poly16x4_t,
|
||||
uint8x8_t,
|
||||
int8x8_t,
|
||||
poly8x8_t
|
||||
);
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
impl_from_bits_!(
|
||||
$id: float64x1_t
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
impl_from_bits!(
|
||||
u32x2: u32,
|
||||
u32x2_from_bits,
|
||||
|
|
@ -68,6 +135,9 @@ impl_from_bits!(
|
|||
i8x8,
|
||||
b8x8
|
||||
);
|
||||
from_bits_x86!(u32x2, u32, u32x2_from_bits_x86);
|
||||
from_bits_arm!(u32x2, u32, u32x2_from_bits_arm, u32x2_from_bits_aarch64);
|
||||
|
||||
impl_from_bits!(
|
||||
i32x2: i32,
|
||||
i32x2_from_bits,
|
||||
|
|
@ -79,6 +149,9 @@ impl_from_bits!(
|
|||
i8x8,
|
||||
b8x8
|
||||
);
|
||||
from_bits_x86!(i32x2, i32, i32x2_from_bits_x86);
|
||||
from_bits_arm!(i32x2, i32, i32x2_from_bits_arm, i32x2_from_bits_aarch64);
|
||||
|
||||
impl_from_bits!(
|
||||
f32x2: f32,
|
||||
f32x2_from_bits,
|
||||
|
|
@ -90,6 +163,9 @@ impl_from_bits!(
|
|||
i8x8,
|
||||
b8x8
|
||||
);
|
||||
from_bits_x86!(f32x2, f32, f32x2_from_bits_x86);
|
||||
from_bits_arm!(f32x2, f32, f32x2_from_bits_arm, f32x2_from_bits_aarch64);
|
||||
|
||||
impl_from_bits!(
|
||||
u16x4: u16,
|
||||
u16x4_from_bits,
|
||||
|
|
@ -100,6 +176,9 @@ impl_from_bits!(
|
|||
i8x8,
|
||||
b8x8
|
||||
);
|
||||
from_bits_x86!(u16x4, u16, u16x4_from_bits_x86);
|
||||
from_bits_arm!(u16x4, u16, u16x4_from_bits_arm, u16x4_from_bits_aarch64);
|
||||
|
||||
impl_from_bits!(
|
||||
i16x4: i16,
|
||||
i16x4_from_bits,
|
||||
|
|
@ -110,6 +189,9 @@ impl_from_bits!(
|
|||
i8x8,
|
||||
b8x8
|
||||
);
|
||||
from_bits_x86!(i16x4, i16, i16x4_from_bits_x86);
|
||||
from_bits_arm!(i16x4, i16, i16x4_from_bits_arm, i16x4_from_bits_aarch64);
|
||||
|
||||
impl_from_bits!(
|
||||
u8x8: u8,
|
||||
u8x8_from_bits,
|
||||
|
|
@ -120,6 +202,9 @@ impl_from_bits!(
|
|||
i8x8,
|
||||
b8x8
|
||||
);
|
||||
from_bits_x86!(u8x8, u8, u8x8_from_bits_x86);
|
||||
from_bits_arm!(u8x8, u8, u8x8_from_bits_arm, u8x8_from_bits_aarch64);
|
||||
|
||||
impl_from_bits!(
|
||||
i8x8: i8,
|
||||
i8x8_from_bits,
|
||||
|
|
@ -130,27 +215,8 @@ impl_from_bits!(
|
|||
u8x8,
|
||||
b8x8
|
||||
);
|
||||
|
||||
#[cfg(target_arch = "x86")]
|
||||
use coresimd::arch::x86::__m64;
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use coresimd::arch::x86_64::__m64;
|
||||
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(f32x2: __m64);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(u32x2: __m64);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(i32x2: __m64);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(u16x4: __m64);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(i16x4: __m64);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(u8x8: __m64);
|
||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||
impl_from_bits_!(i8x8: __m64);
|
||||
from_bits_x86!(i8x8, i8, i8x8_from_bits_x86);
|
||||
from_bits_arm!(i8x8, i8, i8x8_from_bits_arm, i8x8_from_bits_aarch64);
|
||||
|
||||
impl_from!(
|
||||
f32x2: f32,
|
||||
|
|
|
|||
|
|
@ -6,27 +6,6 @@ use mem;
|
|||
#[macro_use]
|
||||
mod macros;
|
||||
|
||||
macro_rules! types {
|
||||
($(
|
||||
$(#[$doc:meta])*
|
||||
pub struct $name:ident($($fields:tt)*);
|
||||
)*) => ($(
|
||||
$(#[$doc])*
|
||||
#[derive(Copy, Debug)]
|
||||
#[allow(non_camel_case_types)]
|
||||
#[repr(simd)]
|
||||
pub struct $name($($fields)*);
|
||||
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(expl_impl_clone_on_copy))]
|
||||
impl Clone for $name {
|
||||
#[inline] // currently needed for correctness
|
||||
fn clone(&self) -> $name {
|
||||
*self
|
||||
}
|
||||
}
|
||||
)*)
|
||||
}
|
||||
|
||||
types! {
|
||||
/// 64-bit wide integer vector type, x86-specific
|
||||
///
|
||||
|
|
@ -459,12 +438,12 @@ impl m256iExt for __m256i {
|
|||
}
|
||||
}
|
||||
|
||||
use coresimd::simd::{b8x16, b8x32, b8x8, f32x4, f32x8, f64x2, f64x4, i16x16,
|
||||
i16x4, i16x8, i32x2, i32x4, i32x8, i64x2, i64x4, i8x16,
|
||||
i8x32, i8x8, u16x16, u16x4, u16x8, u32x2, u32x4, u32x8,
|
||||
u64x2, u64x4, u8x16, u8x32, u8x8};
|
||||
use coresimd::simd::{b8x16, b8x32, b8x8, f32x2, f32x4, f32x8, f64x2, f64x4,
|
||||
i16x16, i16x4, i16x8, i32x2, i32x4, i32x8, i64x2, i64x4,
|
||||
i8x16, i8x32, i8x8, u16x16, u16x4, u16x8, u32x2, u32x4,
|
||||
u32x8, u64x2, u64x4, u8x16, u8x32, u8x8};
|
||||
|
||||
impl_from_bits_!(__m64: u32x2, i32x2, u16x4, i16x4, u8x8, i8x8, b8x8);
|
||||
impl_from_bits_!(__m64: u32x2, i32x2, f32x2, u16x4, i16x4, u8x8, i8x8, b8x8);
|
||||
impl_from_bits_!(
|
||||
__m128: u64x2,
|
||||
i64x2,
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
//! coresimd 128-bit wide vector tests
|
||||
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float,
|
||||
cfg_target_feature)]
|
||||
#![allow(unused_imports, dead_code)]
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -30,7 +31,7 @@ macro_rules! vector_impl {
|
|||
mod ppsv;
|
||||
|
||||
#[cfg(test)]
|
||||
use std::marker;
|
||||
use std::{marker, mem};
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use std::cmp;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
//! coresimd 16-bit wide vector tests
|
||||
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float,
|
||||
cfg_target_feature)]
|
||||
#![allow(unused_imports, dead_code)]
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -30,7 +31,7 @@ macro_rules! vector_impl {
|
|||
mod ppsv;
|
||||
|
||||
#[cfg(test)]
|
||||
use std::marker;
|
||||
use std::{marker, mem};
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use std::cmp;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
//! coresimd 256-bit wide vector tests
|
||||
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float,
|
||||
cfg_target_feature)]
|
||||
#![allow(unused_imports)]
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -30,7 +31,7 @@ macro_rules! vector_impl {
|
|||
mod ppsv;
|
||||
|
||||
#[cfg(test)]
|
||||
use std::marker;
|
||||
use std::{marker, mem};
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use std::cmp;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
//! coresimd 32-bit wide vector tests
|
||||
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float,
|
||||
cfg_target_feature)]
|
||||
#![allow(unused_imports, dead_code)]
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -30,7 +31,7 @@ macro_rules! vector_impl {
|
|||
mod ppsv;
|
||||
|
||||
#[cfg(test)]
|
||||
use std::marker;
|
||||
use std::{marker, mem};
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use std::cmp;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
//! coresimd 512-bit wide vector tests
|
||||
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float,
|
||||
cfg_target_feature)]
|
||||
#![allow(unused_imports)]
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -30,7 +31,7 @@ macro_rules! vector_impl {
|
|||
mod ppsv;
|
||||
|
||||
#[cfg(test)]
|
||||
use std::marker;
|
||||
use std::{marker, mem};
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use std::cmp;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
//! coresimd 64-bit wide vector tests
|
||||
|
||||
#![cfg_attr(feature = "strict", deny(warnings))]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float)]
|
||||
#![feature(stdsimd, link_llvm_intrinsics, simd_ffi, core_float,
|
||||
cfg_target_feature)]
|
||||
#![allow(unused_imports, dead_code)]
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -30,7 +31,7 @@ macro_rules! vector_impl {
|
|||
mod ppsv;
|
||||
|
||||
#[cfg(test)]
|
||||
use std::marker;
|
||||
use std::{marker, mem};
|
||||
|
||||
#[cfg(all(test, target_arch = "aarch64"))]
|
||||
use std::cmp;
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ pub fn simd_test(
|
|||
.expect(&format!("target triple contained no \"-\": {}", target))
|
||||
{
|
||||
"i686" | "x86_64" | "i586" => "is_x86_feature_detected",
|
||||
"arm" => "is_arm_feature_detected",
|
||||
"arm" | "armv7" => "is_arm_feature_detected",
|
||||
"aarch64" => "is_aarch64_feature_detected",
|
||||
"powerpc64" => "is_powerpc64_feature_detected",
|
||||
"mips" | "mipsel" => {
|
||||
|
|
|
|||
|
|
@ -43,21 +43,21 @@ impl Frsqrt for f64x2 {
|
|||
};
|
||||
Self::new(u.extract(0), u.extract(1))
|
||||
}
|
||||
#[cfg(all(any(target_arch = "arm", target_arch = "aarch64"),
|
||||
target_feature = "neon"))]
|
||||
#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
|
||||
{
|
||||
#[cfg(target_arch = "arm")]
|
||||
use stdsimd::arch::arm::*;
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
use stdsimd::arch::aarch64::*;
|
||||
|
||||
unsafe { vrsqrte_f32((*self).into()).into() }
|
||||
let t: f32x2 = (*self).into();
|
||||
let t: f32x2 = unsafe { vrsqrte_f32(t.into_bits()).into_bits() };
|
||||
t.into()
|
||||
}
|
||||
#[cfg(not(any(all(any(target_arch = "x86",
|
||||
target_arch = "x86_64"),
|
||||
target_feature = "sse"),
|
||||
all(any(target_arch = "arm",
|
||||
target_arch = "aarch64"),
|
||||
all(target_arch = "aarch64",
|
||||
target_feature = "neon"))))]
|
||||
{
|
||||
self.replace(0, 1. / self.extract(0).sqrt());
|
||||
|
|
|
|||
|
|
@ -11,6 +11,10 @@ macro_rules! is_arm_feature_detected {
|
|||
cfg!(target_feature = "pmull") ||
|
||||
$crate::arch::detect::check_for($crate::arch::detect::Feature::pmull)
|
||||
};
|
||||
("v7") => { compile_error!("\"v7\" feature cannot be detected at run-time") };
|
||||
("vfp2") => { compile_error!("\"vfp2\" feature cannot be detected at run-time") };
|
||||
("vfp3") => { compile_error!("\"vfp3\" feature cannot be detected at run-time") };
|
||||
("vfp4") => { compile_error!("\"vfp4\" feature cannot be detected at run-time") };
|
||||
($t:tt) => { compile_error!(concat!("unknown arm target feature: ", $t)) };
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue