add vreinterpret neon instructions (#1101)
This commit is contained in:
parent
f9e5dfdd66
commit
64f84788c9
6 changed files with 5648 additions and 122 deletions
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -4411,60 +4411,6 @@ pub unsafe fn vmovq_n_f32(value: f32) -> float32x4_t {
|
|||
vdupq_n_f32(value)
|
||||
}
|
||||
|
||||
/// Vector reinterpret cast operation
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(test, assert_instr(nop))]
|
||||
pub unsafe fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t {
|
||||
transmute(a)
|
||||
}
|
||||
|
||||
/// Vector reinterpret cast operation
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(test, assert_instr(nop))]
|
||||
pub unsafe fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t {
|
||||
transmute(a)
|
||||
}
|
||||
|
||||
/// Vector reinterpret cast operation
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(test, assert_instr(nop))]
|
||||
pub unsafe fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t {
|
||||
transmute(a)
|
||||
}
|
||||
|
||||
/// Vector reinterpret cast operation
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(test, assert_instr(nop))]
|
||||
pub unsafe fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t {
|
||||
transmute(a)
|
||||
}
|
||||
|
||||
/// Vector reinterpret cast operation
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(test, assert_instr(nop))]
|
||||
pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t {
|
||||
transmute(a)
|
||||
}
|
||||
|
||||
/// Vector reinterpret cast operation
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
|
||||
#[cfg_attr(test, assert_instr(nop))]
|
||||
pub unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t {
|
||||
transmute(a)
|
||||
}
|
||||
|
||||
/// Unsigned shift right
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -6152,16 +6098,6 @@ mod tests {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vreinterpret_u64_u32() {
|
||||
let v: i8 = 42;
|
||||
let e = i8x16::new(
|
||||
42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
|
||||
);
|
||||
let r: i8x16 = transmute(vdupq_n_s8(v));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vget_high_s8() {
|
||||
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
|
|
@ -10518,43 +10454,6 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vreinterpretq_s8_u8() {
|
||||
let a = i8x16::new(-1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r: u8x16 = transmute(vreinterpretq_s8_u8(transmute(a)));
|
||||
let e = u8x16::new(0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vreinterpretq_u16_u8() {
|
||||
let a = u16x8::new(
|
||||
0x01_00, 0x03_02, 0x05_04, 0x07_06, 0x09_08, 0x0B_0A, 0x0D_0C, 0x0F_0E,
|
||||
);
|
||||
let r: u8x16 = transmute(vreinterpretq_u16_u8(transmute(a)));
|
||||
let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vreinterpretq_u32_u8() {
|
||||
let a = u32x4::new(0x03_02_01_00, 0x07_06_05_04, 0x0B_0A_09_08, 0x0F_0E_0D_0C);
|
||||
let r: u8x16 = transmute(vreinterpretq_u32_u8(transmute(a)));
|
||||
let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vreinterpretq_u64_u8() {
|
||||
let a: u64x2 = u64x2::new(0x07_06_05_04_03_02_01_00, 0x0F_0E_0D_0C_0B_0A_09_08);
|
||||
let r: u8x16 = transmute(vreinterpretq_u64_u8(transmute(a)));
|
||||
let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vreinterpretq_u8_s8() {
|
||||
let a = u8x16::new(0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
let r: i8x16 = transmute(vreinterpretq_u8_s8(transmute(a)));
|
||||
let e = i8x16::new(-1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
|
||||
assert_eq!(r, e)
|
||||
}
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vabs_s8() {
|
||||
let a = i8x8::new(-1, 0, 1, -2, 0, 2, -128, 127);
|
||||
|
|
|
|||
|
|
@ -733,7 +733,7 @@ generate float32x2_t:float64x2_t
|
|||
|
||||
/// Floating-point convert to higher precision long
|
||||
name = vcvt_high
|
||||
double-suffixes
|
||||
noq-double-suffixes
|
||||
multi_fn = simd_shuffle2, b:float32x2_t, a, a, [2, 3]
|
||||
multi_fn = simd_cast, b
|
||||
a = -1.2, 1.2, 2.3, 3.4
|
||||
|
|
@ -754,7 +754,7 @@ generate float64x2_t:float32x2_t
|
|||
|
||||
/// Floating-point convert to lower precision narrow
|
||||
name = vcvt_high
|
||||
double-suffixes
|
||||
noq-double-suffixes
|
||||
multi_fn = simd_shuffle4, a, {simd_cast, b}, [0, 1, 2, 3]
|
||||
a = -1.2, 1.2
|
||||
b = -2.3, 3.4
|
||||
|
|
@ -775,8 +775,8 @@ generate float64x2_t:float32x2_t
|
|||
|
||||
/// Floating-point convert to lower precision narrow, rounding to odd
|
||||
name = vcvtx_high
|
||||
double-suffixes
|
||||
multi_fn = simd_shuffle4, a, {vcvtx-doubleself-noext, b}, [0, 1, 2, 3]
|
||||
noq-double-suffixes
|
||||
multi_fn = simd_shuffle4, a, {vcvtx-noq_doubleself-noext, b}, [0, 1, 2, 3]
|
||||
a = -1.0, 2.0
|
||||
b = -3.0, 4.0
|
||||
validate -1.0, 2.0, -3.0, 4.0
|
||||
|
|
@ -1417,6 +1417,186 @@ arm = vrecpe
|
|||
link-arm = vrecpe._EXT_
|
||||
generate float*_t
|
||||
|
||||
/// Vector reinterpret cast operation
|
||||
name = vreinterpret
|
||||
double-suffixes
|
||||
fn = transmute
|
||||
a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
validate 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
|
||||
aarch64 = str
|
||||
generate poly64x1_t:int64x1_t, poly64x1_t:uint64x1_t, int64x1_t:poly64x1_t, uint64x1_t:poly64x1_t
|
||||
generate poly64x2_t:int64x2_t, poly64x2_t:uint64x2_t, int64x2_t:poly64x2_t, uint64x2_t:poly64x2_t
|
||||
|
||||
arm = str
|
||||
generate uint8x8_t:int8x8_t, poly8x8_t:int8x8_t, poly16x4_t:int16x4_t, uint16x4_t:int16x4_t, uint32x2_t:int32x2_t, uint64x1_t:int64x1_t
|
||||
generate uint8x16_t:int8x16_t, poly8x16_t:int8x16_t, poly16x8_t:int16x8_t, uint16x8_t:int16x8_t, uint32x4_t:int32x4_t, uint64x2_t:int64x2_t
|
||||
generate poly8x8_t:uint8x8_t, int8x8_t:uint8x8_t, poly16x4_t:uint16x4_t, int16x4_t:uint16x4_t, int32x2_t:uint32x2_t, int64x1_t:uint64x1_t
|
||||
generate poly8x16_t:uint8x16_t, int8x16_t:uint8x16_t, poly16x8_t:uint16x8_t, int16x8_t:uint16x8_t, int32x4_t:uint32x4_t, int64x2_t:uint64x2_t
|
||||
generate int8x8_t:poly8x8_t, uint8x8_t:poly8x8_t, int16x4_t:poly16x4_t, uint16x4_t:poly16x4_t
|
||||
generate int8x16_t:poly8x16_t, uint8x16_t:poly8x16_t, int16x8_t:poly16x8_t, uint16x8_t:poly16x8_t
|
||||
|
||||
/// Vector reinterpret cast operation
|
||||
name = vreinterpret
|
||||
double-suffixes
|
||||
fn = transmute
|
||||
a = 0, 1, 2, 3, 4, 5, 6, 7
|
||||
validate 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
|
||||
|
||||
aarch64 = str
|
||||
generate poly64x1_t:int32x2_t, poly64x1_t:uint32x2_t
|
||||
generate poly64x2_t:int32x4_t, poly64x2_t:uint32x4_t
|
||||
|
||||
arm = str
|
||||
generate int16x4_t:int8x8_t, uint16x4_t:int8x8_t, poly16x4_t:int8x8_t, int32x2_t:int16x4_t, uint32x2_t:int16x4_t, int64x1_t:int32x2_t, uint64x1_t:int32x2_t
|
||||
generate int16x8_t:int8x16_t, uint16x8_t:int8x16_t, poly16x8_t:int8x16_t, int32x4_t:int16x8_t, uint32x4_t:int16x8_t, int64x2_t:int32x4_t, uint64x2_t:int32x4_t
|
||||
generate poly16x4_t:uint8x8_t, int16x4_t:uint8x8_t, uint16x4_t:uint8x8_t, int32x2_t:uint16x4_t, uint32x2_t:uint16x4_t, int64x1_t:uint32x2_t, uint64x1_t:uint32x2_t
|
||||
generate poly16x8_t:uint8x16_t, int16x8_t:uint8x16_t, uint16x8_t:uint8x16_t, int32x4_t:uint16x8_t, uint32x4_t:uint16x8_t, int64x2_t:uint32x4_t, uint64x2_t:uint32x4_t
|
||||
generate poly16x4_t:poly8x8_t, int16x4_t:poly8x8_t, uint16x4_t:poly8x8_t, int32x2_t:poly16x4_t, uint32x2_t:poly16x4_t
|
||||
generate poly16x8_t:poly8x16_t, int16x8_t:poly8x16_t, uint16x8_t:poly8x16_t, int32x4_t:poly16x8_t, uint32x4_t:poly16x8_t
|
||||
|
||||
/// Vector reinterpret cast operation
|
||||
name = vreinterpret
|
||||
double-suffixes
|
||||
fn = transmute
|
||||
a = 0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0
|
||||
validate 0, 1, 2, 3, 4, 5, 6, 7
|
||||
|
||||
aarch64 = str
|
||||
generate int32x2_t:poly64x1_t, uint32x2_t:poly64x1_t
|
||||
generate int32x4_t:poly64x2_t, uint32x4_t:poly64x2_t
|
||||
|
||||
arm = str
|
||||
generate poly8x8_t:int16x4_t, int8x8_t:int16x4_t, uint8x8_t:int16x4_t, poly16x4_t:int32x2_t, int16x4_t:int32x2_t, uint16x4_t:int32x2_t, int32x2_t:int64x1_t, uint32x2_t:int64x1_t
|
||||
generate poly8x16_t:int16x8_t, int8x16_t:int16x8_t, uint8x16_t:int16x8_t, poly16x8_t:int32x4_t, int16x8_t:int32x4_t, uint16x8_t:int32x4_t, int32x4_t:int64x2_t, uint32x4_t:int64x2_t
|
||||
generate poly8x8_t:uint16x4_t, int8x8_t:uint16x4_t, uint8x8_t:uint16x4_t, poly16x4_t:uint32x2_t, int16x4_t:uint32x2_t, uint16x4_t:uint32x2_t, int32x2_t:uint64x1_t, uint32x2_t:uint64x1_t
|
||||
generate poly8x16_t:uint16x8_t, int8x16_t:uint16x8_t, uint8x16_t:uint16x8_t, poly16x8_t:uint32x4_t, int16x8_t:uint32x4_t, uint16x8_t:uint32x4_t, int32x4_t:uint64x2_t, uint32x4_t:uint64x2_t
|
||||
generate poly8x8_t:poly16x4_t, int8x8_t:poly16x4_t, uint8x8_t:poly16x4_t
|
||||
generate poly8x16_t:poly16x8_t, int8x16_t:poly16x8_t, uint8x16_t:poly16x8_t
|
||||
|
||||
/// Vector reinterpret cast operation
|
||||
name = vreinterpret
|
||||
double-suffixes
|
||||
fn = transmute
|
||||
a = 0, 1, 2, 3
|
||||
validate 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0
|
||||
|
||||
aarch64 = str
|
||||
generate poly64x1_t:int16x4_t, poly64x1_t:uint16x4_t, poly64x1_t:poly16x4_t
|
||||
generate poly64x2_t:int16x8_t, poly64x2_t:uint16x8_t, poly64x2_t:poly16x8_t
|
||||
|
||||
arm = str
|
||||
generate int32x2_t:int8x8_t, uint32x2_t:int8x8_t, int64x1_t:int16x4_t, uint64x1_t:int16x4_t
|
||||
generate int32x4_t:int8x16_t, uint32x4_t:int8x16_t, int64x2_t:int16x8_t, uint64x2_t:int16x8_t
|
||||
generate int32x2_t:uint8x8_t, uint32x2_t:uint8x8_t, int64x1_t:uint16x4_t, uint64x1_t:uint16x4_t
|
||||
generate int32x4_t:uint8x16_t, uint32x4_t:uint8x16_t, int64x2_t:uint16x8_t, uint64x2_t:uint16x8_t
|
||||
generate int32x2_t:poly8x8_t, uint32x2_t:poly8x8_t, int64x1_t:poly16x4_t, uint64x1_t:poly16x4_t
|
||||
generate int32x4_t:poly8x16_t, uint32x4_t:poly8x16_t, int64x2_t:poly16x8_t, uint64x2_t:poly16x8_t
|
||||
|
||||
/// Vector reinterpret cast operation
|
||||
name = vreinterpret
|
||||
double-suffixes
|
||||
fn = transmute
|
||||
a = 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0
|
||||
validate 0, 1, 2, 3
|
||||
|
||||
aarch64 = str
|
||||
generate poly16x4_t:poly64x1_t, int16x4_t:poly64x1_t, uint16x4_t:poly64x1_t
|
||||
generate poly16x8_t:poly64x2_t, int16x8_t:poly64x2_t, uint16x8_t:poly64x2_t
|
||||
|
||||
arm = str
|
||||
generate poly8x8_t:int32x2_t, int8x8_t:int32x2_t, uint8x8_t:int32x2_t, poly16x4_t:int64x1_t, int16x4_t:int64x1_t, uint16x4_t:int64x1_t
|
||||
generate poly8x16_t:int32x4_t, int8x16_t:int32x4_t, uint8x16_t:int32x4_t, poly16x8_t:int64x2_t, int16x8_t:int64x2_t, uint16x8_t:int64x2_t
|
||||
generate poly8x8_t:uint32x2_t, int8x8_t:uint32x2_t, uint8x8_t:uint32x2_t, poly16x4_t:uint64x1_t, int16x4_t:uint64x1_t, uint16x4_t:uint64x1_t
|
||||
generate poly8x16_t:uint32x4_t, int8x16_t:uint32x4_t, uint8x16_t:uint32x4_t, poly16x8_t:uint64x2_t, int16x8_t:uint64x2_t, uint16x8_t:uint64x2_t
|
||||
|
||||
/// Vector reinterpret cast operation
|
||||
name = vreinterpret
|
||||
double-suffixes
|
||||
fn = transmute
|
||||
a = 0, 1
|
||||
validate 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
aarch64 = str
|
||||
generate poly64x1_t:int8x8_t, poly64x1_t:uint8x8_t, poly64x1_t:poly8x8_t
|
||||
generate poly64x2_t:int8x16_t, poly64x2_t:uint8x16_t, poly64x2_t:poly8x16_t
|
||||
|
||||
arm = str
|
||||
generate int64x1_t:int8x8_t, uint64x1_t:int8x8_t, int64x1_t:uint8x8_t, uint64x1_t:uint8x8_t, int64x1_t:poly8x8_t, uint64x1_t:poly8x8_t
|
||||
generate int64x2_t:int8x16_t, uint64x2_t:int8x16_t, int64x2_t:uint8x16_t, uint64x2_t:uint8x16_t, int64x2_t:poly8x16_t, uint64x2_t:poly8x16_t
|
||||
|
||||
/// Vector reinterpret cast operation
|
||||
name = vreinterpret
|
||||
double-suffixes
|
||||
fn = transmute
|
||||
a = 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
|
||||
validate 0, 1
|
||||
|
||||
aarch64 = str
|
||||
generate poly8x8_t:poly64x1_t, int8x8_t:poly64x1_t, uint8x8_t:poly64x1_t
|
||||
generate poly8x16_t:poly64x2_t, int8x16_t:poly64x2_t, uint8x16_t:poly64x2_t
|
||||
|
||||
arm = str
|
||||
generate poly8x8_t:int64x1_t, int8x8_t:int64x1_t, uint8x8_t:int64x1_t, poly8x8_t:uint64x1_t, int8x8_t:uint64x1_t, uint8x8_t:uint64x1_t
|
||||
generate poly8x16_t:int64x2_t, int8x16_t:int64x2_t, uint8x16_t:int64x2_t, poly8x16_t:uint64x2_t, int8x16_t:uint64x2_t, uint8x16_t:uint64x2_t
|
||||
|
||||
/// Vector reinterpret cast operation
|
||||
name = vreinterpret
|
||||
double-suffixes
|
||||
fn = transmute
|
||||
a = 0., 0., 0., 0., 0., 0., 0., 0.
|
||||
validate 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
|
||||
aarch64 = str
|
||||
generate float64x1_t:int8x8_t, float64x1_t:int16x4_t, float64x1_t:int32x2_t, float64x1_t:int64x1_t
|
||||
generate float64x2_t:int8x16_t, float64x2_t:int16x8_t, float64x2_t:int32x4_t, float64x2_t:int64x2_t
|
||||
generate float64x1_t:uint8x8_t, float64x1_t:uint16x4_t, float64x1_t:uint32x2_t, float64x1_t:uint64x1_t
|
||||
generate float64x2_t:uint8x16_t, float64x2_t:uint16x8_t, float64x2_t:uint32x4_t, float64x2_t:uint64x2_t
|
||||
generate float64x1_t:poly8x8_t, float64x1_t:poly16x4_t, float32x2_t:poly64x1_t, float64x1_t:poly64x1_t
|
||||
generate float64x2_t:poly8x16_t, float64x2_t:poly16x8_t, float32x4_t:poly64x2_t, float64x2_t:poly64x2_t
|
||||
|
||||
arm = str
|
||||
generate float32x2_t:int8x8_t, float32x2_t:int16x4_t, float32x2_t:int32x2_t, float32x2_t:int64x1_t
|
||||
generate float32x4_t:int8x16_t, float32x4_t:int16x8_t, float32x4_t:int32x4_t, float32x4_t:int64x2_t
|
||||
generate float32x2_t:uint8x8_t, float32x2_t:uint16x4_t, float32x2_t:uint32x2_t, float32x2_t:uint64x1_t
|
||||
generate float32x4_t:uint8x16_t, float32x4_t:uint16x8_t, float32x4_t:uint32x4_t, float32x4_t:uint64x2_t
|
||||
generate float32x2_t:poly8x8_t, float32x2_t:poly16x4_t
|
||||
generate float32x4_t:poly8x16_t, float32x4_t:poly16x8_t
|
||||
|
||||
/// Vector reinterpret cast operation
|
||||
name = vreinterpret
|
||||
double-suffixes
|
||||
fn = transmute
|
||||
a = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
validate 0., 0., 0., 0., 0., 0., 0., 0.
|
||||
|
||||
aarch64 = str
|
||||
generate int8x8_t:float64x1_t, int16x4_t:float64x1_t, int32x2_t:float64x1_t, int64x1_t:float64x1_t
|
||||
generate int8x16_t:float64x2_t, int16x8_t:float64x2_t, int32x4_t:float64x2_t, int64x2_t:float64x2_t
|
||||
generate poly8x8_t:float64x1_t, uint16x4_t:float64x1_t, uint32x2_t:float64x1_t, uint64x1_t:float64x1_t
|
||||
generate poly8x16_t:float64x2_t, uint16x8_t:float64x2_t, uint32x4_t:float64x2_t, uint64x2_t:float64x2_t
|
||||
generate uint8x8_t:float64x1_t, poly16x4_t:float64x1_t, poly64x1_t:float64x1_t, poly64x1_t:float32x2_t
|
||||
generate uint8x16_t:float64x2_t, poly16x8_t:float64x2_t, poly64x2_t:float64x2_t, poly64x2_t:float32x4_t
|
||||
|
||||
arm = str
|
||||
generate int8x8_t:float32x2_t, int16x4_t:float32x2_t, int32x2_t:float32x2_t, int64x1_t:float32x2_t
|
||||
generate int8x16_t:float32x4_t, int16x8_t:float32x4_t, int32x4_t:float32x4_t, int64x2_t:float32x4_t
|
||||
generate uint8x8_t:float32x2_t, uint16x4_t:float32x2_t, uint32x2_t:float32x2_t, uint64x1_t:float32x2_t
|
||||
generate uint8x16_t:float32x4_t, uint16x8_t:float32x4_t, uint32x4_t:float32x4_t, uint64x2_t:float32x4_t
|
||||
generate poly8x8_t:float32x2_t, poly16x4_t:float32x2_t
|
||||
generate poly8x16_t:float32x4_t, poly16x8_t:float32x4_t
|
||||
|
||||
/// Vector reinterpret cast operation
|
||||
name = vreinterpret
|
||||
double-suffixes
|
||||
fn = transmute
|
||||
a = 0., 0., 0., 0., 0., 0., 0., 0.
|
||||
validate 0., 0., 0., 0., 0., 0., 0., 0.
|
||||
|
||||
aarch64 = str
|
||||
generate float32x2_t:float64x1_t, float64x1_t:float32x2_t
|
||||
generate float32x4_t:float64x2_t, float64x2_t:float32x4_t
|
||||
|
||||
/// Transpose vectors
|
||||
name = vtrn1
|
||||
multi_fn = simd_shuffle-in_len-noext, a, b, {transpose-1-in_len}
|
||||
|
|
|
|||
|
|
@ -165,22 +165,21 @@ fn type_to_unsigned_suffix(t: &str) -> &str {
|
|||
}
|
||||
}
|
||||
|
||||
fn type_to_double_suffixes<'a>(out_t: &'a str, in_t: &'a str) -> &'a str {
|
||||
match (out_t, in_t) {
|
||||
("float32x2_t", "float64x2_t") => "_f32_f64",
|
||||
("float64x2_t", "float32x2_t") => "_f64_f32",
|
||||
("float64x2_t", "float32x4_t") => "_f64_f32",
|
||||
("float32x4_t", "float64x2_t") => "_f32_f64",
|
||||
("int32x2_t", "float32x2_t") => "_s32_f32",
|
||||
("int32x4_t", "float32x4_t") => "q_s32_f32",
|
||||
("int64x1_t", "float64x1_t") => "_s64_f64",
|
||||
("int64x2_t", "float64x2_t") => "q_s64_f64",
|
||||
("uint32x2_t", "float32x2_t") => "_u32_f32",
|
||||
("uint32x4_t", "float32x4_t") => "q_u32_f32",
|
||||
("uint64x1_t", "float64x1_t") => "_u64_f64",
|
||||
("uint64x2_t", "float64x2_t") => "q_u64_f64",
|
||||
(_, _) => panic!("unknown type: {}, {}", out_t, in_t),
|
||||
fn type_to_double_suffixes<'a>(out_t: &'a str, in_t: &'a str) -> String {
|
||||
let mut str = String::new();
|
||||
if type_to_suffix(in_t).starts_with("q") && type_to_suffix(out_t).starts_with("q") {
|
||||
str.push_str("q");
|
||||
}
|
||||
str.push_str(type_to_noq_suffix(out_t));
|
||||
str.push_str(type_to_noq_suffix(in_t));
|
||||
str
|
||||
}
|
||||
|
||||
fn type_to_noq_double_suffixes<'a>(out_t: &'a str, in_t: &'a str) -> String {
|
||||
let mut str = String::new();
|
||||
str.push_str(type_to_noq_suffix(out_t));
|
||||
str.push_str(type_to_noq_suffix(in_t));
|
||||
str
|
||||
}
|
||||
|
||||
fn type_to_noq_suffix(t: &str) -> &str {
|
||||
|
|
@ -197,6 +196,7 @@ fn type_to_noq_suffix(t: &str) -> &str {
|
|||
"float32x2_t" | "float32x4_t" => "_f32",
|
||||
"float64x1_t" | "float64x2_t" => "_f64",
|
||||
"poly8x8_t" | "poly8x16_t" => "_p8",
|
||||
"poly16x4_t" | "poly16x8_t" => "_p16",
|
||||
"poly64x1_t" | "poly64x2_t" => "_p64",
|
||||
_ => panic!("unknown type: {}", t),
|
||||
}
|
||||
|
|
@ -207,6 +207,7 @@ enum Suffix {
|
|||
Normal,
|
||||
Double,
|
||||
NoQ,
|
||||
NoQDouble,
|
||||
}
|
||||
|
||||
fn type_to_global_type(t: &str) -> &str {
|
||||
|
|
@ -518,6 +519,11 @@ fn gen_aarch64(
|
|||
current_name,
|
||||
type_to_double_suffixes(out_t, in_t[1])
|
||||
),
|
||||
NoQDouble => format!(
|
||||
"{}{}",
|
||||
current_name,
|
||||
type_to_noq_double_suffixes(out_t, in_t[1])
|
||||
),
|
||||
};
|
||||
let current_fn = if let Some(current_fn) = current_fn.clone() {
|
||||
if link_aarch64.is_some() {
|
||||
|
|
@ -772,6 +778,11 @@ fn gen_arm(
|
|||
current_name,
|
||||
type_to_double_suffixes(out_t, in_t[1])
|
||||
),
|
||||
NoQDouble => format!(
|
||||
"{}{}",
|
||||
current_name,
|
||||
type_to_noq_double_suffixes(out_t, in_t[1])
|
||||
),
|
||||
};
|
||||
let current_aarch64 = current_aarch64
|
||||
.clone()
|
||||
|
|
@ -1113,7 +1124,9 @@ fn get_call(
|
|||
} else if fn_format[1] == "unsigned" {
|
||||
fn_name.push_str(type_to_unsigned_suffix(in_t[1]));
|
||||
} else if fn_format[1] == "doubleself" {
|
||||
fn_name.push_str(type_to_double_suffixes(out_t, in_t[1]));
|
||||
fn_name.push_str(&type_to_double_suffixes(out_t, in_t[1]));
|
||||
} else if fn_format[1] == "noq_doubleself" {
|
||||
fn_name.push_str(&type_to_noq_double_suffixes(out_t, in_t[1]));
|
||||
} else if fn_format[1] == "noqself" {
|
||||
fn_name.push_str(type_to_noq_suffix(in_t[1]));
|
||||
} else if fn_format[1] == "nosuffix" {
|
||||
|
|
@ -1255,6 +1268,8 @@ mod test {
|
|||
suffix = Double;
|
||||
} else if line.starts_with("no-q") {
|
||||
suffix = NoQ;
|
||||
} else if line.starts_with("noq-double-suffixes") {
|
||||
suffix = NoQDouble;
|
||||
} else if line.starts_with("a = ") {
|
||||
a = line[4..].split(',').map(|v| v.trim().to_string()).collect();
|
||||
} else if line.starts_with("b = ") {
|
||||
|
|
|
|||
|
|
@ -396,7 +396,7 @@ fn verify_all_signatures() {
|
|||
}
|
||||
// Skip some intrinsics that are present in GCC and Clang but
|
||||
// are missing from the official documentation.
|
||||
let skip_intrinsic_verify = ["vmov_n_p64", "vmovq_n_p64"];
|
||||
let skip_intrinsic_verify = ["vmov_n_p64", "vmovq_n_p64", "vreinterpret_p64_s64", "vreinterpret_f32_p64", "vreinterpretq_f32_p64"];
|
||||
let arm = match map.get(rust.name) {
|
||||
Some(i) => i,
|
||||
None => {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue