add vcgez, vcgtz, vclez, vcltz neon instructions (#1069)

This commit is contained in:
Sparrow Li 2021-03-10 14:14:03 +08:00 committed by GitHub
parent 7583a92072
commit 7bc90053fd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 914 additions and 7 deletions

View file

@ -565,6 +565,438 @@ pub unsafe fn vcgeq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t {
simd_ge(a, b)
}
/// Signed compare of each lane of `a` against zero (greater than or equal).
///
/// Implemented as `simd_ge` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmge))]
pub unsafe fn vcgez_s8(a: int8x8_t) -> uint8x8_t {
    let zero = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
    simd_ge(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (greater than or equal).
///
/// Implemented as `simd_ge` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmge))]
pub unsafe fn vcgezq_s8(a: int8x16_t) -> uint8x16_t {
    let zero = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    simd_ge(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (greater than or equal).
///
/// Implemented as `simd_ge` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmge))]
pub unsafe fn vcgez_s16(a: int16x4_t) -> uint16x4_t {
    let zero = i16x4::new(0, 0, 0, 0);
    simd_ge(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (greater than or equal).
///
/// Implemented as `simd_ge` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmge))]
pub unsafe fn vcgezq_s16(a: int16x8_t) -> uint16x8_t {
    let zero = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
    simd_ge(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (greater than or equal).
///
/// Implemented as `simd_ge` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmge))]
pub unsafe fn vcgez_s32(a: int32x2_t) -> uint32x2_t {
    let zero = i32x2::new(0, 0);
    simd_ge(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (greater than or equal).
///
/// Implemented as `simd_ge` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmge))]
pub unsafe fn vcgezq_s32(a: int32x4_t) -> uint32x4_t {
    let zero = i32x4::new(0, 0, 0, 0);
    simd_ge(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (greater than or equal).
///
/// Implemented as `simd_ge` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmge))]
pub unsafe fn vcgez_s64(a: int64x1_t) -> uint64x1_t {
    let zero = i64x1::new(0);
    simd_ge(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (greater than or equal).
///
/// Implemented as `simd_ge` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmge))]
pub unsafe fn vcgezq_s64(a: int64x2_t) -> uint64x2_t {
    let zero = i64x2::new(0, 0);
    simd_ge(a, transmute(zero))
}
/// Floating-point compare of each lane of `a` against zero (greater than or equal).
///
/// Implemented as `simd_ge` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmge))]
pub unsafe fn vcgez_f32(a: float32x2_t) -> uint32x2_t {
    let zero = f32x2::new(0.0, 0.0);
    simd_ge(a, transmute(zero))
}
/// Floating-point compare of each lane of `a` against zero (greater than or equal).
///
/// Implemented as `simd_ge` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmge))]
pub unsafe fn vcgezq_f32(a: float32x4_t) -> uint32x4_t {
    let zero = f32x4::new(0.0, 0.0, 0.0, 0.0);
    simd_ge(a, transmute(zero))
}
/// Floating-point compare of `a` against zero (greater than or equal).
///
/// Implemented as `simd_ge` of `a` with a transmuted `0.0`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmge))]
pub unsafe fn vcgez_f64(a: float64x1_t) -> uint64x1_t {
    let zero = 0.0_f64;
    simd_ge(a, transmute(zero))
}
/// Floating-point compare of each lane of `a` against zero (greater than or equal).
///
/// Implemented as `simd_ge` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmge))]
pub unsafe fn vcgezq_f64(a: float64x2_t) -> uint64x2_t {
    let zero = f64x2::new(0.0, 0.0);
    simd_ge(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (strictly greater than).
///
/// Implemented as `simd_gt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vcgtz_s8(a: int8x8_t) -> uint8x8_t {
    let zero = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
    simd_gt(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (strictly greater than).
///
/// Implemented as `simd_gt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vcgtzq_s8(a: int8x16_t) -> uint8x16_t {
    let zero = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    simd_gt(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (strictly greater than).
///
/// Implemented as `simd_gt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vcgtz_s16(a: int16x4_t) -> uint16x4_t {
    let zero = i16x4::new(0, 0, 0, 0);
    simd_gt(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (strictly greater than).
///
/// Implemented as `simd_gt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vcgtzq_s16(a: int16x8_t) -> uint16x8_t {
    let zero = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
    simd_gt(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (strictly greater than).
///
/// Implemented as `simd_gt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vcgtz_s32(a: int32x2_t) -> uint32x2_t {
    let zero = i32x2::new(0, 0);
    simd_gt(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (strictly greater than).
///
/// Implemented as `simd_gt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vcgtzq_s32(a: int32x4_t) -> uint32x4_t {
    let zero = i32x4::new(0, 0, 0, 0);
    simd_gt(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (strictly greater than).
///
/// Implemented as `simd_gt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vcgtz_s64(a: int64x1_t) -> uint64x1_t {
    let zero = i64x1::new(0);
    simd_gt(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (strictly greater than).
///
/// Implemented as `simd_gt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vcgtzq_s64(a: int64x2_t) -> uint64x2_t {
    let zero = i64x2::new(0, 0);
    simd_gt(a, transmute(zero))
}
/// Floating-point compare of each lane of `a` against zero (strictly greater than).
///
/// Implemented as `simd_gt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmgt))]
pub unsafe fn vcgtz_f32(a: float32x2_t) -> uint32x2_t {
    let zero = f32x2::new(0.0, 0.0);
    simd_gt(a, transmute(zero))
}
/// Floating-point compare of each lane of `a` against zero (strictly greater than).
///
/// Implemented as `simd_gt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmgt))]
pub unsafe fn vcgtzq_f32(a: float32x4_t) -> uint32x4_t {
    let zero = f32x4::new(0.0, 0.0, 0.0, 0.0);
    simd_gt(a, transmute(zero))
}
/// Floating-point compare of `a` against zero (strictly greater than).
///
/// Implemented as `simd_gt` of `a` with a transmuted `0.0`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmgt))]
pub unsafe fn vcgtz_f64(a: float64x1_t) -> uint64x1_t {
    let zero = 0.0_f64;
    simd_gt(a, transmute(zero))
}
/// Floating-point compare of each lane of `a` against zero (strictly greater than).
///
/// Implemented as `simd_gt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmgt))]
pub unsafe fn vcgtzq_f64(a: float64x2_t) -> uint64x2_t {
    let zero = f64x2::new(0.0, 0.0);
    simd_gt(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (less than or equal).
///
/// Implemented as `simd_le` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vclez_s8(a: int8x8_t) -> uint8x8_t {
    let zero = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
    simd_le(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (less than or equal).
///
/// Implemented as `simd_le` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vclezq_s8(a: int8x16_t) -> uint8x16_t {
    let zero = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    simd_le(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (less than or equal).
///
/// Implemented as `simd_le` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vclez_s16(a: int16x4_t) -> uint16x4_t {
    let zero = i16x4::new(0, 0, 0, 0);
    simd_le(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (less than or equal).
///
/// Implemented as `simd_le` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vclezq_s16(a: int16x8_t) -> uint16x8_t {
    let zero = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
    simd_le(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (less than or equal).
///
/// Implemented as `simd_le` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vclez_s32(a: int32x2_t) -> uint32x2_t {
    let zero = i32x2::new(0, 0);
    simd_le(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (less than or equal).
///
/// Implemented as `simd_le` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vclezq_s32(a: int32x4_t) -> uint32x4_t {
    let zero = i32x4::new(0, 0, 0, 0);
    simd_le(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (less than or equal).
///
/// Implemented as `simd_le` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vclez_s64(a: int64x1_t) -> uint64x1_t {
    let zero = i64x1::new(0);
    simd_le(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (less than or equal).
///
/// Implemented as `simd_le` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(cmgt))]
pub unsafe fn vclezq_s64(a: int64x2_t) -> uint64x2_t {
    let zero = i64x2::new(0, 0);
    simd_le(a, transmute(zero))
}
/// Floating-point compare of each lane of `a` against zero (less than or equal).
///
/// Implemented as `simd_le` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmle))]
pub unsafe fn vclez_f32(a: float32x2_t) -> uint32x2_t {
    let zero = f32x2::new(0.0, 0.0);
    simd_le(a, transmute(zero))
}
/// Floating-point compare of each lane of `a` against zero (less than or equal).
///
/// Implemented as `simd_le` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmle))]
pub unsafe fn vclezq_f32(a: float32x4_t) -> uint32x4_t {
    let zero = f32x4::new(0.0, 0.0, 0.0, 0.0);
    simd_le(a, transmute(zero))
}
/// Floating-point compare of `a` against zero (less than or equal).
///
/// Implemented as `simd_le` of `a` with a transmuted `0.0`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmle))]
pub unsafe fn vclez_f64(a: float64x1_t) -> uint64x1_t {
    let zero = 0.0_f64;
    simd_le(a, transmute(zero))
}
/// Floating-point compare of each lane of `a` against zero (less than or equal).
///
/// Implemented as `simd_le` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmle))]
pub unsafe fn vclezq_f64(a: float64x2_t) -> uint64x2_t {
    let zero = f64x2::new(0.0, 0.0);
    simd_le(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (strictly less than).
///
/// Implemented as `simd_lt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshr))]
pub unsafe fn vcltz_s8(a: int8x8_t) -> uint8x8_t {
    let zero = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
    simd_lt(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (strictly less than).
///
/// Implemented as `simd_lt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshr))]
pub unsafe fn vcltzq_s8(a: int8x16_t) -> uint8x16_t {
    let zero = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    simd_lt(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (strictly less than).
///
/// Implemented as `simd_lt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshr))]
pub unsafe fn vcltz_s16(a: int16x4_t) -> uint16x4_t {
    let zero = i16x4::new(0, 0, 0, 0);
    simd_lt(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (strictly less than).
///
/// Implemented as `simd_lt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshr))]
pub unsafe fn vcltzq_s16(a: int16x8_t) -> uint16x8_t {
    let zero = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
    simd_lt(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (strictly less than).
///
/// Implemented as `simd_lt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshr))]
pub unsafe fn vcltz_s32(a: int32x2_t) -> uint32x2_t {
    let zero = i32x2::new(0, 0);
    simd_lt(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (strictly less than).
///
/// Implemented as `simd_lt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshr))]
pub unsafe fn vcltzq_s32(a: int32x4_t) -> uint32x4_t {
    let zero = i32x4::new(0, 0, 0, 0);
    simd_lt(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (strictly less than).
///
/// Implemented as `simd_lt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshr))]
pub unsafe fn vcltz_s64(a: int64x1_t) -> uint64x1_t {
    let zero = i64x1::new(0);
    simd_lt(a, transmute(zero))
}
/// Signed compare of each lane of `a` against zero (strictly less than).
///
/// Implemented as `simd_lt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(sshr))]
pub unsafe fn vcltzq_s64(a: int64x2_t) -> uint64x2_t {
    let zero = i64x2::new(0, 0);
    simd_lt(a, transmute(zero))
}
/// Floating-point compare of each lane of `a` against zero (strictly less than).
///
/// Implemented as `simd_lt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmlt))]
pub unsafe fn vcltz_f32(a: float32x2_t) -> uint32x2_t {
    let zero = f32x2::new(0.0, 0.0);
    simd_lt(a, transmute(zero))
}
/// Floating-point compare of each lane of `a` against zero (strictly less than).
///
/// Implemented as `simd_lt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmlt))]
pub unsafe fn vcltzq_f32(a: float32x4_t) -> uint32x4_t {
    let zero = f32x4::new(0.0, 0.0, 0.0, 0.0);
    simd_lt(a, transmute(zero))
}
/// Floating-point compare of `a` against zero (strictly less than).
///
/// Implemented as `simd_lt` of `a` with a transmuted `0.0`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmlt))]
pub unsafe fn vcltz_f64(a: float64x1_t) -> uint64x1_t {
    let zero = 0.0_f64;
    simd_lt(a, transmute(zero))
}
/// Floating-point compare of each lane of `a` against zero (strictly less than).
///
/// Implemented as `simd_lt` of `a` with an all-zero vector.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcmlt))]
pub unsafe fn vcltzq_f64(a: float64x2_t) -> uint64x2_t {
    let zero = f64x2::new(0.0, 0.0);
    simd_lt(a, transmute(zero))
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
@ -1244,6 +1676,390 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgez_s8() {
    // Lanes holding values >= 0 are expected to be all ones, the rest zero.
    let input = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
    let expected = u8x8::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
    let actual: u8x8 = transmute(vcgez_s8(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgezq_s8() {
    // Lanes holding values >= 0 are expected to be all ones, the rest zero.
    let input = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F);
    let expected = u8x16::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
    let actual: u8x16 = transmute(vcgezq_s8(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgez_s16() {
    // Lanes holding values >= 0 are expected to be all ones, the rest zero.
    let input = i16x4::new(-32768, -1, 0x00, 0x01);
    let expected = u16x4::new(0, 0, 0xFF_FF, 0xFF_FF);
    let actual: u16x4 = transmute(vcgez_s16(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgezq_s16() {
    // Lanes holding values >= 0 are expected to be all ones, the rest zero.
    let input = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
    let expected = u16x8::new(0, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
    let actual: u16x8 = transmute(vcgezq_s16(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgez_s32() {
    // Both inputs are negative, so no lane satisfies >= 0.
    let input = i32x2::new(-2147483648, -1);
    let expected = u32x2::new(0, 0);
    let actual: u32x2 = transmute(vcgez_s32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgezq_s32() {
    // Lanes holding values >= 0 are expected to be all ones, the rest zero.
    let input = i32x4::new(-2147483648, -1, 0x00, 0x01);
    let expected = u32x4::new(0, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
    let actual: u32x4 = transmute(vcgezq_s32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgez_s64() {
    // The single input is negative, so the lane does not satisfy >= 0.
    let input = i64x1::new(-9223372036854775808);
    let expected = u64x1::new(0);
    let actual: u64x1 = transmute(vcgez_s64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgezq_s64() {
    // Both inputs are negative, so no lane satisfies >= 0.
    let input = i64x2::new(-9223372036854775808, -1);
    let expected = u64x2::new(0, 0);
    let actual: u64x2 = transmute(vcgezq_s64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgez_f32() {
    // Lanes holding values >= 0.0 are expected to be all ones, the rest zero.
    let input = f32x2::new(-1.2, 0.0);
    let expected = u32x2::new(0, 0xFF_FF_FF_FF);
    let actual: u32x2 = transmute(vcgez_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgezq_f32() {
    // Lanes holding values >= 0.0 are expected to be all ones, the rest zero.
    let input = f32x4::new(-1.2, 0.0, 1.2, 2.3);
    let expected = u32x4::new(0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
    let actual: u32x4 = transmute(vcgezq_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgez_f64() {
    // The single input is negative, so the lane does not satisfy >= 0.0.
    let input = -1.2_f64;
    let expected = u64x1::new(0);
    let actual: u64x1 = transmute(vcgez_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgezq_f64() {
    // Lanes holding values >= 0.0 are expected to be all ones, the rest zero.
    let input = f64x2::new(-1.2, 0.0);
    let expected = u64x2::new(0, 0xFF_FF_FF_FF_FF_FF_FF_FF);
    let actual: u64x2 = transmute(vcgezq_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgtz_s8() {
    // Lanes holding values > 0 are expected to be all ones, the rest zero.
    let input = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
    let expected = u8x8::new(0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
    let actual: u8x8 = transmute(vcgtz_s8(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgtzq_s8() {
    // Lanes holding values > 0 are expected to be all ones, the rest zero.
    let input = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F);
    let expected = u8x16::new(0, 0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
    let actual: u8x16 = transmute(vcgtzq_s8(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgtz_s16() {
    // Lanes holding values > 0 are expected to be all ones, the rest zero.
    let input = i16x4::new(-32768, -1, 0x00, 0x01);
    let expected = u16x4::new(0, 0, 0, 0xFF_FF);
    let actual: u16x4 = transmute(vcgtz_s16(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgtzq_s16() {
    // Lanes holding values > 0 are expected to be all ones, the rest zero.
    let input = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
    let expected = u16x8::new(0, 0, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
    let actual: u16x8 = transmute(vcgtzq_s16(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgtz_s32() {
    // Both inputs are negative, so no lane satisfies > 0.
    let input = i32x2::new(-2147483648, -1);
    let expected = u32x2::new(0, 0);
    let actual: u32x2 = transmute(vcgtz_s32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgtzq_s32() {
    // Lanes holding values > 0 are expected to be all ones, the rest zero.
    let input = i32x4::new(-2147483648, -1, 0x00, 0x01);
    let expected = u32x4::new(0, 0, 0, 0xFF_FF_FF_FF);
    let actual: u32x4 = transmute(vcgtzq_s32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgtz_s64() {
    // The single input is negative, so the lane does not satisfy > 0.
    let input = i64x1::new(-9223372036854775808);
    let expected = u64x1::new(0);
    let actual: u64x1 = transmute(vcgtz_s64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgtzq_s64() {
    // Both inputs are negative, so no lane satisfies > 0.
    let input = i64x2::new(-9223372036854775808, -1);
    let expected = u64x2::new(0, 0);
    let actual: u64x2 = transmute(vcgtzq_s64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgtz_f32() {
    // Neither -1.2 nor 0.0 is strictly positive, so both lanes stay zero.
    let input = f32x2::new(-1.2, 0.0);
    let expected = u32x2::new(0, 0);
    let actual: u32x2 = transmute(vcgtz_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgtzq_f32() {
    // Lanes holding values > 0.0 are expected to be all ones, the rest zero.
    let input = f32x4::new(-1.2, 0.0, 1.2, 2.3);
    let expected = u32x4::new(0, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
    let actual: u32x4 = transmute(vcgtzq_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgtz_f64() {
    // The single input is negative, so the lane does not satisfy > 0.0.
    let input = -1.2_f64;
    let expected = u64x1::new(0);
    let actual: u64x1 = transmute(vcgtz_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcgtzq_f64() {
    // Neither -1.2 nor 0.0 is strictly positive, so both lanes stay zero.
    let input = f64x2::new(-1.2, 0.0);
    let expected = u64x2::new(0, 0);
    let actual: u64x2 = transmute(vcgtzq_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vclez_s8() {
    // Lanes holding values <= 0 are expected to be all ones, the rest zero.
    let input = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
    let expected = u8x8::new(0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0);
    let actual: u8x8 = transmute(vclez_s8(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vclezq_s8() {
    // Lanes holding values <= 0 are expected to be all ones, the rest zero.
    let input = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F);
    let expected = u8x16::new(0xFF, 0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    let actual: u8x16 = transmute(vclezq_s8(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vclez_s16() {
    // Lanes holding values <= 0 are expected to be all ones, the rest zero.
    let input = i16x4::new(-32768, -1, 0x00, 0x01);
    let expected = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0);
    let actual: u16x4 = transmute(vclez_s16(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vclezq_s16() {
    // Lanes holding values <= 0 are expected to be all ones, the rest zero.
    let input = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
    let expected = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0, 0, 0, 0, 0);
    let actual: u16x8 = transmute(vclezq_s16(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vclez_s32() {
    // Both inputs are negative, so every lane satisfies <= 0.
    let input = i32x2::new(-2147483648, -1);
    let expected = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
    let actual: u32x2 = transmute(vclez_s32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vclezq_s32() {
    // Lanes holding values <= 0 are expected to be all ones, the rest zero.
    let input = i32x4::new(-2147483648, -1, 0x00, 0x01);
    let expected = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0);
    let actual: u32x4 = transmute(vclezq_s32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vclez_s64() {
    // The single input is negative, so the lane satisfies <= 0.
    let input = i64x1::new(-9223372036854775808);
    let expected = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
    let actual: u64x1 = transmute(vclez_s64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vclezq_s64() {
    // Both inputs are negative, so every lane satisfies <= 0.
    let input = i64x2::new(-9223372036854775808, -1);
    let expected = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
    let actual: u64x2 = transmute(vclezq_s64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vclez_f32() {
    // Both -1.2 and 0.0 satisfy <= 0.0, so every lane is all ones.
    let input = f32x2::new(-1.2, 0.0);
    let expected = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
    let actual: u32x2 = transmute(vclez_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vclezq_f32() {
    // Lanes holding values <= 0.0 are expected to be all ones, the rest zero.
    let input = f32x4::new(-1.2, 0.0, 1.2, 2.3);
    let expected = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0, 0);
    let actual: u32x4 = transmute(vclezq_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vclez_f64() {
    // The single input is negative, so the lane satisfies <= 0.0.
    let input = -1.2_f64;
    let expected = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
    let actual: u64x1 = transmute(vclez_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vclezq_f64() {
    // Both -1.2 and 0.0 satisfy <= 0.0, so every lane is all ones.
    let input = f64x2::new(-1.2, 0.0);
    let expected = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
    let actual: u64x2 = transmute(vclezq_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcltz_s8() {
    // Lanes holding values < 0 are expected to be all ones, the rest zero.
    let input = i8x8::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
    let expected = u8x8::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0);
    let actual: u8x8 = transmute(vcltz_s8(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcltzq_s8() {
    // Lanes holding values < 0 are expected to be all ones, the rest zero.
    let input = i8x16::new(-128, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x7F);
    let expected = u8x16::new(0xFF, 0xFF, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    let actual: u8x16 = transmute(vcltzq_s8(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcltz_s16() {
    // Lanes holding values < 0 are expected to be all ones, the rest zero.
    let input = i16x4::new(-32768, -1, 0x00, 0x01);
    let expected = u16x4::new(0xFF_FF, 0xFF_FF, 0, 0);
    let actual: u16x4 = transmute(vcltz_s16(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcltzq_s16() {
    // Lanes holding values < 0 are expected to be all ones, the rest zero.
    let input = i16x8::new(-32768, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05);
    let expected = u16x8::new(0xFF_FF, 0xFF_FF, 0, 0, 0, 0, 0, 0);
    let actual: u16x8 = transmute(vcltzq_s16(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcltz_s32() {
    // Both inputs are negative, so every lane satisfies < 0.
    let input = i32x2::new(-2147483648, -1);
    let expected = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
    let actual: u32x2 = transmute(vcltz_s32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcltzq_s32() {
    // Lanes holding values < 0 are expected to be all ones, the rest zero.
    let input = i32x4::new(-2147483648, -1, 0x00, 0x01);
    let expected = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0, 0);
    let actual: u32x4 = transmute(vcltzq_s32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcltz_s64() {
    // The single input is negative, so the lane satisfies < 0.
    let input = i64x1::new(-9223372036854775808);
    let expected = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
    let actual: u64x1 = transmute(vcltz_s64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcltzq_s64() {
    // Both inputs are negative, so every lane satisfies < 0.
    let input = i64x2::new(-9223372036854775808, -1);
    let expected = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
    let actual: u64x2 = transmute(vcltzq_s64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcltz_f32() {
    // Lanes holding values < 0.0 are expected to be all ones, the rest zero.
    let input = f32x2::new(-1.2, 0.0);
    let expected = u32x2::new(0xFF_FF_FF_FF, 0);
    let actual: u32x2 = transmute(vcltz_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcltzq_f32() {
    // Lanes holding values < 0.0 are expected to be all ones, the rest zero.
    let input = f32x4::new(-1.2, 0.0, 1.2, 2.3);
    let expected = u32x4::new(0xFF_FF_FF_FF, 0, 0, 0);
    let actual: u32x4 = transmute(vcltzq_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcltz_f64() {
    // The single input is negative, so the lane satisfies < 0.0.
    let input = -1.2_f64;
    let expected = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF);
    let actual: u64x1 = transmute(vcltz_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcltzq_f64() {
    // Lanes holding values < 0.0 are expected to be all ones, the rest zero.
    let input = f64x2::new(-1.2, 0.0);
    let expected = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
    let actual: u64x2 = transmute(vcltzq_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_f64() {
let a: f64 = 1.0;

View file

@ -424,6 +424,86 @@ arm = vcge.s
// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
/// Compare signed greater than or equal to zero
name = vcgez
fn = simd_ge
a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmge
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
/// Floating-point compare greater than or equal to zero
name = vcgez
fn = simd_ge
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
validate FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = fcmge
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
/// Compare signed greater than zero
name = vcgtz
fn = simd_gt
a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = cmgt
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
/// Floating-point compare greater than zero
name = vcgtz
fn = simd_gt
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
validate FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE
aarch64 = fcmgt
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
/// Compare signed less than or equal to zero
name = vclez
fn = simd_le
a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
aarch64 = cmgt
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
/// Floating-point compare less than or equal to zero
name = vclez
fn = simd_le
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
aarch64 = fcmle
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
/// Compare signed less than zero
name = vcltz
fn = simd_lt
a = MIN, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, MAX
fixed = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
validate TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
aarch64 = sshr
generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t
/// Floating-point compare less than zero
name = vcltz
fn = simd_lt
a = -1.2, 0.0, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7
fixed = 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
validate TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
aarch64 = fcmlt
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
/// Saturating subtract
name = vqsub
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42

View file

@ -278,6 +278,7 @@ fn ff_val(t: &str) -> &'static str {
/// Returns the literal used for a `FALSE` entry; the element type `_t` is ignored.
fn false_val(_t: &str) -> &'static str {
    const FALSE_LITERAL: &str = "0";
    FALSE_LITERAL
}
fn map_val<'v>(t: &str, v: &'v str) -> &'v str {
match v {
"FALSE" => false_val(t),
@ -390,6 +391,12 @@ fn gen_aarch64(
current_fn,
)
}
(_, 1, _) => format!(
r#"pub unsafe fn {}(a: {}) -> {} {{
{}{}
}}"#,
name, in_t, out_t, ext_c, multi_calls,
),
(_, 2, _) => format!(
r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{
{}{}
@ -584,6 +591,12 @@ fn gen_arm(
current_fn,
)
}
// One-parameter intrinsic: the emitted signature must declare only `a`.
// This arm previously also emitted a `b: {}` parameter (passing `in_t` twice),
// which contradicts the arity matched here and the equivalent arm in `gen_aarch64`.
(_, 1, _) => format!(
r#"pub unsafe fn {}(a: {}) -> {} {{
{}{}
}}"#,
name, in_t, out_t, ext_c, multi_calls,
),
(_, 2, _) => format!(
r#"pub unsafe fn {}(a: {}, b: {}) -> {} {{
{}{}
@ -690,14 +703,12 @@ fn get_call(in_str: &str, in_t: &str, out_t: &str, fixed: &Vec<String>) -> Strin
let s = &params[i];
if s.contains(':') {
let re_params: Vec<_> = s.split(':').map(|v| v.to_string()).collect();
if re_params.len() == 1 {
if re_params[1] == "" {
re = Some((re_params[0].clone(), in_t.to_string()));
} else if re_params.len() == 2 {
if re_params[1] == "in_t" {
re = Some((re_params[0].clone(), in_t.to_string()));
} else if re_params[1] == "out_t" {
re = Some((re_params[0].clone(), out_t.to_string()));
}
} else if re_params[1] == "in_t" {
re = Some((re_params[0].clone(), in_t.to_string()));
} else if re_params[1] == "out_t" {
re = Some((re_params[0].clone(), out_t.to_string()));
}
} else {
if !param_str.is_empty() {