add vcvt, vcvta, vcvtn, vcvtm, vcvtp neon instructions (#1084)

This commit is contained in:
Sparrow Li 2021-03-16 22:30:05 +08:00 committed by GitHub
parent bb84df7d9f
commit 7accc82569
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 918 additions and 56 deletions

View file

@ -1153,6 +1153,454 @@ pub unsafe fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t
simd_shuffle4(a, vcvtx_f32_f64(b), [0, 1, 2, 3])
}
/// Floating-point convert to signed fixed-point, rounding toward zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtzs))]
pub unsafe fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t {
simd_cast(a)
}
/// Floating-point convert to signed fixed-point, rounding toward zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtzs))]
pub unsafe fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t {
simd_cast(a)
}
/// Floating-point convert to unsigned fixed-point, rounding toward zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtzu))]
pub unsafe fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t {
simd_cast(a)
}
/// Floating-point convert to unsigned fixed-point, rounding toward zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtzu))]
pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t {
simd_cast(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding to nearest with ties to away.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtas.v2i32.v2f32`;
/// test builds assert that `fcvtas` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtas))]
pub unsafe fn vcvta_s32_f32(a: float32x2_t) -> int32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v2i32.v2f32")]
        fn fcvtas_v2i32_v2f32(a: float32x2_t) -> int32x2_t;
    }
    fcvtas_v2i32_v2f32(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding to nearest with ties to away.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtas.v4i32.v4f32`;
/// test builds assert that `fcvtas` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtas))]
pub unsafe fn vcvtaq_s32_f32(a: float32x4_t) -> int32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v4i32.v4f32")]
        fn fcvtas_v4i32_v4f32(a: float32x4_t) -> int32x4_t;
    }
    fcvtas_v4i32_v4f32(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding to nearest with ties to away.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtas.v1i64.v1f64`;
/// test builds assert that `fcvtas` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtas))]
pub unsafe fn vcvta_s64_f64(a: float64x1_t) -> int64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v1i64.v1f64")]
        fn fcvtas_v1i64_v1f64(a: float64x1_t) -> int64x1_t;
    }
    fcvtas_v1i64_v1f64(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding to nearest with ties to away.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtas.v2i64.v2f64`;
/// test builds assert that `fcvtas` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtas))]
pub unsafe fn vcvtaq_s64_f64(a: float64x2_t) -> int64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtas.v2i64.v2f64")]
        fn fcvtas_v2i64_v2f64(a: float64x2_t) -> int64x2_t;
    }
    fcvtas_v2i64_v2f64(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding to nearest with ties to even.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtns.v2i32.v2f32`;
/// test builds assert that `fcvtns` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtns))]
pub unsafe fn vcvtn_s32_f32(a: float32x2_t) -> int32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.v2i32.v2f32")]
        fn fcvtns_v2i32_v2f32(a: float32x2_t) -> int32x2_t;
    }
    fcvtns_v2i32_v2f32(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding to nearest with ties to even.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtns.v4i32.v4f32`;
/// test builds assert that `fcvtns` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtns))]
pub unsafe fn vcvtnq_s32_f32(a: float32x4_t) -> int32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.v4i32.v4f32")]
        fn fcvtns_v4i32_v4f32(a: float32x4_t) -> int32x4_t;
    }
    fcvtns_v4i32_v4f32(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding to nearest with ties to even.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtns.v1i64.v1f64`;
/// test builds assert that `fcvtns` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtns))]
pub unsafe fn vcvtn_s64_f64(a: float64x1_t) -> int64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.v1i64.v1f64")]
        fn fcvtns_v1i64_v1f64(a: float64x1_t) -> int64x1_t;
    }
    fcvtns_v1i64_v1f64(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding to nearest with ties to even.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtns.v2i64.v2f64`;
/// test builds assert that `fcvtns` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtns))]
pub unsafe fn vcvtnq_s64_f64(a: float64x2_t) -> int64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtns.v2i64.v2f64")]
        fn fcvtns_v2i64_v2f64(a: float64x2_t) -> int64x2_t;
    }
    fcvtns_v2i64_v2f64(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding toward minus infinity.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtms.v2i32.v2f32`;
/// test builds assert that `fcvtms` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtms))]
pub unsafe fn vcvtm_s32_f32(a: float32x2_t) -> int32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.v2i32.v2f32")]
        fn fcvtms_v2i32_v2f32(a: float32x2_t) -> int32x2_t;
    }
    fcvtms_v2i32_v2f32(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding toward minus infinity.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtms.v4i32.v4f32`;
/// test builds assert that `fcvtms` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtms))]
pub unsafe fn vcvtmq_s32_f32(a: float32x4_t) -> int32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.v4i32.v4f32")]
        fn fcvtms_v4i32_v4f32(a: float32x4_t) -> int32x4_t;
    }
    fcvtms_v4i32_v4f32(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding toward minus infinity.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtms.v1i64.v1f64`;
/// test builds assert that `fcvtms` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtms))]
pub unsafe fn vcvtm_s64_f64(a: float64x1_t) -> int64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.v1i64.v1f64")]
        fn fcvtms_v1i64_v1f64(a: float64x1_t) -> int64x1_t;
    }
    fcvtms_v1i64_v1f64(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding toward minus infinity.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtms.v2i64.v2f64`;
/// test builds assert that `fcvtms` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtms))]
pub unsafe fn vcvtmq_s64_f64(a: float64x2_t) -> int64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtms.v2i64.v2f64")]
        fn fcvtms_v2i64_v2f64(a: float64x2_t) -> int64x2_t;
    }
    fcvtms_v2i64_v2f64(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding toward plus infinity.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtps.v2i32.v2f32`;
/// test builds assert that `fcvtps` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtps))]
pub unsafe fn vcvtp_s32_f32(a: float32x2_t) -> int32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.v2i32.v2f32")]
        fn fcvtps_v2i32_v2f32(a: float32x2_t) -> int32x2_t;
    }
    fcvtps_v2i32_v2f32(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding toward plus infinity.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtps.v4i32.v4f32`;
/// test builds assert that `fcvtps` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtps))]
pub unsafe fn vcvtpq_s32_f32(a: float32x4_t) -> int32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.v4i32.v4f32")]
        fn fcvtps_v4i32_v4f32(a: float32x4_t) -> int32x4_t;
    }
    fcvtps_v4i32_v4f32(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding toward plus infinity.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtps.v1i64.v1f64`;
/// test builds assert that `fcvtps` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtps))]
pub unsafe fn vcvtp_s64_f64(a: float64x1_t) -> int64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.v1i64.v1f64")]
        fn fcvtps_v1i64_v1f64(a: float64x1_t) -> int64x1_t;
    }
    fcvtps_v1i64_v1f64(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding toward plus infinity.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtps.v2i64.v2f64`;
/// test builds assert that `fcvtps` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtps))]
pub unsafe fn vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtps.v2i64.v2f64")]
        fn fcvtps_v2i64_v2f64(a: float64x2_t) -> int64x2_t;
    }
    fcvtps_v2i64_v2f64(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding to nearest with ties to away.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtau.v2i32.v2f32`;
/// test builds assert that `fcvtau` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtau))]
pub unsafe fn vcvta_u32_f32(a: float32x2_t) -> uint32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.v2i32.v2f32")]
        fn fcvtau_v2i32_v2f32(a: float32x2_t) -> uint32x2_t;
    }
    fcvtau_v2i32_v2f32(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding to nearest with ties to away.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtau.v4i32.v4f32`;
/// test builds assert that `fcvtau` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtau))]
pub unsafe fn vcvtaq_u32_f32(a: float32x4_t) -> uint32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.v4i32.v4f32")]
        fn fcvtau_v4i32_v4f32(a: float32x4_t) -> uint32x4_t;
    }
    fcvtau_v4i32_v4f32(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding to nearest with ties to away.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtau.v1i64.v1f64`;
/// test builds assert that `fcvtau` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtau))]
pub unsafe fn vcvta_u64_f64(a: float64x1_t) -> uint64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.v1i64.v1f64")]
        fn fcvtau_v1i64_v1f64(a: float64x1_t) -> uint64x1_t;
    }
    fcvtau_v1i64_v1f64(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding to nearest with ties to away.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtau.v2i64.v2f64`;
/// test builds assert that `fcvtau` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtau))]
pub unsafe fn vcvtaq_u64_f64(a: float64x2_t) -> uint64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtau.v2i64.v2f64")]
        fn fcvtau_v2i64_v2f64(a: float64x2_t) -> uint64x2_t;
    }
    fcvtau_v2i64_v2f64(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding to nearest with ties to even.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtnu.v2i32.v2f32`;
/// test builds assert that `fcvtnu` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtnu))]
pub unsafe fn vcvtn_u32_f32(a: float32x2_t) -> uint32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.v2i32.v2f32")]
        fn fcvtnu_v2i32_v2f32(a: float32x2_t) -> uint32x2_t;
    }
    fcvtnu_v2i32_v2f32(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding to nearest with ties to even.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtnu.v4i32.v4f32`;
/// test builds assert that `fcvtnu` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtnu))]
pub unsafe fn vcvtnq_u32_f32(a: float32x4_t) -> uint32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.v4i32.v4f32")]
        fn fcvtnu_v4i32_v4f32(a: float32x4_t) -> uint32x4_t;
    }
    fcvtnu_v4i32_v4f32(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding to nearest with ties to even.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtnu.v1i64.v1f64`;
/// test builds assert that `fcvtnu` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtnu))]
pub unsafe fn vcvtn_u64_f64(a: float64x1_t) -> uint64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.v1i64.v1f64")]
        fn fcvtnu_v1i64_v1f64(a: float64x1_t) -> uint64x1_t;
    }
    fcvtnu_v1i64_v1f64(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding to nearest with ties to even.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtnu.v2i64.v2f64`;
/// test builds assert that `fcvtnu` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtnu))]
pub unsafe fn vcvtnq_u64_f64(a: float64x2_t) -> uint64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtnu.v2i64.v2f64")]
        fn fcvtnu_v2i64_v2f64(a: float64x2_t) -> uint64x2_t;
    }
    fcvtnu_v2i64_v2f64(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding toward minus infinity.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtmu.v2i32.v2f32`;
/// test builds assert that `fcvtmu` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtmu))]
pub unsafe fn vcvtm_u32_f32(a: float32x2_t) -> uint32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.v2i32.v2f32")]
        fn fcvtmu_v2i32_v2f32(a: float32x2_t) -> uint32x2_t;
    }
    fcvtmu_v2i32_v2f32(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding toward minus infinity.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtmu.v4i32.v4f32`;
/// test builds assert that `fcvtmu` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtmu))]
pub unsafe fn vcvtmq_u32_f32(a: float32x4_t) -> uint32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.v4i32.v4f32")]
        fn fcvtmu_v4i32_v4f32(a: float32x4_t) -> uint32x4_t;
    }
    fcvtmu_v4i32_v4f32(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding toward minus infinity.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtmu.v1i64.v1f64`;
/// test builds assert that `fcvtmu` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtmu))]
pub unsafe fn vcvtm_u64_f64(a: float64x1_t) -> uint64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.v1i64.v1f64")]
        fn fcvtmu_v1i64_v1f64(a: float64x1_t) -> uint64x1_t;
    }
    fcvtmu_v1i64_v1f64(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding toward minus infinity.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtmu.v2i64.v2f64`;
/// test builds assert that `fcvtmu` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtmu))]
pub unsafe fn vcvtmq_u64_f64(a: float64x2_t) -> uint64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtmu.v2i64.v2f64")]
        fn fcvtmu_v2i64_v2f64(a: float64x2_t) -> uint64x2_t;
    }
    fcvtmu_v2i64_v2f64(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding toward plus infinity.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtpu.v2i32.v2f32`;
/// test builds assert that `fcvtpu` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtpu))]
pub unsafe fn vcvtp_u32_f32(a: float32x2_t) -> uint32x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.v2i32.v2f32")]
        fn fcvtpu_v2i32_v2f32(a: float32x2_t) -> uint32x2_t;
    }
    fcvtpu_v2i32_v2f32(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding toward plus infinity.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtpu.v4i32.v4f32`;
/// test builds assert that `fcvtpu` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtpu))]
pub unsafe fn vcvtpq_u32_f32(a: float32x4_t) -> uint32x4_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.v4i32.v4f32")]
        fn fcvtpu_v4i32_v4f32(a: float32x4_t) -> uint32x4_t;
    }
    fcvtpu_v4i32_v4f32(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding toward plus infinity.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtpu.v1i64.v1f64`;
/// test builds assert that `fcvtpu` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtpu))]
pub unsafe fn vcvtp_u64_f64(a: float64x1_t) -> uint64x1_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.v1i64.v1f64")]
        fn fcvtpu_v1i64_v1f64(a: float64x1_t) -> uint64x1_t;
    }
    fcvtpu_v1i64_v1f64(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding toward plus infinity.
///
/// Thin wrapper over the LLVM intrinsic `llvm.aarch64.neon.fcvtpu.v2i64.v2f64`;
/// test builds assert that `fcvtpu` is emitted.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtpu))]
pub unsafe fn vcvtpq_u64_f64(a: float64x2_t) -> uint64x2_t {
    #[allow(improper_ctypes)]
    extern "C" {
        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtpu.v2i64.v2f64")]
        fn fcvtpu_v2i64_v2f64(a: float64x2_t) -> uint64x2_t;
    }
    fcvtpu_v2i64_v2f64(a)
}
/// Multiply
#[inline]
#[target_feature(enable = "neon")]
@ -2470,6 +2918,294 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvt_s64_f64() {
    // Rounding toward zero: -1.1 becomes -1.
    let input: f64 = -1.1;
    let expected = i64x1::new(-1);
    let actual: i64x1 = transmute(vcvt_s64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtq_s64_f64() {
    // Rounding toward zero drops the fractional part of each lane.
    let input = f64x2::new(-1.1, 2.1);
    let expected = i64x2::new(-1, 2);
    let actual: i64x2 = transmute(vcvtq_s64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvt_u64_f64() {
    // Rounding toward zero: 1.1 becomes 1.
    let input: f64 = 1.1;
    let expected = u64x1::new(1);
    let actual: u64x1 = transmute(vcvt_u64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtq_u64_f64() {
    // Rounding toward zero drops the fractional part of each lane.
    let input = f64x2::new(1.1, 2.1);
    let expected = u64x2::new(1, 2);
    let actual: u64x2 = transmute(vcvtq_u64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvta_s32_f32() {
    // Neither lane is a tie, so each goes to its nearest integer.
    let input = f32x2::new(-1.1, 2.1);
    let expected = i32x2::new(-1, 2);
    let actual: i32x2 = transmute(vcvta_s32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtaq_s32_f32() {
    // No lane is a tie, so each goes to its nearest integer.
    let input = f32x4::new(-1.1, 2.1, -2.9, 3.9);
    let expected = i32x4::new(-1, 2, -3, 4);
    let actual: i32x4 = transmute(vcvtaq_s32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvta_s64_f64() {
    // -1.1 is nearest to -1.
    let input: f64 = -1.1;
    let expected = i64x1::new(-1);
    let actual: i64x1 = transmute(vcvta_s64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtaq_s64_f64() {
    // Neither lane is a tie, so each goes to its nearest integer.
    let input = f64x2::new(-1.1, 2.1);
    let expected = i64x2::new(-1, 2);
    let actual: i64x2 = transmute(vcvtaq_s64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtn_s32_f32() {
    // -1.5 is a tie and resolves to the even neighbour, -2.
    let input = f32x2::new(-1.5, 2.1);
    let expected = i32x2::new(-2, 2);
    let actual: i32x2 = transmute(vcvtn_s32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtnq_s32_f32() {
    // -1.5 is a tie and resolves to the even neighbour, -2.
    let input = f32x4::new(-1.5, 2.1, -2.9, 3.9);
    let expected = i32x4::new(-2, 2, -3, 4);
    let actual: i32x4 = transmute(vcvtnq_s32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtn_s64_f64() {
    // -1.5 is a tie and resolves to the even neighbour, -2.
    let input: f64 = -1.5;
    let expected = i64x1::new(-2);
    let actual: i64x1 = transmute(vcvtn_s64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtnq_s64_f64() {
    // -1.5 is a tie and resolves to the even neighbour, -2.
    let input = f64x2::new(-1.5, 2.1);
    let expected = i64x2::new(-2, 2);
    let actual: i64x2 = transmute(vcvtnq_s64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtm_s32_f32() {
    // Rounding toward minus infinity: -1.1 goes down to -2.
    let input = f32x2::new(-1.1, 2.1);
    let expected = i32x2::new(-2, 2);
    let actual: i32x2 = transmute(vcvtm_s32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtmq_s32_f32() {
    // Rounding toward minus infinity: every lane goes to the integer below it.
    let input = f32x4::new(-1.1, 2.1, -2.9, 3.9);
    let expected = i32x4::new(-2, 2, -3, 3);
    let actual: i32x4 = transmute(vcvtmq_s32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtm_s64_f64() {
    // Rounding toward minus infinity: -1.1 goes down to -2.
    let input: f64 = -1.1;
    let expected = i64x1::new(-2);
    let actual: i64x1 = transmute(vcvtm_s64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtmq_s64_f64() {
    // Rounding toward minus infinity: every lane goes to the integer below it.
    let input = f64x2::new(-1.1, 2.1);
    let expected = i64x2::new(-2, 2);
    let actual: i64x2 = transmute(vcvtmq_s64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtp_s32_f32() {
    // Rounding toward plus infinity: every lane goes to the integer above it.
    let input = f32x2::new(-1.1, 2.1);
    let expected = i32x2::new(-1, 3);
    let actual: i32x2 = transmute(vcvtp_s32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtpq_s32_f32() {
    // Rounding toward plus infinity: every lane goes to the integer above it.
    let input = f32x4::new(-1.1, 2.1, -2.9, 3.9);
    let expected = i32x4::new(-1, 3, -2, 4);
    let actual: i32x4 = transmute(vcvtpq_s32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtp_s64_f64() {
    // Rounding toward plus infinity: -1.1 goes up to -1.
    let input: f64 = -1.1;
    let expected = i64x1::new(-1);
    let actual: i64x1 = transmute(vcvtp_s64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtpq_s64_f64() {
    // Rounding toward plus infinity: every lane goes to the integer above it.
    let input = f64x2::new(-1.1, 2.1);
    let expected = i64x2::new(-1, 3);
    let actual: i64x2 = transmute(vcvtpq_s64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvta_u32_f32() {
    // Neither lane is a tie, so each goes to its nearest integer.
    let input = f32x2::new(1.1, 2.1);
    let expected = u32x2::new(1, 2);
    let actual: u32x2 = transmute(vcvta_u32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtaq_u32_f32() {
    // No lane is a tie, so each goes to its nearest integer.
    let input = f32x4::new(1.1, 2.1, 2.9, 3.9);
    let expected = u32x4::new(1, 2, 3, 4);
    let actual: u32x4 = transmute(vcvtaq_u32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvta_u64_f64() {
    // 1.1 is nearest to 1.
    let input: f64 = 1.1;
    let expected = u64x1::new(1);
    let actual: u64x1 = transmute(vcvta_u64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtaq_u64_f64() {
    // Neither lane is a tie, so each goes to its nearest integer.
    let input = f64x2::new(1.1, 2.1);
    let expected = u64x2::new(1, 2);
    let actual: u64x2 = transmute(vcvtaq_u64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtn_u32_f32() {
    // 1.5 is a tie and resolves to the even neighbour, 2.
    let input = f32x2::new(1.5, 2.1);
    let expected = u32x2::new(2, 2);
    let actual: u32x2 = transmute(vcvtn_u32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtnq_u32_f32() {
    // 1.5 is a tie and resolves to the even neighbour, 2.
    let input = f32x4::new(1.5, 2.1, 2.9, 3.9);
    let expected = u32x4::new(2, 2, 3, 4);
    let actual: u32x4 = transmute(vcvtnq_u32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtn_u64_f64() {
    // 1.5 is a tie and resolves to the even neighbour, 2.
    let input: f64 = 1.5;
    let expected = u64x1::new(2);
    let actual: u64x1 = transmute(vcvtn_u64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtnq_u64_f64() {
    // 1.5 is a tie and resolves to the even neighbour, 2.
    let input = f64x2::new(1.5, 2.1);
    let expected = u64x2::new(2, 2);
    let actual: u64x2 = transmute(vcvtnq_u64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtm_u32_f32() {
    // Rounding toward minus infinity: every lane goes to the integer below it.
    let input = f32x2::new(1.1, 2.1);
    let expected = u32x2::new(1, 2);
    let actual: u32x2 = transmute(vcvtm_u32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtmq_u32_f32() {
    // Rounding toward minus infinity: every lane goes to the integer below it.
    let input = f32x4::new(1.1, 2.1, 2.9, 3.9);
    let expected = u32x4::new(1, 2, 2, 3);
    let actual: u32x4 = transmute(vcvtmq_u32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtm_u64_f64() {
    // Rounding toward minus infinity: 1.1 goes down to 1.
    let input: f64 = 1.1;
    let expected = u64x1::new(1);
    let actual: u64x1 = transmute(vcvtm_u64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtmq_u64_f64() {
    // Rounding toward minus infinity: every lane goes to the integer below it.
    let input = f64x2::new(1.1, 2.1);
    let expected = u64x2::new(1, 2);
    let actual: u64x2 = transmute(vcvtmq_u64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtp_u32_f32() {
    // Rounding toward plus infinity: every lane goes to the integer above it.
    let input = f32x2::new(1.1, 2.1);
    let expected = u32x2::new(2, 3);
    let actual: u32x2 = transmute(vcvtp_u32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtpq_u32_f32() {
    // Rounding toward plus infinity: every lane goes to the integer above it.
    let input = f32x4::new(1.1, 2.1, 2.9, 3.9);
    let expected = u32x4::new(2, 3, 3, 4);
    let actual: u32x4 = transmute(vcvtpq_u32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtp_u64_f64() {
    // Rounding toward plus infinity: 1.1 goes up to 2.
    let input: f64 = 1.1;
    let expected = u64x1::new(2);
    let actual: u64x1 = transmute(vcvtp_u64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtpq_u64_f64() {
    // Rounding toward plus infinity: every lane goes to the integer above it.
    let input = f64x2::new(1.1, 2.1);
    let expected = u64x2::new(2, 3);
    let actual: u64x2 = transmute(vcvtpq_u64_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vmul_f64() {
let a: f64 = 1.0;

View file

@ -314,11 +314,6 @@ extern "C" {
c: uint8x16_t,
) -> int8x16_t;
// Raw binding to the LLVM `fcvtzu` intrinsic for four f32 lanes -> four u32 lanes.
#[link_name = "llvm.aarch64.neon.fcvtzu.v4i32.v4f32"]
fn vcvtq_u32_f32_(a: float32x4_t) -> uint32x4_t;
// Raw binding to the LLVM `fcvtzs` intrinsic for four f32 lanes -> four i32 lanes.
#[link_name = "llvm.aarch64.neon.fcvtzs.v4i32.v4f32"]
fn vcvtq_s32_f32_(a: float32x4_t) -> int32x4_t;
// Raw binding to the LLVM `vsli` intrinsic for `int8x8_t` operands; `n` is passed as an `i32`.
#[link_name = "llvm.aarch64.neon.vsli.v8i8"]
fn vsli_n_s8_(a: int8x8_t, b: int8x8_t, n: i32) -> int8x8_t;
#[link_name = "llvm.aarch64.neon.vsli.v16i8"]
@ -2364,21 +2359,6 @@ pub unsafe fn vqtbx4q_p8(a: poly8x16_t, t: poly8x16x4_t, idx: uint8x16_t) -> pol
))
}
/// Floating-point convert to signed integer, rounding toward zero (vector).
///
/// Forwards to the file-level `vcvtq_s32_f32_` extern binding. The accompanying
/// test expects out-of-range lanes to clamp to `0x7fffffff` / `-0x80000000`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtzs))]
pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
vcvtq_s32_f32_(a)
}
/// Floating-point Convert to Unsigned fixed-point, rounding toward Zero (vector)
///
/// Forwards to the file-level `vcvtq_u32_f32_` extern binding. The accompanying
/// test expects negative lanes to become `0` and too-large lanes `0xffffffff`.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtzu))]
pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
vcvtq_u32_f32_(a)
}
/// Shift Left and Insert (immediate)
#[inline]
#[target_feature(enable = "neon")]
@ -2749,42 +2729,6 @@ mod tests {
use std::mem::transmute;
use stdarch_test::simd_test;
#[simd_test(enable = "neon")]
unsafe fn test_vcvtq_s32_f32() {
    // In-range lanes convert exactly.
    let in_range = f32x4::new(-1., 2., 3., 4.);
    let want_in_range = i32x4::new(-1, 2, 3, 4);
    let got_in_range: i32x4 = transmute(vcvtq_s32_f32(transmute(in_range)));
    assert_eq!(got_in_range, want_in_range);

    // A lane far above the i32 range clamps to the maximum.
    let too_large = f32x4::new(10e37, 2., 3., 4.);
    let want_max = i32x4::new(0x7fffffff, 2, 3, 4);
    let got_max: i32x4 = transmute(vcvtq_s32_f32(transmute(too_large)));
    assert_eq!(got_max, want_max);

    // A lane far below the i32 range clamps to the minimum.
    let too_small = f32x4::new(-10e37, 2., 3., 4.);
    let want_min = i32x4::new(-0x80000000, 2, 3, 4);
    let got_min: i32x4 = transmute(vcvtq_s32_f32(transmute(too_small)));
    assert_eq!(got_min, want_min);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtq_u32_f32() {
    // In-range lanes convert exactly.
    let in_range = f32x4::new(1., 2., 3., 4.);
    let want_in_range = u32x4::new(1, 2, 3, 4);
    let got_in_range: u32x4 = transmute(vcvtq_u32_f32(transmute(in_range)));
    assert_eq!(got_in_range, want_in_range);

    // A negative lane clamps to zero.
    let negative = f32x4::new(-1., 2., 3., 4.);
    let want_zero = u32x4::new(0, 2, 3, 4);
    let got_zero: u32x4 = transmute(vcvtq_u32_f32(transmute(negative)));
    assert_eq!(got_zero, want_zero);

    // A lane far above the u32 range clamps to the maximum.
    let too_large = f32x4::new(10e37, 2., 3., 4.);
    let want_max = u32x4::new(0xffffffff, 2, 3, 4);
    let got_max: u32x4 = transmute(vcvtq_u32_f32(transmute(too_large)));
    assert_eq!(got_max, want_max);
}
#[simd_test(enable = "neon")]
unsafe fn test_vuqadd_s8() {
let a = i8x8::new(i8::MIN, -3, -2, -1, 0, 1, 2, i8::MAX);

View file

@ -1941,6 +1941,46 @@ pub unsafe fn vcaleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
vcageq_f32(b, a)
}
/// Lane-wise floating-point to signed integer conversion, rounding toward zero.
///
/// Implemented as a plain SIMD cast; test builds assert `vcvt` on arm and
/// `fcvtzs` on aarch64.
///
/// NOTE(review): for lanes outside the `i32` range the cast does not promise
/// the clamping the instruction performs - confirm callers never rely on it.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
    simd_cast::<float32x2_t, int32x2_t>(a)
}
/// Lane-wise floating-point to signed integer conversion, rounding toward zero.
///
/// Implemented as a plain SIMD cast; test builds assert `vcvt` on arm and
/// `fcvtzs` on aarch64.
///
/// NOTE(review): for lanes outside the `i32` range the cast does not promise
/// the clamping the instruction performs - confirm callers never rely on it.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
    simd_cast::<float32x4_t, int32x4_t>(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding toward zero.
///
/// Implemented as a plain SIMD cast; test builds assert `vcvt` on arm and
/// `fcvtzu` on aarch64.
///
/// NOTE(review): for negative or too-large lanes the cast does not promise
/// the clamping the instruction performs - confirm callers never rely on it.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
    simd_cast::<float32x2_t, uint32x2_t>(a)
}
/// Lane-wise floating-point to unsigned integer conversion, rounding toward zero.
///
/// Implemented as a plain SIMD cast; test builds assert `vcvt` on arm and
/// `fcvtzu` on aarch64.
///
/// NOTE(review): for negative or too-large lanes the cast does not promise
/// the clamping the instruction performs - confirm callers never rely on it.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
    simd_cast::<float32x4_t, uint32x4_t>(a)
}
/// Saturating subtract
#[inline]
#[target_feature(enable = "neon")]
@ -5485,6 +5525,38 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvt_s32_f32() {
    // Rounding toward zero drops the fractional part of each lane.
    let input = f32x2::new(-1.1, 2.1);
    let expected = i32x2::new(-1, 2);
    let actual: i32x2 = transmute(vcvt_s32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtq_s32_f32() {
    // Rounding toward zero drops the fractional part of each lane.
    let input = f32x4::new(-1.1, 2.1, -2.9, 3.9);
    let expected = i32x4::new(-1, 2, -2, 3);
    let actual: i32x4 = transmute(vcvtq_s32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvt_u32_f32() {
    // Rounding toward zero drops the fractional part of each lane.
    let input = f32x2::new(1.1, 2.1);
    let expected = u32x2::new(1, 2);
    let actual: u32x2 = transmute(vcvt_u32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vcvtq_u32_f32() {
    // Rounding toward zero drops the fractional part of each lane.
    let input = f32x4::new(1.1, 2.1, 2.9, 3.9);
    let expected = u32x4::new(1, 2, 2, 3);
    let actual: u32x4 = transmute(vcvtq_u32_f32(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqsub_u8() {
let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);

View file

@ -652,6 +652,112 @@ validate -1.0, 2.0, -3.0, 4.0
aarch64 = fcvtxn
generate float32x2_t:float64x2_t:float32x4_t
/// Floating-point convert to signed fixed-point, rounding toward zero
name = vcvt
double-suffixes
fn = simd_cast
a = -1.1, 2.1, -2.9, 3.9
validate -1, 2, -2, 3
aarch64 = fcvtzs
generate float64x1_t:int64x1_t, float64x2_t:int64x2_t
arm = vcvt
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t
/// Floating-point convert to unsigned fixed-point, rounding toward zero
name = vcvt
double-suffixes
fn = simd_cast
a = 1.1, 2.1, 2.9, 3.9
validate 1, 2, 2, 3
aarch64 = fcvtzu
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
arm = vcvt
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
/// Floating-point convert to signed integer, rounding to nearest with ties to away
name = vcvta
double-suffixes
a = -1.1, 2.1, -2.9, 3.9
validate -1, 2, -3, 4
aarch64 = fcvtas
link-aarch64 = fcvtas._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t
/// Floating-point convert to signed integer, rounding to nearest with ties to even
name = vcvtn
double-suffixes
a = -1.5, 2.1, -2.9, 3.9
validate -2, 2, -3, 4
aarch64 = fcvtns
link-aarch64 = fcvtns._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t
/// Floating-point convert to signed integer, rounding toward minus infinity
name = vcvtm
double-suffixes
a = -1.1, 2.1, -2.9, 3.9
validate -2, 2, -3, 3
aarch64 = fcvtms
link-aarch64 = fcvtms._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t
/// Floating-point convert to signed integer, rounding toward plus infinity
name = vcvtp
double-suffixes
a = -1.1, 2.1, -2.9, 3.9
validate -1, 3, -2, 4
aarch64 = fcvtps
link-aarch64 = fcvtps._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t
/// Floating-point convert to unsigned integer, rounding to nearest with ties to away
name = vcvta
double-suffixes
a = 1.1, 2.1, 2.9, 3.9
validate 1, 2, 3, 4
aarch64 = fcvtau
link-aarch64 = fcvtau._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
/// Floating-point convert to unsigned integer, rounding to nearest with ties to even
name = vcvtn
double-suffixes
a = 1.5, 2.1, 2.9, 3.9
validate 2, 2, 3, 4
aarch64 = fcvtnu
link-aarch64 = fcvtnu._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
/// Floating-point convert to unsigned integer, rounding toward minus infinity
name = vcvtm
double-suffixes
a = 1.1, 2.1, 2.9, 3.9
validate 1, 2, 2, 3
aarch64 = fcvtmu
link-aarch64 = fcvtmu._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
/// Floating-point convert to unsigned integer, rounding toward plus infinity
name = vcvtp
double-suffixes
a = 1.1, 2.1, 2.9, 3.9
validate 2, 3, 3, 4
aarch64 = fcvtpu
link-aarch64 = fcvtpu._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
/// Saturating subtract
name = vqsub
a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42

View file

@ -170,6 +170,10 @@ fn type_to_double_suffixes<'a>(out_t: &'a str, in_t: &'a str) -> &'a str {
("int32x4_t", "float32x4_t") => "q_s32_f32",
("int64x1_t", "float64x1_t") => "_s64_f64",
("int64x2_t", "float64x2_t") => "q_s64_f64",
("uint32x2_t", "float32x2_t") => "_u32_f32",
("uint32x4_t", "float32x4_t") => "q_u32_f32",
("uint64x1_t", "float64x1_t") => "_u64_f64",
("uint64x2_t", "float64x2_t") => "q_u64_f64",
(_, _) => panic!("unknown type: {}, {}", out_t, in_t),
}
}