Add more AArch64 vrnd intrinsics.
LLVM can't select float64x1_t variants, but float64x2_t variants work.
This commit is contained in:
parent
a9fecd8456
commit
0459405ea9
3 changed files with 102 additions and 9 deletions
|
|
@ -15297,6 +15297,21 @@ pub unsafe fn vrnd32xq_f32(a: float32x4_t) -> float32x4_t {
|
|||
vrnd32xq_f32_(a)
|
||||
}
|
||||
|
||||
/// Floating-point round to 32-bit integer, using current rounding mode
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f64)
///
/// This is the two-lane `f64` (`float64x2_t`) variant. It forwards `a` unchanged to the
/// LLVM intrinsic `llvm.aarch64.neon.frint32x.v2f64` and returns that intrinsic's result.
///
/// # Safety
///
/// The function is compiled with the `neon` and `frintts` target features enabled, so the
/// caller must only invoke it on a CPU that supports both.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,frintts")]
|
||||
#[cfg_attr(test, assert_instr(frint32x))]
|
||||
pub unsafe fn vrnd32xq_f64(a: float64x2_t) -> float64x2_t {
|
||||
// NOTE(review): presumably silences the lint triggered by passing SIMD vector types
// through an extern block — confirm.
#[allow(improper_ctypes)]
|
||||
extern "unadjusted" {
|
||||
// On aarch64 the declaration below is bound to the LLVM intrinsic named in `link_name`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint32x.v2f64")]
|
||||
fn vrnd32xq_f64_(a: float64x2_t) -> float64x2_t;
|
||||
}
|
||||
vrnd32xq_f64_(a)
|
||||
}
|
||||
|
||||
/// Floating-point round to 32-bit integer toward zero
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f32)
|
||||
|
|
@ -15327,6 +15342,21 @@ pub unsafe fn vrnd32zq_f32(a: float32x4_t) -> float32x4_t {
|
|||
vrnd32zq_f32_(a)
|
||||
}
|
||||
|
||||
/// Floating-point round to 32-bit integer toward zero
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f64)
///
/// This is the two-lane `f64` (`float64x2_t`) variant. It forwards `a` unchanged to the
/// LLVM intrinsic `llvm.aarch64.neon.frint32z.v2f64` and returns that intrinsic's result.
///
/// # Safety
///
/// The function is compiled with the `neon` and `frintts` target features enabled, so the
/// caller must only invoke it on a CPU that supports both.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,frintts")]
|
||||
#[cfg_attr(test, assert_instr(frint32z))]
|
||||
pub unsafe fn vrnd32zq_f64(a: float64x2_t) -> float64x2_t {
|
||||
// NOTE(review): presumably silences the lint triggered by passing SIMD vector types
// through an extern block — confirm.
#[allow(improper_ctypes)]
|
||||
extern "unadjusted" {
|
||||
// On aarch64 the declaration below is bound to the LLVM intrinsic named in `link_name`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint32z.v2f64")]
|
||||
fn vrnd32zq_f64_(a: float64x2_t) -> float64x2_t;
|
||||
}
|
||||
vrnd32zq_f64_(a)
|
||||
}
|
||||
|
||||
/// Floating-point round to 64-bit integer, using current rounding mode
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f32)
|
||||
|
|
@ -15357,6 +15387,21 @@ pub unsafe fn vrnd64xq_f32(a: float32x4_t) -> float32x4_t {
|
|||
vrnd64xq_f32_(a)
|
||||
}
|
||||
|
||||
/// Floating-point round to 64-bit integer, using current rounding mode
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f64)
///
/// This is the two-lane `f64` (`float64x2_t`) variant. It forwards `a` unchanged to the
/// LLVM intrinsic `llvm.aarch64.neon.frint64x.v2f64` and returns that intrinsic's result.
///
/// # Safety
///
/// The function is compiled with the `neon` and `frintts` target features enabled, so the
/// caller must only invoke it on a CPU that supports both.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,frintts")]
|
||||
#[cfg_attr(test, assert_instr(frint64x))]
|
||||
pub unsafe fn vrnd64xq_f64(a: float64x2_t) -> float64x2_t {
|
||||
// NOTE(review): presumably silences the lint triggered by passing SIMD vector types
// through an extern block — confirm.
#[allow(improper_ctypes)]
|
||||
extern "unadjusted" {
|
||||
// On aarch64 the declaration below is bound to the LLVM intrinsic named in `link_name`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint64x.v2f64")]
|
||||
fn vrnd64xq_f64_(a: float64x2_t) -> float64x2_t;
|
||||
}
|
||||
vrnd64xq_f64_(a)
|
||||
}
|
||||
|
||||
/// Floating-point round to 64-bit integer toward zero
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f32)
|
||||
|
|
@ -15387,6 +15432,21 @@ pub unsafe fn vrnd64zq_f32(a: float32x4_t) -> float32x4_t {
|
|||
vrnd64zq_f32_(a)
|
||||
}
|
||||
|
||||
/// Floating-point round to 64-bit integer toward zero
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f64)
///
/// This is the two-lane `f64` (`float64x2_t`) variant. It forwards `a` unchanged to the
/// LLVM intrinsic `llvm.aarch64.neon.frint64z.v2f64` and returns that intrinsic's result.
///
/// # Safety
///
/// The function is compiled with the `neon` and `frintts` target features enabled, so the
/// caller must only invoke it on a CPU that supports both.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon,frintts")]
|
||||
#[cfg_attr(test, assert_instr(frint64z))]
|
||||
pub unsafe fn vrnd64zq_f64(a: float64x2_t) -> float64x2_t {
|
||||
// NOTE(review): presumably silences the lint triggered by passing SIMD vector types
// through an extern block — confirm.
#[allow(improper_ctypes)]
|
||||
extern "unadjusted" {
|
||||
// On aarch64 the declaration below is bound to the LLVM intrinsic named in `link_name`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint64z.v2f64")]
|
||||
fn vrnd64zq_f64_(a: float64x2_t) -> float64x2_t;
|
||||
}
|
||||
vrnd64zq_f64_(a)
|
||||
}
|
||||
|
||||
/// Transpose vectors
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s8)
|
||||
|
|
@ -26810,6 +26870,14 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
// Checks `vrnd32xq_f64` on the lanes (1.1, 1.9), expecting (1.0, 2.0).
// NOTE(review): the expected values presumably rely on the current rounding mode being
// round-to-nearest when the test runs — confirm.
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd32xq_f64() {
|
||||
let a: f64x2 = f64x2::new(1.1, 1.9);
|
||||
let e: f64x2 = f64x2::new(1.0, 2.0);
|
||||
// `transmute` converts between the test's `f64x2` values and the intrinsic's
// `float64x2_t` argument and return type.
let r: f64x2 = transmute(vrnd32xq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd32z_f32() {
|
||||
let a: f32x2 = f32x2::new(1.1, 1.9);
|
||||
|
|
@ -26826,6 +26894,14 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
// Checks `vrnd32zq_f64` on the lanes (1.1, 1.9), expecting (1.0, 1.0): rounding toward
// zero drops the fractional part of both lanes.
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd32zq_f64() {
|
||||
let a: f64x2 = f64x2::new(1.1, 1.9);
|
||||
let e: f64x2 = f64x2::new(1.0, 1.0);
|
||||
// `transmute` converts between the test's `f64x2` values and the intrinsic's
// `float64x2_t` argument and return type.
let r: f64x2 = transmute(vrnd32zq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd64x_f32() {
|
||||
let a: f32x2 = f32x2::new(1.1, 1.9);
|
||||
|
|
@ -26842,6 +26918,14 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
// Checks `vrnd64xq_f64` on the lanes (1.1, 1.9), expecting (1.0, 2.0).
// NOTE(review): the expected values presumably rely on the current rounding mode being
// round-to-nearest when the test runs — confirm.
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd64xq_f64() {
|
||||
let a: f64x2 = f64x2::new(1.1, 1.9);
|
||||
let e: f64x2 = f64x2::new(1.0, 2.0);
|
||||
// `transmute` converts between the test's `f64x2` values and the intrinsic's
// `float64x2_t` argument and return type.
let r: f64x2 = transmute(vrnd64xq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd64z_f32() {
|
||||
let a: f32x2 = f32x2::new(1.1, 1.9);
|
||||
|
|
@ -26858,6 +26942,14 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
// Checks `vrnd64zq_f64` on the lanes (1.1, 1.9), expecting (1.0, 1.0): rounding toward
// zero drops the fractional part of both lanes.
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd64zq_f64() {
|
||||
let a: f64x2 = f64x2::new(1.1, 1.9);
|
||||
let e: f64x2 = f64x2::new(1.0, 1.0);
|
||||
// `transmute` converts between the test's `f64x2` values and the intrinsic's
// `float64x2_t` argument and return type.
let r: f64x2 = transmute(vrnd64zq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vtrn1_s8() {
|
||||
let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);
|
||||
|
|
|
|||
|
|
@ -14,15 +14,16 @@ vbfmlaltq_laneq_f32
|
|||
vbfmmlaq_f32
|
||||
|
||||
|
||||
# Missing from both Clang and stdarch
|
||||
vrnd32x_f64
|
||||
# Implemented in stdarch, but missing in Clang.
|
||||
vrnd32xq_f64
|
||||
vrnd32z_f64
|
||||
vrnd32zq_f64
|
||||
vrnd64x_f64
|
||||
vrnd64xq_f64
|
||||
vrnd64z_f64
|
||||
vrnd64zq_f64
|
||||
# LLVM select error, and missing in Clang.
|
||||
vrnd32x_f64
|
||||
vrnd32z_f64
|
||||
vrnd64x_f64
|
||||
vrnd64z_f64
|
||||
|
||||
# LLVM select error in debug builds
|
||||
#vqshlu_n_s16
|
||||
|
|
|
|||
|
|
@ -7218,7 +7218,7 @@ target = frintts
|
|||
|
||||
aarch64 = frint32x
|
||||
link-aarch64 = frint32x._EXT_
|
||||
generate float32x2_t, float32x4_t
|
||||
generate float32x2_t, float32x4_t, float64x2_t
|
||||
|
||||
/// Floating-point round to 32-bit integer toward zero
|
||||
name = vrnd32z
|
||||
|
|
@ -7228,7 +7228,7 @@ target = frintts
|
|||
|
||||
aarch64 = frint32z
|
||||
link-aarch64 = frint32z._EXT_
|
||||
generate float32x2_t, float32x4_t
|
||||
generate float32x2_t, float32x4_t, float64x2_t
|
||||
|
||||
/// Floating-point round to 64-bit integer, using current rounding mode
|
||||
name = vrnd64x
|
||||
|
|
@ -7238,7 +7238,7 @@ target = frintts
|
|||
|
||||
aarch64 = frint64x
|
||||
link-aarch64 = frint64x._EXT_
|
||||
generate float32x2_t, float32x4_t
|
||||
generate float32x2_t, float32x4_t, float64x2_t
|
||||
|
||||
/// Floating-point round to 64-bit integer toward zero
|
||||
name = vrnd64z
|
||||
|
|
@ -7248,7 +7248,7 @@ target = frintts
|
|||
|
||||
aarch64 = frint64z
|
||||
link-aarch64 = frint64z._EXT_
|
||||
generate float32x2_t, float32x4_t
|
||||
generate float32x2_t, float32x4_t, float64x2_t
|
||||
|
||||
/// Transpose elements
|
||||
name = vtrn
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue