Add more AArch64 vrnd intrinsics.

LLVM can't select float64x1_t variants, but float64x2_t variants work.
This commit is contained in:
Jacob Bramley 2023-06-02 14:57:51 +01:00 committed by Amanieu d'Antras
parent a9fecd8456
commit 0459405ea9
3 changed files with 102 additions and 9 deletions

View file

@ -15297,6 +15297,21 @@ pub unsafe fn vrnd32xq_f32(a: float32x4_t) -> float32x4_t {
vrnd32xq_f32_(a)
}
/// Floating-point round to 32-bit integer, using current rounding mode
///
/// Takes a `float64x2_t` and returns a `float64x2_t`: the result stays in
/// floating-point form rather than being converted to an integer vector type.
/// The argument is forwarded unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.frint32x.v2f64` and its result is returned as-is.
///
/// # Safety
///
/// This function is compiled with the `neon` and `frintts` target features
/// enabled, so it must only be called when the executing CPU supports both.
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32xq_f64)
#[inline]
#[target_feature(enable = "neon,frintts")]
// NOTE(review): in test builds `assert_instr` presumably checks that this
// function lowers to a `frint32x` instruction — confirm against the test macro.
#[cfg_attr(test, assert_instr(frint32x))]
pub unsafe fn vrnd32xq_f64(a: float64x2_t) -> float64x2_t {
// NOTE(review): the lint is presumably silenced because the SIMD vector type
// is not considered FFI-safe in a foreign declaration — confirm.
#[allow(improper_ctypes)]
extern "unadjusted" {
// When targeting aarch64, bind this declaration to the LLVM intrinsic by name.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint32x.v2f64")]
fn vrnd32xq_f64_(a: float64x2_t) -> float64x2_t;
}
vrnd32xq_f64_(a)
}
/// Floating-point round to 32-bit integer toward zero
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f32)
@ -15327,6 +15342,21 @@ pub unsafe fn vrnd32zq_f32(a: float32x4_t) -> float32x4_t {
vrnd32zq_f32_(a)
}
/// Floating-point round to 32-bit integer toward zero
///
/// Takes a `float64x2_t` and returns a `float64x2_t`: the result stays in
/// floating-point form rather than being converted to an integer vector type.
/// The argument is forwarded unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.frint32z.v2f64` and its result is returned as-is.
///
/// # Safety
///
/// This function is compiled with the `neon` and `frintts` target features
/// enabled, so it must only be called when the executing CPU supports both.
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32zq_f64)
#[inline]
#[target_feature(enable = "neon,frintts")]
// NOTE(review): in test builds `assert_instr` presumably checks that this
// function lowers to a `frint32z` instruction — confirm against the test macro.
#[cfg_attr(test, assert_instr(frint32z))]
pub unsafe fn vrnd32zq_f64(a: float64x2_t) -> float64x2_t {
// NOTE(review): the lint is presumably silenced because the SIMD vector type
// is not considered FFI-safe in a foreign declaration — confirm.
#[allow(improper_ctypes)]
extern "unadjusted" {
// When targeting aarch64, bind this declaration to the LLVM intrinsic by name.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint32z.v2f64")]
fn vrnd32zq_f64_(a: float64x2_t) -> float64x2_t;
}
vrnd32zq_f64_(a)
}
/// Floating-point round to 64-bit integer, using current rounding mode
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f32)
@ -15357,6 +15387,21 @@ pub unsafe fn vrnd64xq_f32(a: float32x4_t) -> float32x4_t {
vrnd64xq_f32_(a)
}
/// Floating-point round to 64-bit integer, using current rounding mode
///
/// Takes a `float64x2_t` and returns a `float64x2_t`: the result stays in
/// floating-point form rather than being converted to an integer vector type.
/// The argument is forwarded unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.frint64x.v2f64` and its result is returned as-is.
///
/// # Safety
///
/// This function is compiled with the `neon` and `frintts` target features
/// enabled, so it must only be called when the executing CPU supports both.
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64xq_f64)
#[inline]
#[target_feature(enable = "neon,frintts")]
// NOTE(review): in test builds `assert_instr` presumably checks that this
// function lowers to a `frint64x` instruction — confirm against the test macro.
#[cfg_attr(test, assert_instr(frint64x))]
pub unsafe fn vrnd64xq_f64(a: float64x2_t) -> float64x2_t {
// NOTE(review): the lint is presumably silenced because the SIMD vector type
// is not considered FFI-safe in a foreign declaration — confirm.
#[allow(improper_ctypes)]
extern "unadjusted" {
// When targeting aarch64, bind this declaration to the LLVM intrinsic by name.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint64x.v2f64")]
fn vrnd64xq_f64_(a: float64x2_t) -> float64x2_t;
}
vrnd64xq_f64_(a)
}
/// Floating-point round to 64-bit integer toward zero
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f32)
@ -15387,6 +15432,21 @@ pub unsafe fn vrnd64zq_f32(a: float32x4_t) -> float32x4_t {
vrnd64zq_f32_(a)
}
/// Floating-point round to 64-bit integer toward zero
///
/// Takes a `float64x2_t` and returns a `float64x2_t`: the result stays in
/// floating-point form rather than being converted to an integer vector type.
/// The argument is forwarded unchanged to the LLVM intrinsic
/// `llvm.aarch64.neon.frint64z.v2f64` and its result is returned as-is.
///
/// # Safety
///
/// This function is compiled with the `neon` and `frintts` target features
/// enabled, so it must only be called when the executing CPU supports both.
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64zq_f64)
#[inline]
#[target_feature(enable = "neon,frintts")]
// NOTE(review): in test builds `assert_instr` presumably checks that this
// function lowers to a `frint64z` instruction — confirm against the test macro.
#[cfg_attr(test, assert_instr(frint64z))]
pub unsafe fn vrnd64zq_f64(a: float64x2_t) -> float64x2_t {
// NOTE(review): the lint is presumably silenced because the SIMD vector type
// is not considered FFI-safe in a foreign declaration — confirm.
#[allow(improper_ctypes)]
extern "unadjusted" {
// When targeting aarch64, bind this declaration to the LLVM intrinsic by name.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frint64z.v2f64")]
fn vrnd64zq_f64_(a: float64x2_t) -> float64x2_t;
}
vrnd64zq_f64_(a)
}
/// Transpose vectors
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s8)
@ -26810,6 +26870,14 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon,frintts")]
unsafe fn test_vrnd32xq_f64() {
    // One lane just above 1.0 and one just below 2.0.
    let input = f64x2::new(1.1, 1.9);
    // The expected values correspond to rounding each lane to the nearest
    // integral value; this assumes the current rounding mode is
    // round-to-nearest — TODO confirm the test environment guarantees it.
    let expected = f64x2::new(1.0, 2.0);
    let actual: f64x2 = transmute(vrnd32xq_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon,frintts")]
unsafe fn test_vrnd32z_f32() {
let a: f32x2 = f32x2::new(1.1, 1.9);
@ -26826,6 +26894,14 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon,frintts")]
unsafe fn test_vrnd32zq_f64() {
    // One lane just above 1.0 and one just below 2.0.
    let input = f64x2::new(1.1, 1.9);
    // Rounding toward zero drops the fractional part, so both lanes become 1.0.
    let expected = f64x2::new(1.0, 1.0);
    let actual: f64x2 = transmute(vrnd32zq_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon,frintts")]
unsafe fn test_vrnd64x_f32() {
let a: f32x2 = f32x2::new(1.1, 1.9);
@ -26842,6 +26918,14 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon,frintts")]
unsafe fn test_vrnd64xq_f64() {
    // One lane just above 1.0 and one just below 2.0.
    let input = f64x2::new(1.1, 1.9);
    // The expected values correspond to rounding each lane to the nearest
    // integral value; this assumes the current rounding mode is
    // round-to-nearest — TODO confirm the test environment guarantees it.
    let expected = f64x2::new(1.0, 2.0);
    let actual: f64x2 = transmute(vrnd64xq_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon,frintts")]
unsafe fn test_vrnd64z_f32() {
let a: f32x2 = f32x2::new(1.1, 1.9);
@ -26858,6 +26942,14 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon,frintts")]
unsafe fn test_vrnd64zq_f64() {
    // One lane just above 1.0 and one just below 2.0.
    let input = f64x2::new(1.1, 1.9);
    // Rounding toward zero drops the fractional part, so both lanes become 1.0.
    let expected = f64x2::new(1.0, 1.0);
    let actual: f64x2 = transmute(vrnd64zq_f64(transmute(input)));
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vtrn1_s8() {
let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14);

View file

@ -14,15 +14,16 @@ vbfmlaltq_laneq_f32
vbfmmlaq_f32
# Missing from both Clang and stdarch
vrnd32x_f64
# Implemented in stdarch, but missing in Clang.
vrnd32xq_f64
vrnd32z_f64
vrnd32zq_f64
vrnd64x_f64
vrnd64xq_f64
vrnd64z_f64
vrnd64zq_f64
# LLVM select error, and missing in Clang.
vrnd32x_f64
vrnd32z_f64
vrnd64x_f64
vrnd64z_f64
# LLVM select error in debug builds
#vqshlu_n_s16

View file

@ -7218,7 +7218,7 @@ target = frintts
aarch64 = frint32x
link-aarch64 = frint32x._EXT_
generate float32x2_t, float32x4_t
generate float32x2_t, float32x4_t, float64x2_t
/// Floating-point round to 32-bit integer toward zero
name = vrnd32z
@ -7228,7 +7228,7 @@ target = frintts
aarch64 = frint32z
link-aarch64 = frint32z._EXT_
generate float32x2_t, float32x4_t
generate float32x2_t, float32x4_t, float64x2_t
/// Floating-point round to 64-bit integer, using current rounding mode
name = vrnd64x
@ -7238,7 +7238,7 @@ target = frintts
aarch64 = frint64x
link-aarch64 = frint64x._EXT_
generate float32x2_t, float32x4_t
generate float32x2_t, float32x4_t, float64x2_t
/// Floating-point round to 64-bit integer toward zero
name = vrnd64z
@ -7248,7 +7248,7 @@ target = frintts
aarch64 = frint64z
link-aarch64 = frint64z._EXT_
generate float32x2_t, float32x4_t
generate float32x2_t, float32x4_t, float64x2_t
/// Transpose elements
name = vtrn