Add vrndn neon instructions (#1086)

This adds the neon instructions for lane-wise rounding without actually
converting the lanes to integers.
This commit is contained in:
Christopher Serr 2021-04-22 07:08:40 +02:00 committed by GitHub
parent de3e8f72c5
commit a43f92a181
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 55 additions and 44 deletions

View file

@ -2518,32 +2518,6 @@ pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t {
vrndaq_f64_(a)
}
/// Floating-point round to integral, to nearest with ties to even
///
/// Takes a `float32x2_t` and returns a `float32x2_t`: the rounded result stays
/// a floating-point vector rather than being converted to an integer vector.
/// The call is forwarded unchanged to the `llvm.aarch64.neon.frintn.v2f32`
/// intrinsic declared inside the function.
///
/// # Safety
///
/// This function is compiled with the `neon` target feature enabled; the
/// caller must only invoke it on a CPU that supports that feature.
#[inline]
#[target_feature(enable = "neon")]
// Test builds only: attach `assert_instr(frintn)`.
// NOTE(review): presumably asserts the function lowers to `frintn` - confirm.
#[cfg_attr(test, assert_instr(frintn))]
pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
// NOTE(review): the lint is presumably silenced because a SIMD vector type is
// passed through an `extern "C"` declaration - confirm.
#[allow(improper_ctypes)]
extern "C" {
// On aarch64, bind this declaration to the LLVM intrinsic by link name.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
}
vrndn_f32_(a)
}
/// Floating-point round to integral, to nearest with ties to even
///
/// Takes a `float32x4_t` and returns a `float32x4_t`: the rounded result stays
/// a floating-point vector rather than being converted to an integer vector.
/// The call is forwarded unchanged to the `llvm.aarch64.neon.frintn.v4f32`
/// intrinsic declared inside the function.
///
/// # Safety
///
/// This function is compiled with the `neon` target feature enabled; the
/// caller must only invoke it on a CPU that supports that feature.
#[inline]
#[target_feature(enable = "neon")]
// Test builds only: attach `assert_instr(frintn)`.
// NOTE(review): presumably asserts the function lowers to `frintn` - confirm.
#[cfg_attr(test, assert_instr(frintn))]
pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
// NOTE(review): the lint is presumably silenced because a SIMD vector type is
// passed through an `extern "C"` declaration - confirm.
#[allow(improper_ctypes)]
extern "C" {
// On aarch64, bind this declaration to the LLVM intrinsic by link name.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
}
vrndnq_f32_(a)
}
/// Floating-point round to integral, to nearest with ties to even
#[inline]
#[target_feature(enable = "neon")]
@ -8884,22 +8858,6 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndn_f32() {
    // Both lanes are exact ties, so each must land on its even neighbour.
    let expected: f32x2 = f32x2::new(-2.0, 0.0);
    let input: f32x2 = f32x2::new(-1.5, 0.5);
    // Round in the intrinsic's vector type, then view the result as `f32x2`.
    let rounded = vrndn_f32(transmute(input));
    let actual: f32x2 = transmute(rounded);
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndnq_f32() {
    // Every lane is an exact tie; 1.5 and 2.5 must both resolve to 2.0.
    let expected: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
    let input: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
    // Round in the intrinsic's vector type, then view the result as `f32x4`.
    let rounded = vrndnq_f32(transmute(input));
    let actual: f32x4 = transmute(rounded);
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndn_f64() {
let a: f64 = -1.5;

View file

@ -4198,6 +4198,38 @@ pub unsafe fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
vrhaddq_s32_(a, b)
}
/// Floating-point round to integral, to nearest with ties to even
///
/// Takes a `float32x2_t` and returns a `float32x2_t`: the rounded result stays
/// a floating-point vector rather than being converted to an integer vector.
/// The call is forwarded unchanged to an LLVM intrinsic chosen by target
/// architecture: `llvm.arm.neon.vrintn.v2f32` on arm and
/// `llvm.aarch64.neon.frintn.v2f32` on aarch64.
///
/// # Safety
///
/// This function is compiled with the `neon` target feature enabled (plus
/// `fp-armv8` and `v8` on arm); the caller must only invoke it on a CPU that
/// supports those features.
#[inline]
#[target_feature(enable = "neon")]
// arm only: additionally enable the `fp-armv8` and `v8` target features.
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
// Test builds only: attach `assert_instr` with `vrintn` on arm, `frintn` on aarch64.
// NOTE(review): presumably the instruction the function must lower to - confirm.
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))]
pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
// NOTE(review): the lint is presumably silenced because a SIMD vector type is
// passed through an `extern "C"` declaration - confirm.
#[allow(improper_ctypes)]
extern "C" {
// Bind this single declaration to the per-architecture LLVM intrinsic.
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
}
vrndn_f32_(a)
}
/// Floating-point round to integral, to nearest with ties to even
///
/// Takes a `float32x4_t` and returns a `float32x4_t`: the rounded result stays
/// a floating-point vector rather than being converted to an integer vector.
/// The call is forwarded unchanged to an LLVM intrinsic chosen by target
/// architecture: `llvm.arm.neon.vrintn.v4f32` on arm and
/// `llvm.aarch64.neon.frintn.v4f32` on aarch64.
///
/// # Safety
///
/// This function is compiled with the `neon` target feature enabled (plus
/// `fp-armv8` and `v8` on arm); the caller must only invoke it on a CPU that
/// supports those features.
#[inline]
#[target_feature(enable = "neon")]
// arm only: additionally enable the `fp-armv8` and `v8` target features.
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
// Test builds only: attach `assert_instr` with `vrintn` on arm, `frintn` on aarch64.
// NOTE(review): presumably the instruction the function must lower to - confirm.
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))]
pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
// NOTE(review): the lint is presumably silenced because a SIMD vector type is
// passed through an `extern "C"` declaration - confirm.
#[allow(improper_ctypes)]
extern "C" {
// Bind this single declaration to the per-architecture LLVM intrinsic.
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")]
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
}
vrndnq_f32_(a)
}
/// Saturating add
#[inline]
#[target_feature(enable = "neon")]
@ -14921,6 +14953,22 @@ mod test {
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndn_f32() {
    // Both lanes are exact ties, so each must land on its even neighbour.
    let expected: f32x2 = f32x2::new(-2.0, 0.0);
    let input: f32x2 = f32x2::new(-1.5, 0.5);
    // Round in the intrinsic's vector type, then view the result as `f32x2`.
    let rounded = vrndn_f32(transmute(input));
    let actual: f32x2 = transmute(rounded);
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vrndnq_f32() {
    // Every lane is an exact tie; 1.5 and 2.5 must both resolve to 2.0.
    let expected: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
    let input: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
    // Round in the intrinsic's vector type, then view the result as `f32x4`.
    let rounded = vrndnq_f32(transmute(input));
    let actual: f32x4 = transmute(rounded);
    assert_eq!(actual, expected);
}
#[simd_test(enable = "neon")]
unsafe fn test_vqadd_u8() {
let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);

View file

@ -1401,7 +1401,12 @@ validate -2.0, 0.0, 2.0, 2.0
link-aarch64 = frintn._EXT_
aarch64 = frintn
generate float*_t, float64x*_t
generate float64x*_t
target = fp-armv8
arm = vrintn
link-arm = vrintn._EXT_
generate float*_t
/// Floating-point round to integral, toward minus infinity
name = vrndm
@ -3901,4 +3906,4 @@ validate MAX, 7
aarch64 = sqabs
link-aarch64 = sqabs._EXT_
generate int64x*_t
generate int64x*_t