Add vrndn neon instructions (#1086)
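This adds the NEON `vrndn` intrinsics (`vrndn_f32` and `vrndnq_f32`) for ARM in addition to AArch64. They round each lane to the nearest integral value, with ties going to even, while keeping the lanes as floating-point values rather than converting them to integers.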
This commit is contained in:
parent
de3e8f72c5
commit
a43f92a181
3 changed files with 55 additions and 44 deletions
|
|
@ -2518,32 +2518,6 @@ pub unsafe fn vrndaq_f64(a: float64x2_t) -> float64x2_t {
|
|||
vrndaq_f64_(a)
|
||||
}
|
||||
|
||||
/// Floating-point round to integral, to nearest with ties to even
///
/// Forwards the two-lane `float32x2_t` argument to the externally linked
/// `vrndn_f32_` declaration and returns its result unchanged. This copy only
/// declares an AArch64 link name (`llvm.aarch64.neon.frintn.v2f32`).
///
/// NOTE(review): the function is `unsafe` and gated on the `neon` target
/// feature; presumably callers must ensure that feature is available -- confirm.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
// In test builds only, attach the `assert_instr(frintn)` check.
#[cfg_attr(test, assert_instr(frintn))]
|
||||
pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
// The link name is only applied when compiling for `aarch64`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
|
||||
fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
|
||||
}
|
||||
// Thin wrapper: the result of the linked declaration is returned as-is.
vrndn_f32_(a)
|
||||
}
|
||||
|
||||
/// Floating-point round to integral, to nearest with ties to even
///
/// Four-lane (`float32x4_t`) counterpart of `vrndn_f32`: forwards the argument
/// to the externally linked `vrndnq_f32_` declaration and returns its result
/// unchanged. This copy only declares an AArch64 link name
/// (`llvm.aarch64.neon.frintn.v4f32`).
///
/// NOTE(review): the function is `unsafe` and gated on the `neon` target
/// feature; presumably callers must ensure that feature is available -- confirm.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
// In test builds only, attach the `assert_instr(frintn)` check.
#[cfg_attr(test, assert_instr(frintn))]
|
||||
pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
// The link name is only applied when compiling for `aarch64`.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
|
||||
fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
|
||||
}
|
||||
// Thin wrapper: the result of the linked declaration is returned as-is.
vrndnq_f32_(a)
|
||||
}
|
||||
|
||||
/// Floating-point round to integral, to nearest with ties to even
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -8884,22 +8858,6 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
// Checks `vrndn_f32` on two tie inputs: each value lies exactly halfway
// between two integers and is expected to round to the even one.
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrndn_f32() {
|
||||
// Inputs: -1.5 (between -2 and -1) and 0.5 (between 0 and 1).
let a: f32x2 = f32x2::new(-1.5, 0.5);
|
||||
// Expected: the even neighbours, -2.0 and 0.0.
let e: f32x2 = f32x2::new(-2.0, 0.0);
|
||||
// `transmute` converts between the test-side `f32x2` and the intrinsic's `float32x2_t`.
let r: f32x2 = transmute(vrndn_f32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
// Checks `vrndnq_f32` on four tie inputs: each value lies exactly halfway
// between two integers and is expected to round to the even one.
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrndnq_f32() {
|
||||
// Inputs: -1.5, 0.5, 1.5 and 2.5 -- all exact ties.
let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
|
||||
// Expected: -2.0, 0.0, 2.0, 2.0 (note 1.5 and 2.5 both round to 2.0).
let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
|
||||
// `transmute` converts between the test-side `f32x4` and the intrinsic's `float32x4_t`.
let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrndn_f64() {
|
||||
let a: f64 = -1.5;
|
||||
|
|
|
|||
|
|
@ -4198,6 +4198,38 @@ pub unsafe fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
|
|||
vrhaddq_s32_(a, b)
|
||||
}
|
||||
|
||||
/// Floating-point round to integral, to nearest with ties to even
///
/// Forwards the two-lane `float32x2_t` argument to the externally linked
/// `vrndn_f32_` declaration and returns its result unchanged. The declaration
/// is bound to `llvm.arm.neon.vrintn.v2f32` on `arm` and to
/// `llvm.aarch64.neon.frintn.v2f32` on `aarch64`.
///
/// NOTE(review): the function is `unsafe` and gated on target features
/// (`neon`, plus `fp-armv8,v8` on `arm`); presumably callers must ensure those
/// features are available -- confirm.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
// On 32-bit ARM the `fp-armv8` and `v8` features are enabled in addition to `neon`.
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
|
||||
// In test builds, attach the per-architecture instruction check:
// `vrintn` on `arm`, `frintn` on `aarch64`.
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))]
|
||||
pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
// Exactly one of the two link names applies, selected by `target_arch`.
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")]
|
||||
fn vrndn_f32_(a: float32x2_t) -> float32x2_t;
|
||||
}
|
||||
// Thin wrapper: the result of the linked declaration is returned as-is.
vrndn_f32_(a)
|
||||
}
|
||||
|
||||
/// Floating-point round to integral, to nearest with ties to even
///
/// Four-lane (`float32x4_t`) counterpart of `vrndn_f32`: forwards the argument
/// to the externally linked `vrndnq_f32_` declaration and returns its result
/// unchanged. The declaration is bound to `llvm.arm.neon.vrintn.v4f32` on `arm`
/// and to `llvm.aarch64.neon.frintn.v4f32` on `aarch64`.
///
/// NOTE(review): the function is `unsafe` and gated on target features
/// (`neon`, plus `fp-armv8,v8` on `arm`); presumably callers must ensure those
/// features are available -- confirm.
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
// On 32-bit ARM the `fp-armv8` and `v8` features are enabled in addition to `neon`.
#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))]
|
||||
// In test builds, attach the per-architecture instruction check:
// `vrintn` on `arm`, `frintn` on `aarch64`.
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))]
|
||||
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))]
|
||||
pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t {
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
// Exactly one of the two link names applies, selected by `target_arch`.
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")]
|
||||
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")]
|
||||
fn vrndnq_f32_(a: float32x4_t) -> float32x4_t;
|
||||
}
|
||||
// Thin wrapper: the result of the linked declaration is returned as-is.
vrndnq_f32_(a)
|
||||
}
|
||||
|
||||
/// Saturating add
|
||||
#[inline]
|
||||
#[target_feature(enable = "neon")]
|
||||
|
|
@ -14921,6 +14953,22 @@ mod test {
|
|||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
// Checks `vrndn_f32` on two tie inputs: each value lies exactly halfway
// between two integers and is expected to round to the even one.
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrndn_f32() {
|
||||
// Inputs: -1.5 (between -2 and -1) and 0.5 (between 0 and 1).
let a: f32x2 = f32x2::new(-1.5, 0.5);
|
||||
// Expected: the even neighbours, -2.0 and 0.0.
let e: f32x2 = f32x2::new(-2.0, 0.0);
|
||||
// `transmute` converts between the test-side `f32x2` and the intrinsic's `float32x2_t`.
let r: f32x2 = transmute(vrndn_f32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
// Checks `vrndnq_f32` on four tie inputs: each value lies exactly halfway
// between two integers and is expected to round to the even one.
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vrndnq_f32() {
|
||||
// Inputs: -1.5, 0.5, 1.5 and 2.5 -- all exact ties.
let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
|
||||
// Expected: -2.0, 0.0, 2.0, 2.0 (note 1.5 and 2.5 both round to 2.0).
let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
|
||||
// `transmute` converts between the test-side `f32x4` and the intrinsic's `float32x4_t`.
let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
unsafe fn test_vqadd_u8() {
|
||||
let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
|
||||
|
|
|
|||
|
|
@ -1401,7 +1401,12 @@ validate -2.0, 0.0, 2.0, 2.0
|
|||
|
||||
link-aarch64 = frintn._EXT_
|
||||
aarch64 = frintn
|
||||
generate float*_t, float64x*_t
|
||||
generate float64x*_t
|
||||
|
||||
target = fp-armv8
|
||||
arm = vrintn
|
||||
link-arm = vrintn._EXT_
|
||||
generate float*_t
|
||||
|
||||
/// Floating-point round to integral, toward minus infinity
|
||||
name = vrndm
|
||||
|
|
@ -3901,4 +3906,4 @@ validate MAX, 7
|
|||
|
||||
aarch64 = sqabs
|
||||
link-aarch64 = sqabs._EXT_
|
||||
generate int64x*_t
|
||||
generate int64x*_t
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue