Add AArch64 vrnd*_f64 Neon intrinsics.
The LLVM intrinsic doesn't support float64x1_t, but the required instruction is a scalar form (e.g. `frint32x <Dd>, <Dn>`), so we can implement these using the scalar intrinsic. Note that Clang does not support these intrinsics, so they aren't covered by intrinsic-test. Additional validation is included in this patch to ensure that we're selecting an instruction with the same behaviour as the corresponding vector form (which all have intrinsic-tests).
This commit is contained in:
parent
0459405ea9
commit
31e17e39c2
2 changed files with 454 additions and 36 deletions
|
|
@ -15312,6 +15312,21 @@ pub unsafe fn vrnd32xq_f64(a: float64x2_t) -> float64x2_t {
|
|||
vrnd32xq_f64_(a)
|
||||
}
|
||||
|
||||
/// Floating-point round to 32-bit integer, using current rounding mode
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32x_f64)
|
||||
///
/// The single `f64` lane of `a` is extracted, passed to the scalar
/// `llvm.aarch64.frint32x.f64` intrinsic, and the scalar result is
/// reinterpreted as a `float64x1_t`.
///
/// # Safety
///
/// This function is compiled with the `neon` and `frintts` target features
/// enabled, so it must only be called when the CPU supports both.
#[inline]
|
||||
#[target_feature(enable = "neon,frintts")]
|
||||
#[cfg_attr(test, assert_instr(frint32x))]
|
||||
pub unsafe fn vrnd32x_f64(a: float64x1_t) -> float64x1_t {
|
||||
// NOTE(review): the declaration below only uses `f64`, so this allow may be
// unnecessary — confirm before removing.
#[allow(improper_ctypes)]
|
||||
extern "unadjusted" {
|
||||
// Bind the scalar (f64 -> f64) LLVM intrinsic under a local name.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.frint32x.f64")]
|
||||
fn vrnd32x_f64_(a: f64) -> f64;
|
||||
}
|
||||
// Extract lane 0, round it with the scalar intrinsic, then reinterpret the
// f64 result as a float64x1_t.
transmute(vrnd32x_f64_(simd_extract(a, 0)))
|
||||
}
|
||||
|
||||
/// Floating-point round to 32-bit integer toward zero
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f32)
|
||||
|
|
@ -15357,6 +15372,21 @@ pub unsafe fn vrnd32zq_f64(a: float64x2_t) -> float64x2_t {
|
|||
vrnd32zq_f64_(a)
|
||||
}
|
||||
|
||||
/// Floating-point round to 32-bit integer toward zero
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd32z_f64)
|
||||
///
/// The single `f64` lane of `a` is extracted, passed to the scalar
/// `llvm.aarch64.frint32z.f64` intrinsic, and the scalar result is
/// reinterpreted as a `float64x1_t`.
///
/// # Safety
///
/// This function is compiled with the `neon` and `frintts` target features
/// enabled, so it must only be called when the CPU supports both.
#[inline]
|
||||
#[target_feature(enable = "neon,frintts")]
|
||||
#[cfg_attr(test, assert_instr(frint32z))]
|
||||
pub unsafe fn vrnd32z_f64(a: float64x1_t) -> float64x1_t {
|
||||
// NOTE(review): the declaration below only uses `f64`, so this allow may be
// unnecessary — confirm before removing.
#[allow(improper_ctypes)]
|
||||
extern "unadjusted" {
|
||||
// Bind the scalar (f64 -> f64) LLVM intrinsic under a local name.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.frint32z.f64")]
|
||||
fn vrnd32z_f64_(a: f64) -> f64;
|
||||
}
|
||||
// Extract lane 0, round it with the scalar intrinsic, then reinterpret the
// f64 result as a float64x1_t.
transmute(vrnd32z_f64_(simd_extract(a, 0)))
|
||||
}
|
||||
|
||||
/// Floating-point round to 64-bit integer, using current rounding mode
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f32)
|
||||
|
|
@ -15402,6 +15432,21 @@ pub unsafe fn vrnd64xq_f64(a: float64x2_t) -> float64x2_t {
|
|||
vrnd64xq_f64_(a)
|
||||
}
|
||||
|
||||
/// Floating-point round to 64-bit integer, using current rounding mode
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64x_f64)
|
||||
///
/// The single `f64` lane of `a` is extracted, passed to the scalar
/// `llvm.aarch64.frint64x.f64` intrinsic, and the scalar result is
/// reinterpreted as a `float64x1_t`.
///
/// # Safety
///
/// This function is compiled with the `neon` and `frintts` target features
/// enabled, so it must only be called when the CPU supports both.
#[inline]
|
||||
#[target_feature(enable = "neon,frintts")]
|
||||
#[cfg_attr(test, assert_instr(frint64x))]
|
||||
pub unsafe fn vrnd64x_f64(a: float64x1_t) -> float64x1_t {
|
||||
// NOTE(review): the declaration below only uses `f64`, so this allow may be
// unnecessary — confirm before removing.
#[allow(improper_ctypes)]
|
||||
extern "unadjusted" {
|
||||
// Bind the scalar (f64 -> f64) LLVM intrinsic under a local name.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.frint64x.f64")]
|
||||
fn vrnd64x_f64_(a: f64) -> f64;
|
||||
}
|
||||
// Extract lane 0, round it with the scalar intrinsic, then reinterpret the
// f64 result as a float64x1_t.
transmute(vrnd64x_f64_(simd_extract(a, 0)))
|
||||
}
|
||||
|
||||
/// Floating-point round to 64-bit integer toward zero
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f32)
|
||||
|
|
@ -15447,6 +15492,21 @@ pub unsafe fn vrnd64zq_f64(a: float64x2_t) -> float64x2_t {
|
|||
vrnd64zq_f64_(a)
|
||||
}
|
||||
|
||||
/// Floating-point round to 64-bit integer toward zero
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vrnd64z_f64)
|
||||
///
/// The single `f64` lane of `a` is extracted, passed to the scalar
/// `llvm.aarch64.frint64z.f64` intrinsic, and the scalar result is
/// reinterpreted as a `float64x1_t`.
///
/// # Safety
///
/// This function is compiled with the `neon` and `frintts` target features
/// enabled, so it must only be called when the CPU supports both.
#[inline]
|
||||
#[target_feature(enable = "neon,frintts")]
|
||||
#[cfg_attr(test, assert_instr(frint64z))]
|
||||
pub unsafe fn vrnd64z_f64(a: float64x1_t) -> float64x1_t {
|
||||
// NOTE(review): the declaration below only uses `f64`, so this allow may be
// unnecessary — confirm before removing.
#[allow(improper_ctypes)]
|
||||
extern "unadjusted" {
|
||||
// Bind the scalar (f64 -> f64) LLVM intrinsic under a local name.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.frint64z.f64")]
|
||||
fn vrnd64z_f64_(a: f64) -> f64;
|
||||
}
|
||||
// Extract lane 0, round it with the scalar intrinsic, then reinterpret the
// f64 result as a float64x1_t.
transmute(vrnd64z_f64_(simd_extract(a, 0)))
|
||||
}
|
||||
|
||||
/// Transpose vectors
|
||||
///
|
||||
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtrn1_s8)
|
||||
|
|
@ -26856,98 +26916,330 @@ mod test {
|
|||
|
||||
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd32x_f32() {
|
||||
let a: f32x2 = f32x2::new(1.1, 1.9);
|
||||
let e: f32x2 = f32x2::new(1.0, 2.0);
|
||||
let a: f32x2 = f32x2::new(-1.5, 2.9);
|
||||
let e: f32x2 = f32x2::new(-2.0, 3.0);
|
||||
let r: f32x2 = transmute(vrnd32x_f32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd32xq_f32() {
|
||||
let a: f32x4 = f32x4::new(1.1, 1.9, -1.7, -2.3);
|
||||
let e: f32x4 = f32x4::new(1.0, 2.0, -2.0, -2.0);
|
||||
let a: f32x4 = f32x4::new(-1.5, 2.9, 1.5, -2.5);
|
||||
let e: f32x4 = f32x4::new(-2.0, 3.0, 2.0, -2.0);
|
||||
let r: f32x4 = transmute(vrnd32xq_f32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd32xq_f64() {
|
||||
let a: f64x2 = f64x2::new(1.1, 1.9);
|
||||
let e: f64x2 = f64x2::new(1.0, 2.0);
|
||||
let a: f64x2 = f64x2::new(-1.5, 2.9);
|
||||
let e: f64x2 = f64x2::new(-2.0, 3.0);
|
||||
let r: f64x2 = transmute(vrnd32xq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: f64x2 = f64x2::new(1.5, -2.5);
|
||||
let e: f64x2 = f64x2::new(2.0, -2.0);
|
||||
let r: f64x2 = transmute(vrnd32xq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: f64x2 = f64x2::new(2147483647.499999762, 2147483647.5);
|
||||
let e: f64x2 = f64x2::new(2147483647.0, -2147483648.0);
|
||||
let r: f64x2 = transmute(vrnd32xq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: f64x2 = f64x2::new(-2147483647.499999762, -2147483648.500000477);
|
||||
let e: f64x2 = f64x2::new(-2147483647.0, -2147483648.0);
|
||||
let r: f64x2 = transmute(vrnd32xq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
// Checks vrnd32x_f64 on single f64 inputs. Expected values assume the default
// rounding mode: round-to-nearest (ties-to-even).
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd32x_f64() {
|
||||
// Tie: -1.5 rounds to the even neighbour -2.0.
let a: f64 = -1.5;
|
||||
let e: f64 = -2.0;
|
||||
let r: f64 = transmute(vrnd32x_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// Tie: 1.5 rounds to the even neighbour 2.0.
let a: f64 = 1.5;
|
||||
let e: f64 = 2.0;
|
||||
let r: f64 = transmute(vrnd32x_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// The biggest f64 that rounds to i32::MAX.
let a: f64 = 2147483647.499999762;
|
||||
let e: f64 = 2147483647.0;
|
||||
let r: f64 = transmute(vrnd32x_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// The smallest f64 that rounds to i32::MIN + 1.
let a: f64 = -2147483647.499999762;
|
||||
let e: f64 = -2147483647.0;
|
||||
let r: f64 = transmute(vrnd32x_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// Non-tie: 2.9 rounds to 3.0.
let a: f64 = 2.9;
|
||||
let e: f64 = 3.0;
|
||||
let r: f64 = transmute(vrnd32x_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// Tie: -2.5 rounds to the even neighbour -2.0.
let a: f64 = -2.5;
|
||||
let e: f64 = -2.0;
|
||||
let r: f64 = transmute(vrnd32x_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// The smallest positive f64 that rounds out of the i32 range; the expected
// result is -2147483648.0 (i32::MIN).
let a: f64 = 2147483647.5;
|
||||
let e: f64 = -2147483648.0;
|
||||
let r: f64 = transmute(vrnd32x_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// The largest negative f64 that rounds out of the i32 range; the expected
// result is again -2147483648.0.
let a: f64 = -2147483648.500000477;
|
||||
let e: f64 = -2147483648.0;
|
||||
let r: f64 = transmute(vrnd32x_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd32z_f32() {
|
||||
let a: f32x2 = f32x2::new(1.1, 1.9);
|
||||
let e: f32x2 = f32x2::new(1.0, 1.0);
|
||||
let a: f32x2 = f32x2::new(-1.5, 2.9);
|
||||
let e: f32x2 = f32x2::new(-1.0, 2.0);
|
||||
let r: f32x2 = transmute(vrnd32z_f32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd32zq_f32() {
|
||||
let a: f32x4 = f32x4::new(1.1, 1.9, -1.7, -2.3);
|
||||
let e: f32x4 = f32x4::new(1.0, 1.0, -1.0, -2.0);
|
||||
let a: f32x4 = f32x4::new(-1.5, 2.9, 1.5, -2.5);
|
||||
let e: f32x4 = f32x4::new(-1.0, 2.0, 1.0, -2.0);
|
||||
let r: f32x4 = transmute(vrnd32zq_f32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd32zq_f64() {
|
||||
let a: f64x2 = f64x2::new(1.1, 1.9);
|
||||
let e: f64x2 = f64x2::new(1.0, 1.0);
|
||||
let a: f64x2 = f64x2::new(-1.5, 2.9);
|
||||
let e: f64x2 = f64x2::new(-1.0, 2.0);
|
||||
let r: f64x2 = transmute(vrnd32zq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: f64x2 = f64x2::new(1.5, -2.5);
|
||||
let e: f64x2 = f64x2::new(1.0, -2.0);
|
||||
let r: f64x2 = transmute(vrnd32zq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: f64x2 = f64x2::new(2147483647.999999762, 2147483648.0);
|
||||
let e: f64x2 = f64x2::new(2147483647.0, -2147483648.0);
|
||||
let r: f64x2 = transmute(vrnd32zq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: f64x2 = f64x2::new(-2147483647.999999762, -2147483649.0);
|
||||
let e: f64x2 = f64x2::new(-2147483647.0, -2147483648.0);
|
||||
let r: f64x2 = transmute(vrnd32zq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
// Checks vrnd32z_f64 (round toward zero) on single f64 inputs, including the
// boundaries of the i32 range.
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd32z_f64() {
|
||||
// Toward zero: -1.5 becomes -1.0.
let a: f64 = -1.5;
|
||||
let e: f64 = -1.0;
|
||||
let r: f64 = transmute(vrnd32z_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// Toward zero: 1.5 becomes 1.0.
let a: f64 = 1.5;
|
||||
let e: f64 = 1.0;
|
||||
let r: f64 = transmute(vrnd32z_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// The biggest f64 that rounds to i32::MAX.
let a: f64 = 2147483647.999999762;
|
||||
let e: f64 = 2147483647.0;
|
||||
let r: f64 = transmute(vrnd32z_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// The smallest f64 that rounds to i32::MIN + 1.
let a: f64 = -2147483647.999999762;
|
||||
let e: f64 = -2147483647.0;
|
||||
let r: f64 = transmute(vrnd32z_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// Toward zero: 2.9 becomes 2.0.
let a: f64 = 2.9;
|
||||
let e: f64 = 2.0;
|
||||
let r: f64 = transmute(vrnd32z_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// Toward zero: -2.5 becomes -2.0.
let a: f64 = -2.5;
|
||||
let e: f64 = -2.0;
|
||||
let r: f64 = transmute(vrnd32z_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// The smallest positive f64 that rounds out of the i32 range; the expected
// result is -2147483648.0 (i32::MIN).
let a: f64 = 2147483648.0;
|
||||
let e: f64 = -2147483648.0;
|
||||
let r: f64 = transmute(vrnd32z_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// The largest negative f64 that rounds out of the i32 range; the expected
// result is again -2147483648.0.
let a: f64 = -2147483649.0;
|
||||
let e: f64 = -2147483648.0;
|
||||
let r: f64 = transmute(vrnd32z_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd64x_f32() {
|
||||
let a: f32x2 = f32x2::new(1.1, 1.9);
|
||||
let e: f32x2 = f32x2::new(1.0, 2.0);
|
||||
let a: f32x2 = f32x2::new(-1.5, 2.9);
|
||||
let e: f32x2 = f32x2::new(-2.0, 3.0);
|
||||
let r: f32x2 = transmute(vrnd64x_f32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd64xq_f32() {
|
||||
let a: f32x4 = f32x4::new(1.1, 1.9, -1.7, -2.3);
|
||||
let e: f32x4 = f32x4::new(1.0, 2.0, -2.0, -2.0);
|
||||
let a: f32x4 = f32x4::new(-1.5, 2.9, 1.5, -2.5);
|
||||
let e: f32x4 = f32x4::new(-2.0, 3.0, 2.0, -2.0);
|
||||
let r: f32x4 = transmute(vrnd64xq_f32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd64xq_f64() {
|
||||
let a: f64x2 = f64x2::new(1.1, 1.9);
|
||||
let e: f64x2 = f64x2::new(1.0, 2.0);
|
||||
let a: f64x2 = f64x2::new(-1.5, 2.9);
|
||||
let e: f64x2 = f64x2::new(-2.0, 3.0);
|
||||
let r: f64x2 = transmute(vrnd64xq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: f64x2 = f64x2::new(1.5, -2.5);
|
||||
let e: f64x2 = f64x2::new(2.0, -2.0);
|
||||
let r: f64x2 = transmute(vrnd64xq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: f64x2 = f64x2::new(9223372036854774784.0, 9223372036854775808.0);
|
||||
let e: f64x2 = f64x2::new(9223372036854774784.0, -9223372036854775808.0);
|
||||
let r: f64x2 = transmute(vrnd64xq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: f64x2 = f64x2::new(-9223372036854775808.0, -9223372036854777856.0);
|
||||
let e: f64x2 = f64x2::new(-9223372036854775808.0, -9223372036854775808.0);
|
||||
let r: f64x2 = transmute(vrnd64xq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
// Checks vrnd64x_f64 on single f64 inputs. Expected values assume the default
// rounding mode: round-to-nearest (ties-to-even).
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd64x_f64() {
|
||||
// Tie: -1.5 rounds to the even neighbour -2.0.
let a: f64 = -1.5;
|
||||
let e: f64 = -2.0;
|
||||
let r: f64 = transmute(vrnd64x_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// Tie: 1.5 rounds to the even neighbour 2.0.
let a: f64 = 1.5;
|
||||
let e: f64 = 2.0;
|
||||
let r: f64 = transmute(vrnd64x_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// The biggest f64 representable as an i64 (0x7ffffffffffffc00); expected unchanged.
let a: f64 = 9223372036854774784.0;
|
||||
let e: f64 = 9223372036854774784.0;
|
||||
let r: f64 = transmute(vrnd64x_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// The smallest f64 representable as an i64 (i64::MIN); expected unchanged.
let a: f64 = -9223372036854775808.0;
|
||||
let e: f64 = -9223372036854775808.0;
|
||||
let r: f64 = transmute(vrnd64x_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// Non-tie: 2.9 rounds to 3.0.
let a: f64 = 2.9;
|
||||
let e: f64 = 3.0;
|
||||
let r: f64 = transmute(vrnd64x_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// Tie: -2.5 rounds to the even neighbour -2.0.
let a: f64 = -2.5;
|
||||
let e: f64 = -2.0;
|
||||
let r: f64 = transmute(vrnd64x_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// The smallest positive f64 that is out of the i64 range (2^63); the
// expected result is -9223372036854775808.0 (i64::MIN).
let a: f64 = 9223372036854775808.0;
|
||||
let e: f64 = -9223372036854775808.0;
|
||||
let r: f64 = transmute(vrnd64x_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// The biggest negative f64 that is out of the i64 range; the expected
// result is again -9223372036854775808.0.
let a: f64 = -9223372036854777856.0;
|
||||
let e: f64 = -9223372036854775808.0;
|
||||
let r: f64 = transmute(vrnd64x_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd64z_f32() {
|
||||
let a: f32x2 = f32x2::new(1.1, 1.9);
|
||||
let e: f32x2 = f32x2::new(1.0, 1.0);
|
||||
let a: f32x2 = f32x2::new(-1.5, 2.9);
|
||||
let e: f32x2 = f32x2::new(-1.0, 2.0);
|
||||
let r: f32x2 = transmute(vrnd64z_f32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd64zq_f32() {
|
||||
let a: f32x4 = f32x4::new(1.1, 1.9, -1.7, -2.3);
|
||||
let e: f32x4 = f32x4::new(1.0, 1.0, -1.0, -2.0);
|
||||
let a: f32x4 = f32x4::new(-1.5, 2.9, 1.5, -2.5);
|
||||
let e: f32x4 = f32x4::new(-1.0, 2.0, 1.0, -2.0);
|
||||
let r: f32x4 = transmute(vrnd64zq_f32(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd64zq_f64() {
|
||||
let a: f64x2 = f64x2::new(1.1, 1.9);
|
||||
let e: f64x2 = f64x2::new(1.0, 1.0);
|
||||
let a: f64x2 = f64x2::new(-1.5, 2.9);
|
||||
let e: f64x2 = f64x2::new(-1.0, 2.0);
|
||||
let r: f64x2 = transmute(vrnd64zq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: f64x2 = f64x2::new(1.5, -2.5);
|
||||
let e: f64x2 = f64x2::new(1.0, -2.0);
|
||||
let r: f64x2 = transmute(vrnd64zq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: f64x2 = f64x2::new(9223372036854774784.0, 9223372036854775808.0);
|
||||
let e: f64x2 = f64x2::new(9223372036854774784.0, -9223372036854775808.0);
|
||||
let r: f64x2 = transmute(vrnd64zq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
let a: f64x2 = f64x2::new(-9223372036854775808.0, -9223372036854777856.0);
|
||||
let e: f64x2 = f64x2::new(-9223372036854775808.0, -9223372036854775808.0);
|
||||
let r: f64x2 = transmute(vrnd64zq_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
// Checks vrnd64z_f64 (round toward zero) on single f64 inputs, including the
// boundaries of the i64 range.
#[simd_test(enable = "neon,frintts")]
|
||||
unsafe fn test_vrnd64z_f64() {
|
||||
// Toward zero: -1.5 becomes -1.0.
let a: f64 = -1.5;
|
||||
let e: f64 = -1.0;
|
||||
let r: f64 = transmute(vrnd64z_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// Toward zero: 1.5 becomes 1.0.
let a: f64 = 1.5;
|
||||
let e: f64 = 1.0;
|
||||
let r: f64 = transmute(vrnd64z_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// The biggest f64 representable as an i64 (0x7ffffffffffffc00); expected unchanged.
let a: f64 = 9223372036854774784.0;
|
||||
let e: f64 = 9223372036854774784.0;
|
||||
let r: f64 = transmute(vrnd64z_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// The smallest f64 representable as an i64 (i64::MIN); expected unchanged.
let a: f64 = -9223372036854775808.0;
|
||||
let e: f64 = -9223372036854775808.0;
|
||||
let r: f64 = transmute(vrnd64z_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// Toward zero: 2.9 becomes 2.0.
let a: f64 = 2.9;
|
||||
let e: f64 = 2.0;
|
||||
let r: f64 = transmute(vrnd64z_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// Toward zero: -2.5 becomes -2.0.
let a: f64 = -2.5;
|
||||
let e: f64 = -2.0;
|
||||
let r: f64 = transmute(vrnd64z_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// The smallest positive f64 that is out of the i64 range (2^63); the
// expected result is -9223372036854775808.0 (i64::MIN).
let a: f64 = 9223372036854775808.0;
|
||||
let e: f64 = -9223372036854775808.0;
|
||||
let r: f64 = transmute(vrnd64z_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
|
||||
// The biggest negative f64 that is out of the i64 range; the expected
// result is again -9223372036854775808.0.
let a: f64 = -9223372036854777856.0;
|
||||
let e: f64 = -9223372036854775808.0;
|
||||
let r: f64 = transmute(vrnd64z_f64(transmute(a)));
|
||||
assert_eq!(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "neon")]
|
||||
|
|
|
|||
|
|
@ -7212,43 +7212,169 @@ generate uint64x2_t
|
|||
|
||||
/// Floating-point round to 32-bit integer, using current rounding mode
|
||||
name = vrnd32x
|
||||
a = 1.1, 1.9, -1.7, -2.3
|
||||
validate 1.0, 2.0, -2.0, -2.0
|
||||
target = frintts
|
||||
|
||||
// For validation, the rounding mode should be the default: round-to-nearest (ties-to-even).
|
||||
a = -1.5, 2.9, 1.5, -2.5
|
||||
validate -2.0, 3.0, 2.0, -2.0
|
||||
|
||||
aarch64 = frint32x
|
||||
link-aarch64 = frint32x._EXT_
|
||||
generate float32x2_t, float32x4_t, float64x2_t
|
||||
generate float32x2_t, float32x4_t
|
||||
|
||||
// The float64x1_t form uses a different LLVM link and isn't supported by Clang
|
||||
// (and so has no intrinsic-test), so perform extra validation to make sure
|
||||
// that it matches the float64x2_t form.
|
||||
|
||||
a = 1.5, -2.5
|
||||
validate 2.0, -2.0
|
||||
// - The biggest f64 that rounds to i32::MAX.
|
||||
// - The smallest positive f64 that rounds out of range.
|
||||
a = 2147483647.499999762, 2147483647.5
|
||||
validate 2147483647.0, -2147483648.0
|
||||
// - The smallest f64 that rounds to i32::MIN + 1.
|
||||
// - The largest negative f64 that rounds out of range.
|
||||
a = -2147483647.499999762, -2147483648.500000477
|
||||
validate -2147483647.0, -2147483648.0
|
||||
generate float64x2_t
|
||||
|
||||
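// The float64x1_t tests only use the first value of each `a`/`validate` list above,
// so repeat the second (odd-indexed) values here as single-element tests.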
|
||||
a = 2.9
|
||||
validate 3.0
|
||||
a = -2.5
|
||||
validate -2.0
|
||||
a = 2147483647.5
|
||||
validate -2147483648.0
|
||||
a = -2147483648.500000477
|
||||
validate -2147483648.0
|
||||
|
||||
multi_fn = transmute, {self-out-_, {simd_extract, a, 0}}
|
||||
link-aarch64 = llvm.aarch64.frint32x.f64:f64:::f64
|
||||
generate float64x1_t
|
||||
|
||||
/// Floating-point round to 32-bit integer toward zero
|
||||
name = vrnd32z
|
||||
a = 1.1, 1.9, -1.7, -2.3
|
||||
validate 1.0, 1.0, -1.0, -2.0
|
||||
target = frintts
|
||||
|
||||
a = -1.5, 2.9, 1.5, -2.5
|
||||
validate -1.0, 2.0, 1.0, -2.0
|
||||
|
||||
aarch64 = frint32z
|
||||
link-aarch64 = frint32z._EXT_
|
||||
generate float32x2_t, float32x4_t, float64x2_t
|
||||
generate float32x2_t, float32x4_t
|
||||
|
||||
// The float64x1_t form uses a different LLVM link and isn't supported by Clang
|
||||
// (and so has no intrinsic-test), so perform extra validation to make sure
|
||||
// that it matches the float64x2_t form.
|
||||
|
||||
a = 1.5, -2.5
|
||||
validate 1.0, -2.0
|
||||
// - The biggest f64 that rounds to i32::MAX.
|
||||
// - The smallest positive f64 that rounds out of range.
|
||||
a = 2147483647.999999762, 2147483648.0
|
||||
validate 2147483647.0, -2147483648.0
|
||||
// - The smallest f64 that rounds to i32::MIN + 1.
|
||||
// - The largest negative f64 that rounds out of range.
|
||||
a = -2147483647.999999762, -2147483649.0
|
||||
validate -2147483647.0, -2147483648.0
|
||||
generate float64x2_t
|
||||
|
||||
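// The float64x1_t tests only use the first value of each `a`/`validate` list above,
// so repeat the second (odd-indexed) values here as single-element tests.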
|
||||
a = 2.9
|
||||
validate 2.0
|
||||
a = -2.5
|
||||
validate -2.0
|
||||
a = 2147483648.0
|
||||
validate -2147483648.0
|
||||
a = -2147483649.0
|
||||
validate -2147483648.0
|
||||
|
||||
multi_fn = transmute, {self-out-_, {simd_extract, a, 0}}
|
||||
link-aarch64 = llvm.aarch64.frint32z.f64:f64:::f64
|
||||
generate float64x1_t
|
||||
|
||||
/// Floating-point round to 64-bit integer, using current rounding mode
|
||||
name = vrnd64x
|
||||
a = 1.1, 1.9, -1.7, -2.3
|
||||
validate 1.0, 2.0, -2.0, -2.0
|
||||
target = frintts
|
||||
|
||||
// For validation, the rounding mode should be the default: round-to-nearest (ties-to-even).
|
||||
a = -1.5, 2.9, 1.5, -2.5
|
||||
validate -2.0, 3.0, 2.0, -2.0
|
||||
|
||||
aarch64 = frint64x
|
||||
link-aarch64 = frint64x._EXT_
|
||||
generate float32x2_t, float32x4_t, float64x2_t
|
||||
generate float32x2_t, float32x4_t
|
||||
|
||||
// The float64x1_t form uses a different LLVM link and isn't supported by Clang
|
||||
// (and so has no intrinsic-test), so perform extra validation to make sure
|
||||
// that it matches the float64x2_t form.
|
||||
|
||||
a = 1.5, -2.5
|
||||
validate 2.0, -2.0
|
||||
// - The biggest f64 representable as an i64 (0x7ffffffffffffc00).
|
||||
// - The smallest positive f64 that is out of range (2^63).
|
||||
a = 9223372036854774784.0, 9223372036854775808.0
|
||||
validate 9223372036854774784.0, -9223372036854775808.0
|
||||
// - The smallest f64 representable as an i64 (i64::MIN).
|
||||
// - The biggest negative f64 that is out of range.
|
||||
a = -9223372036854775808.0, -9223372036854777856.0
|
||||
validate -9223372036854775808.0, -9223372036854775808.0
|
||||
generate float64x2_t
|
||||
|
||||
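// The float64x1_t tests only use the first value of each `a`/`validate` list above,
// so repeat the second (odd-indexed) values here as single-element tests.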
|
||||
a = 2.9
|
||||
validate 3.0
|
||||
a = -2.5
|
||||
validate -2.0
|
||||
a = 9223372036854775808.0
|
||||
validate -9223372036854775808.0
|
||||
a = -9223372036854777856.0
|
||||
validate -9223372036854775808.0
|
||||
|
||||
multi_fn = transmute, {self-out-_, {simd_extract, a, 0}}
|
||||
link-aarch64 = llvm.aarch64.frint64x.f64:f64:::f64
|
||||
generate float64x1_t
|
||||
|
||||
/// Floating-point round to 64-bit integer toward zero
|
||||
name = vrnd64z
|
||||
a = 1.1, 1.9, -1.7, -2.3
|
||||
validate 1.0, 1.0, -1.0, -2.0
|
||||
target = frintts
|
||||
|
||||
a = -1.5, 2.9, 1.5, -2.5
|
||||
validate -1.0, 2.0, 1.0, -2.0
|
||||
|
||||
aarch64 = frint64z
|
||||
link-aarch64 = frint64z._EXT_
|
||||
generate float32x2_t, float32x4_t, float64x2_t
|
||||
generate float32x2_t, float32x4_t
|
||||
|
||||
// The float64x1_t form uses a different LLVM link and isn't supported by Clang
|
||||
// (and so has no intrinsic-test), so perform extra validation to make sure
|
||||
// that it matches the float64x2_t form.
|
||||
|
||||
a = 1.5, -2.5
|
||||
validate 1.0, -2.0
|
||||
// - The biggest f64 representable as an i64 (0x7ffffffffffffc00).
|
||||
// - The smallest positive f64 that is out of range (2^63).
|
||||
a = 9223372036854774784.0, 9223372036854775808.0
|
||||
validate 9223372036854774784.0, -9223372036854775808.0
|
||||
// - The smallest f64 representable as an i64 (i64::MIN).
|
||||
// - The biggest negative f64 that is out of range.
|
||||
a = -9223372036854775808.0, -9223372036854777856.0
|
||||
validate -9223372036854775808.0, -9223372036854775808.0
|
||||
generate float64x2_t
|
||||
|
||||
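// The float64x1_t tests only use the first value of each `a`/`validate` list above,
// so repeat the second (odd-indexed) values here as single-element tests.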
|
||||
a = 2.9
|
||||
validate 2.0
|
||||
a = -2.5
|
||||
validate -2.0
|
||||
a = 9223372036854775808.0
|
||||
validate -9223372036854775808.0
|
||||
a = -9223372036854777856.0
|
||||
validate -9223372036854775808.0
|
||||
|
||||
multi_fn = transmute, {self-out-_, {simd_extract, a, 0}}
|
||||
link-aarch64 = llvm.aarch64.frint64z.f64:f64:::f64
|
||||
generate float64x1_t
|
||||
|
||||
/// Transpose elements
|
||||
name = vtrn
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue