Add vroundps, vceilps, vfloorps, vsqrtps, vsqrtpd (#53)

* Add vroundps, vceilps, vfloorps, vsqrtps, vsqrtpd

* Uninhibit assert_instr on non-expanded intrinsics

Also use the new simd_test macro

* Use simd_test where possible

* Add automated tests for vround*

* Add target_feature guards to automated tests

* Move automated tests below their functions
This commit is contained in:
Adam Niederer 2017-09-27 22:41:37 -04:00 committed by Alex Crichton
parent bd75f7fcac
commit ddf4512561

View file

@ -110,6 +110,72 @@ pub unsafe fn _mm256_floor_pd(a: f64x4) -> f64x4 {
roundpd256(a, 0x01)
}
/// Round packed single-precision (32-bit) floating point elements in `a`
/// according to the flag `b`. The value of `b` may be as follows:
/// 0x00: Round to the nearest whole number.
/// 0x01: Round down, toward negative infinity.
/// 0x02: Round up, toward positive infinity.
/// 0x03: Truncate the values.
/// For a few additional values options, check the LLVM docs:
/// https://github.com/llvm-mirror/clang/blob/dcd8d797b20291f1a6b3e0ddda085aa2bbb382a8/lib/Headers/avxintrin.h#L382
#[inline(always)]
#[target_feature = "+avx"]
// #[cfg_attr(test, assert_instr(vroundps))]
// TODO: Replace with assert_expanded_instr https://github.com/rust-lang-nursery/stdsimd/issues/49
pub fn _mm256_round_ps(a: f32x8, b: i32) -> f32x8 {
macro_rules! call {
($imm8:expr) => {
unsafe { roundps256(a, $imm8) }
}
}
constify_imm8!(b, call)
}
// TODO: Remove once a macro is ipmlemented to automate these tests
// https://github.com/rust-lang-nursery/stdsimd/issues/49
#[cfg(test)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vroundps))]
fn test_mm256_round_ps(a: f32x8) -> f32x8 {
_mm256_round_ps(a, 0x00)
}
/// Round packed single-precision (32-bit) floating point elements in `a` toward
/// positive infinity.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vroundps))]
pub fn _mm256_ceil_ps(a: f32x8) -> f32x8 {
unsafe { roundps256(a, 0x02) }
}
/// Round packed single-precision (32-bit) floating point elements in `a` toward
/// negative infinity.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vroundps))]
pub fn _mm256_floor_ps(a: f32x8) -> f32x8 {
unsafe { roundps256(a, 0x01) }
}
/// Return the square root of packed single-precision (32-bit) floating point
/// elements in `a`.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub fn _mm256_sqrt_ps(a: f32x8) -> f32x8 {
unsafe { sqrtps256(a) }
}
/// Return the square root of packed double-precision (64-bit) floating point
/// elements in `a`.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub fn _mm256_sqrt_pd(a: f64x4) -> f64x4 {
unsafe { sqrtpd256(a) }
}
/// LLVM intrinsics used in the above functions
#[allow(improper_ctypes)]
extern "C" {
@ -119,9 +185,15 @@ extern "C" {
fn addsubps256(a: f32x8, b: f32x8) -> f32x8;
#[link_name = "llvm.x86.avx.round.pd.256"]
fn roundpd256(a: f64x4, b: i32) -> f64x4;
#[link_name = "llvm.x86.avx.round.ps.256"]
fn roundps256(a: f32x8, b: i32) -> f32x8;
#[link_name = "llvm.x86.avx.sqrt.pd.256"]
fn sqrtpd256(a: f64x4) -> f64x4;
#[link_name = "llvm.x86.avx.sqrt.ps.256"]
fn sqrtps256(a: f32x8) -> f32x8;
}
#[cfg(test)]
#[cfg(all(test, target_feature = "avx", any(target_arch = "x86", target_arch = "x86_64")))]
mod tests {
use stdsimd_test::simd_test;
@ -229,4 +301,51 @@ mod tests {
let expected_up = f64x4::new(2.0, 3.0, 4.0, -1.0);
assert_eq!(result_up, expected_up);
}
#[simd_test = "avx"]
fn _mm256_round_ps() {
let a = f32x8::new(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
let result_closest = avx::_mm256_round_ps(a, 0b00000000);
let result_down = avx::_mm256_round_ps(a, 0b00000001);
let result_up = avx::_mm256_round_ps(a, 0b00000010);
let expected_closest = f32x8::new(2.0, 2.0, 4.0, -1.0, 2.0, 2.0, 4.0, -1.0);
let expected_down = f32x8::new(1.0, 2.0, 3.0, -2.0, 1.0, 2.0, 3.0, -2.0);
let expected_up = f32x8::new(2.0, 3.0, 4.0, -1.0, 2.0, 3.0, 4.0, -1.0);
assert_eq!(result_closest, expected_closest);
assert_eq!(result_down, expected_down);
assert_eq!(result_up, expected_up);
}
#[simd_test = "avx"]
fn _mm256_floor_ps() {
let a = f32x8::new(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
let result_down = avx::_mm256_floor_ps(a);
let expected_down = f32x8::new(1.0, 2.0, 3.0, -2.0, 1.0, 2.0, 3.0, -2.0);
assert_eq!(result_down, expected_down);
}
#[simd_test = "avx"]
fn _mm256_ceil_ps() {
let a = f32x8::new(1.55, 2.2, 3.99, -1.2, 1.55, 2.2, 3.99, -1.2);
let result_up = avx::_mm256_ceil_ps(a);
let expected_up = f32x8::new(2.0, 3.0, 4.0, -1.0, 2.0, 3.0, 4.0, -1.0);
assert_eq!(result_up, expected_up);
}
#[simd_test = "avx"]
fn _mm256_sqrt_pd() {
let a = f64x4::new(4.0, 9.0, 16.0, 25.0);
let r = avx::_mm256_sqrt_pd(a, );
let e = f64x4::new(2.0, 3.0, 4.0, 5.0);
assert_eq!(r, e);
}
#[simd_test = "avx"]
fn _mm256_sqrt_ps() {
let a = f32x8::new(4.0, 9.0, 16.0, 25.0, 4.0, 9.0, 16.0, 25.0);
let r = avx::_mm256_sqrt_ps(a);
let e = f32x8::new(2.0, 3.0, 4.0, 5.0, 2.0, 3.0, 4.0, 5.0);
assert_eq!(r, e);
}
}