Start adding some avx512 intrinsics (#618)

First one is the quite simple `_mm512_abs_epi32` intrinsic!
This commit is contained in:
Alex Crichton 2018-12-14 09:44:26 -06:00 committed by GitHub
parent cb921381c4
commit 67f8ed0bf4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 257 additions and 0 deletions

View file

@ -4,6 +4,9 @@ environment:
# default so pass a flag to disable it to ensure our tests work ok.
RUSTFLAGS: -Clink-args=/OPT:NOICF
# VS2017 looks to be the first with avx-512 support, notably in dumpbin
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
matrix:
- TARGET: x86_64-pc-windows-msvc

View file

@ -10,7 +10,9 @@ exclude = [
[profile.release]
debug = true
opt-level = 3
incremental = true
[profile.bench]
debug = 1
opt-level = 3
incremental = true

View file

@ -181,3 +181,11 @@ simd_ty!(i32x8[i32]:
i32, i32, i32, i32, i32, i32, i32, i32
| x0, x1, x2, x3, x4, x5, x6, x7);
simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3);
// 512-bit wide types:
simd_ty!(i32x16[i32]:
i32, i32, i32, i32, i32, i32, i32, i32,
i32, i32, i32, i32, i32, i32, i32, i32
| x0, x1, x2, x3, x4, x5, x6, x7,
x8, x9, x10, x11, x12, x13, x14, x15);

View file

@ -0,0 +1,189 @@
use coresimd::simd::*;
use coresimd::x86::*;
use mem;
#[cfg(test)]
use stdsimd_test::assert_instr;
/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
mem::transmute(pabsd(a.as_i32x16(), _mm512_setzero_si512().as_i32x16(), -1))
}
/// Compute the absolute value of packed 32-bit integers in `a`, and store the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
mem::transmute(pabsd(a.as_i32x16(), src.as_i32x16(), k))
}
/// Compute the absolute value of packed 32-bit integers in `a`, and store the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33,34,35,35&text=_mm512_maskz_abs_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
mem::transmute(pabsd(a.as_i32x16(), _mm512_setzero_si512().as_i32x16(), k))
}
/// Return vector of type `__m512i` with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_si512)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_si512() -> __m512i {
mem::zeroed()
}
/// Set packed 32-bit integers in `dst` with the supplied values in reverse
/// order.
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_epi32(
e15: i32,
e14: i32,
e13: i32,
e12: i32,
e11: i32,
e10: i32,
e9: i32,
e8: i32,
e7: i32,
e6: i32,
e5: i32,
e4: i32,
e3: i32,
e2: i32,
e1: i32,
e0: i32,
) -> __m512i {
let r = i32x16(
e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
);
mem::transmute(r)
}
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.avx512.mask.pabs.d.512"]
fn pabsd(a: i32x16, b: i32x16, c: i16) -> i32x16;
}
#[cfg(test)]
mod tests {
use std;
use stdsimd_test::simd_test;
use coresimd::x86::*;
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_abs_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, std::i32::MAX,
std::i32::MIN, 100, -100, -32,
0, 1, -1, std::i32::MAX,
std::i32::MIN, 100, -100, -32,
);
let r = _mm512_abs_epi32(a);
let e = _mm512_setr_epi32(
0,
1,
1,
std::i32::MAX,
std::i32::MAX.wrapping_add(1),
100,
100,
32,
0,
1,
1,
std::i32::MAX,
std::i32::MAX.wrapping_add(1),
100,
100,
32,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_abs_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, std::i32::MAX,
std::i32::MIN, 100, -100, -32,
0, 1, -1, std::i32::MAX,
std::i32::MIN, 100, -100, -32,
);
let r = _mm512_mask_abs_epi32(a, 0, a);
assert_eq_m512i(r, a);
let r = _mm512_mask_abs_epi32(a, 0b11111111, a);
let e = _mm512_setr_epi32(
0,
1,
1,
std::i32::MAX,
std::i32::MAX.wrapping_add(1),
100,
100,
32,
0,
1,
-1,
std::i32::MAX,
std::i32::MIN,
100,
-100,
-32,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_abs_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, std::i32::MAX,
std::i32::MIN, 100, -100, -32,
0, 1, -1, std::i32::MAX,
std::i32::MIN, 100, -100, -32,
);
let r = _mm512_maskz_abs_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_abs_epi32(0b11111111, a);
let e = _mm512_setr_epi32(
0,
1,
1,
std::i32::MAX,
std::i32::MAX.wrapping_add(1),
100,
100,
32,
0,
0,
0,
0,
0,
0,
0,
0,
);
assert_eq_m512i(r, e);
}
}

View file

@ -391,6 +391,10 @@ types! {
pub struct __m512d(f64, f64, f64, f64, f64, f64, f64, f64);
}
/// The `__mmask16` type used in AVX-512 intrinsics, a 16-bit integer
#[allow(non_camel_case_types)]
pub type __mmask16 = i16;
#[cfg(test)]
mod test;
#[cfg(test)]
@ -502,6 +506,24 @@ impl m256iExt for __m256i {
}
}
#[allow(non_camel_case_types)]
#[unstable(feature = "stdimd_internal", issue = "0")]
pub(crate) trait m512iExt: Sized {
fn as_m512i(self) -> __m512i;
#[inline]
fn as_i32x16(self) -> ::coresimd::simd::i32x16 {
unsafe { mem::transmute(self.as_m512i()) }
}
}
impl m512iExt for __m512i {
#[inline]
fn as_m512i(self) -> Self {
self
}
}
mod eflags;
pub use self::eflags::*;
@ -580,3 +602,6 @@ use stdsimd_test::assert_instr;
pub unsafe fn ud2() -> ! {
::intrinsics::abort()
}
mod avx512f;
pub use self::avx512f::*;

View file

@ -135,3 +135,11 @@ mod x86_polyfill {
pub use coresimd::x86_64::{_mm256_insert_epi64, _mm_insert_epi64};
}
pub use self::x86_polyfill::*;
pub unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) {
union A {
a: __m512i,
b: [i32; 16],
}
assert_eq!(A { a }.b, A { a: b }.b)
}

View file

@ -33,6 +33,7 @@
sse4a_target_feature,
arm_target_feature,
aarch64_target_feature,
avx512_target_feature,
mips_target_feature,
powerpc_target_feature,
wasm_target_feature

View file

@ -98,6 +98,10 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
"__m256" => quote! { &M256 },
"__m256d" => quote! { &M256D },
"__m256i" => quote! { &M256I },
"__m512" => quote! { &M512 },
"__m512d" => quote! { &M512D },
"__m512i" => quote! { &M512I },
"__mmask16" => quote! { &MMASK16 },
"__m64" => quote! { &M64 },
"bool" => quote! { &BOOL },
"f32" => quote! { &F32 },

View file

@ -54,6 +54,10 @@ static M128D: Type = Type::M128D;
static M256: Type = Type::M256;
static M256I: Type = Type::M256I;
static M256D: Type = Type::M256D;
static M512: Type = Type::M512;
static M512I: Type = Type::M512I;
static M512D: Type = Type::M512D;
static MMASK16: Type = Type::MMASK16;
static TUPLE: Type = Type::Tuple;
static CPUID: Type = Type::CpuidResult;
@ -72,6 +76,10 @@ enum Type {
M256,
M256D,
M256I,
M512,
M512D,
M512I,
MMASK16,
Tuple,
CpuidResult,
Never,
@ -422,6 +430,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
| (&Type::M256, "__m256")
| (&Type::Ptr(&Type::M256), "__m256*") => {}
(&Type::M512I, "__m512i")
| (&Type::Ptr(&Type::M512I), "__m512i*")
| (&Type::M512D, "__m512d")
| (&Type::Ptr(&Type::M512D), "__m512d*")
| (&Type::M512, "__m512")
| (&Type::Ptr(&Type::M512), "__m512*") => {}
(&Type::MMASK16, "__mmask16") => {}
// This is a macro (?) in C which seems to mutate its arguments, but
// that means that we're taking pointers to arguments in rust
// as we're not exposing it as a macro.