Start adding some avx512 intrinsics (#618)
First one is the quite simple `_mm512_abs_epi32` intrinsic!
This commit is contained in:
parent
cb921381c4
commit
67f8ed0bf4
9 changed files with 257 additions and 0 deletions
|
|
@ -4,6 +4,9 @@ environment:
|
|||
# default so pass a flag to disable it to ensure our tests work ok.
|
||||
RUSTFLAGS: -Clink-args=/OPT:NOICF
|
||||
|
||||
# VS2017 looks to be the first with avx-512 support, notably in dumpbin
|
||||
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
|
||||
|
||||
matrix:
|
||||
- TARGET: x86_64-pc-windows-msvc
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,9 @@ exclude = [
|
|||
[profile.release]
|
||||
debug = true
|
||||
opt-level = 3
|
||||
incremental = true
|
||||
|
||||
[profile.bench]
|
||||
debug = 1
|
||||
opt-level = 3
|
||||
incremental = true
|
||||
|
|
|
|||
|
|
@ -181,3 +181,11 @@ simd_ty!(i32x8[i32]:
|
|||
i32, i32, i32, i32, i32, i32, i32, i32
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7);
|
||||
simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3);
|
||||
|
||||
// 512-bit wide types:
|
||||
|
||||
simd_ty!(i32x16[i32]:
|
||||
i32, i32, i32, i32, i32, i32, i32, i32,
|
||||
i32, i32, i32, i32, i32, i32, i32, i32
|
||||
| x0, x1, x2, x3, x4, x5, x6, x7,
|
||||
x8, x9, x10, x11, x12, x13, x14, x15);
|
||||
|
|
|
|||
189
library/stdarch/coresimd/x86/avx512f.rs
Normal file
189
library/stdarch/coresimd/x86/avx512f.rs
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
use coresimd::simd::*;
|
||||
use coresimd::x86::*;
|
||||
use mem;
|
||||
|
||||
#[cfg(test)]
|
||||
use stdsimd_test::assert_instr;
|
||||
|
||||
/// Computes the absolute values of packed 32-bit integers in `a`.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vpabsd))]
|
||||
pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
|
||||
mem::transmute(pabsd(a.as_i32x16(), _mm512_setzero_si512().as_i32x16(), -1))
|
||||
}
|
||||
|
||||
/// Compute the absolute value of packed 32-bit integers in `a`, and store the
|
||||
/// unsigned results in `dst` using writemask `k` (elements are copied from
|
||||
/// `src` when the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vpabsd))]
|
||||
pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
|
||||
mem::transmute(pabsd(a.as_i32x16(), src.as_i32x16(), k))
|
||||
}
|
||||
|
||||
/// Compute the absolute value of packed 32-bit integers in `a`, and store the
|
||||
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
|
||||
/// the corresponding mask bit is not set).
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33,34,35,35&text=_mm512_maskz_abs_epi32)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vpabsd))]
|
||||
pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
|
||||
mem::transmute(pabsd(a.as_i32x16(), _mm512_setzero_si512().as_i32x16(), k))
|
||||
}
|
||||
|
||||
/// Return vector of type `__m512i` with all elements set to zero.
|
||||
///
|
||||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_si512)
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
#[cfg_attr(test, assert_instr(vxorps))]
|
||||
pub unsafe fn _mm512_setzero_si512() -> __m512i {
|
||||
mem::zeroed()
|
||||
}
|
||||
|
||||
/// Set packed 32-bit integers in `dst` with the supplied values in reverse
|
||||
/// order.
|
||||
#[inline]
|
||||
#[target_feature(enable = "avx512f")]
|
||||
pub unsafe fn _mm512_setr_epi32(
|
||||
e15: i32,
|
||||
e14: i32,
|
||||
e13: i32,
|
||||
e12: i32,
|
||||
e11: i32,
|
||||
e10: i32,
|
||||
e9: i32,
|
||||
e8: i32,
|
||||
e7: i32,
|
||||
e6: i32,
|
||||
e5: i32,
|
||||
e4: i32,
|
||||
e3: i32,
|
||||
e2: i32,
|
||||
e1: i32,
|
||||
e0: i32,
|
||||
) -> __m512i {
|
||||
let r = i32x16(
|
||||
e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
|
||||
);
|
||||
mem::transmute(r)
|
||||
}
|
||||
|
||||
#[allow(improper_ctypes)]
|
||||
extern "C" {
|
||||
#[link_name = "llvm.x86.avx512.mask.pabs.d.512"]
|
||||
fn pabsd(a: i32x16, b: i32x16, c: i16) -> i32x16;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std;
|
||||
use stdsimd_test::simd_test;
|
||||
|
||||
use coresimd::x86::*;
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_abs_epi32() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_setr_epi32(
|
||||
0, 1, -1, std::i32::MAX,
|
||||
std::i32::MIN, 100, -100, -32,
|
||||
0, 1, -1, std::i32::MAX,
|
||||
std::i32::MIN, 100, -100, -32,
|
||||
);
|
||||
let r = _mm512_abs_epi32(a);
|
||||
let e = _mm512_setr_epi32(
|
||||
0,
|
||||
1,
|
||||
1,
|
||||
std::i32::MAX,
|
||||
std::i32::MAX.wrapping_add(1),
|
||||
100,
|
||||
100,
|
||||
32,
|
||||
0,
|
||||
1,
|
||||
1,
|
||||
std::i32::MAX,
|
||||
std::i32::MAX.wrapping_add(1),
|
||||
100,
|
||||
100,
|
||||
32,
|
||||
);
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_mask_abs_epi32() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_setr_epi32(
|
||||
0, 1, -1, std::i32::MAX,
|
||||
std::i32::MIN, 100, -100, -32,
|
||||
0, 1, -1, std::i32::MAX,
|
||||
std::i32::MIN, 100, -100, -32,
|
||||
);
|
||||
let r = _mm512_mask_abs_epi32(a, 0, a);
|
||||
assert_eq_m512i(r, a);
|
||||
let r = _mm512_mask_abs_epi32(a, 0b11111111, a);
|
||||
let e = _mm512_setr_epi32(
|
||||
0,
|
||||
1,
|
||||
1,
|
||||
std::i32::MAX,
|
||||
std::i32::MAX.wrapping_add(1),
|
||||
100,
|
||||
100,
|
||||
32,
|
||||
0,
|
||||
1,
|
||||
-1,
|
||||
std::i32::MAX,
|
||||
std::i32::MIN,
|
||||
100,
|
||||
-100,
|
||||
-32,
|
||||
);
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
|
||||
#[simd_test(enable = "avx512f")]
|
||||
unsafe fn test_mm512_maskz_abs_epi32() {
|
||||
#[rustfmt::skip]
|
||||
let a = _mm512_setr_epi32(
|
||||
0, 1, -1, std::i32::MAX,
|
||||
std::i32::MIN, 100, -100, -32,
|
||||
0, 1, -1, std::i32::MAX,
|
||||
std::i32::MIN, 100, -100, -32,
|
||||
);
|
||||
let r = _mm512_maskz_abs_epi32(0, a);
|
||||
assert_eq_m512i(r, _mm512_setzero_si512());
|
||||
let r = _mm512_maskz_abs_epi32(0b11111111, a);
|
||||
let e = _mm512_setr_epi32(
|
||||
0,
|
||||
1,
|
||||
1,
|
||||
std::i32::MAX,
|
||||
std::i32::MAX.wrapping_add(1),
|
||||
100,
|
||||
100,
|
||||
32,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
);
|
||||
assert_eq_m512i(r, e);
|
||||
}
|
||||
}
|
||||
|
|
@ -391,6 +391,10 @@ types! {
|
|||
pub struct __m512d(f64, f64, f64, f64, f64, f64, f64, f64);
|
||||
}
|
||||
|
||||
/// The `__mmask16` type used in AVX-512 intrinsics, a 16-bit integer
|
||||
#[allow(non_camel_case_types)]
|
||||
pub type __mmask16 = i16;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
#[cfg(test)]
|
||||
|
|
@ -502,6 +506,24 @@ impl m256iExt for __m256i {
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
#[unstable(feature = "stdimd_internal", issue = "0")]
|
||||
pub(crate) trait m512iExt: Sized {
|
||||
fn as_m512i(self) -> __m512i;
|
||||
|
||||
#[inline]
|
||||
fn as_i32x16(self) -> ::coresimd::simd::i32x16 {
|
||||
unsafe { mem::transmute(self.as_m512i()) }
|
||||
}
|
||||
}
|
||||
|
||||
impl m512iExt for __m512i {
|
||||
#[inline]
|
||||
fn as_m512i(self) -> Self {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
mod eflags;
|
||||
pub use self::eflags::*;
|
||||
|
||||
|
|
@ -580,3 +602,6 @@ use stdsimd_test::assert_instr;
|
|||
pub unsafe fn ud2() -> ! {
|
||||
::intrinsics::abort()
|
||||
}
|
||||
|
||||
mod avx512f;
|
||||
pub use self::avx512f::*;
|
||||
|
|
|
|||
|
|
@ -135,3 +135,11 @@ mod x86_polyfill {
|
|||
pub use coresimd::x86_64::{_mm256_insert_epi64, _mm_insert_epi64};
|
||||
}
|
||||
pub use self::x86_polyfill::*;
|
||||
|
||||
pub unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) {
|
||||
union A {
|
||||
a: __m512i,
|
||||
b: [i32; 16],
|
||||
}
|
||||
assert_eq!(A { a }.b, A { a: b }.b)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@
|
|||
sse4a_target_feature,
|
||||
arm_target_feature,
|
||||
aarch64_target_feature,
|
||||
avx512_target_feature,
|
||||
mips_target_feature,
|
||||
powerpc_target_feature,
|
||||
wasm_target_feature
|
||||
|
|
|
|||
|
|
@ -98,6 +98,10 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
|
|||
"__m256" => quote! { &M256 },
|
||||
"__m256d" => quote! { &M256D },
|
||||
"__m256i" => quote! { &M256I },
|
||||
"__m512" => quote! { &M512 },
|
||||
"__m512d" => quote! { &M512D },
|
||||
"__m512i" => quote! { &M512I },
|
||||
"__mmask16" => quote! { &MMASK16 },
|
||||
"__m64" => quote! { &M64 },
|
||||
"bool" => quote! { &BOOL },
|
||||
"f32" => quote! { &F32 },
|
||||
|
|
|
|||
|
|
@ -54,6 +54,10 @@ static M128D: Type = Type::M128D;
|
|||
static M256: Type = Type::M256;
|
||||
static M256I: Type = Type::M256I;
|
||||
static M256D: Type = Type::M256D;
|
||||
static M512: Type = Type::M512;
|
||||
static M512I: Type = Type::M512I;
|
||||
static M512D: Type = Type::M512D;
|
||||
static MMASK16: Type = Type::MMASK16;
|
||||
|
||||
static TUPLE: Type = Type::Tuple;
|
||||
static CPUID: Type = Type::CpuidResult;
|
||||
|
|
@ -72,6 +76,10 @@ enum Type {
|
|||
M256,
|
||||
M256D,
|
||||
M256I,
|
||||
M512,
|
||||
M512D,
|
||||
M512I,
|
||||
MMASK16,
|
||||
Tuple,
|
||||
CpuidResult,
|
||||
Never,
|
||||
|
|
@ -422,6 +430,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
|
|||
| (&Type::M256, "__m256")
|
||||
| (&Type::Ptr(&Type::M256), "__m256*") => {}
|
||||
|
||||
(&Type::M512I, "__m512i")
|
||||
| (&Type::Ptr(&Type::M512I), "__m512i*")
|
||||
| (&Type::M512D, "__m512d")
|
||||
| (&Type::Ptr(&Type::M512D), "__m512d*")
|
||||
| (&Type::M512, "__m512")
|
||||
| (&Type::Ptr(&Type::M512), "__m512*") => {}
|
||||
|
||||
(&Type::MMASK16, "__mmask16") => {}
|
||||
|
||||
// This is a macro (?) in C which seems to mutate its arguments, but
|
||||
// that means that we're taking pointers to arguments in rust
|
||||
// as we're not exposing it as a macro.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue