Implement avx512bf16 intrinsics (#998)
This commit is contained in:
parent
54ce616d6b
commit
936e1add97
9 changed files with 1180 additions and 4 deletions
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:18.04
|
||||
FROM ubuntu:20.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc-multilib \
|
||||
libc6-dev \
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:18.04
|
||||
FROM ubuntu:20.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc-multilib \
|
||||
libc6-dev \
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:18.04
|
||||
FROM ubuntu:20.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
libc6-dev \
|
||||
|
|
@ -10,4 +10,5 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
|
||||
RUN wget https://github.com/gnzlbg/intel_sde/raw/master/sde-external-8.35.0-2019-03-11-lin.tar.bz2
|
||||
RUN tar -xjf sde-external-8.35.0-2019-03-11-lin.tar.bz2
|
||||
ENV SKIP_TESTS="avx512bf16"
|
||||
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.35.0-2019-03-11-lin/sde64 -rtm_mode full --"
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
FROM ubuntu:18.04
|
||||
FROM ubuntu:20.04
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
gcc \
|
||||
libc6-dev \
|
||||
|
|
|
|||
|
|
@ -56,6 +56,9 @@ cargo_test() {
|
|||
;;
|
||||
esac
|
||||
|
||||
if [ "$SKIP_TESTS" != "" ]; then
|
||||
cmd="$cmd --skip "$SKIP_TESTS
|
||||
fi
|
||||
$cmd
|
||||
}
|
||||
|
||||
|
|
|
|||
1018
library/stdarch/crates/core_arch/src/x86/avx512bf16.rs
Normal file
1018
library/stdarch/crates/core_arch/src/x86/avx512bf16.rs
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -296,6 +296,37 @@ types! {
|
|||
/// suffixed with "pd" (or otherwise contain "pd"). Not to be confused with
|
||||
/// "ps" which is used for `__m512`.
|
||||
pub struct __m512d(f64, f64, f64, f64, f64, f64, f64, f64);
|
||||
|
||||
/// 128-bit wide set of eight 'u16' types, x86-specific
|
||||
///
|
||||
/// This type is representing a 128-bit SIMD register which internally is consisted of
|
||||
/// eight packed `u16` instances. It's purpose is for bf16 related intrinsic
|
||||
/// implementations.
|
||||
pub struct __m128bh(u16, u16, u16, u16, u16, u16, u16, u16);
|
||||
|
||||
/// 256-bit wide set of 16 'u16' types, x86-specific
|
||||
///
|
||||
/// This type is the same as the `__m128bh` type defined by Intel,
|
||||
/// representing a 256-bit SIMD register which internally is consisted of
|
||||
/// 16 packed `u16` instances. It's purpose is for bf16 related intrinsic
|
||||
/// implementations.
|
||||
pub struct __m256bh(
|
||||
u16, u16, u16, u16, u16, u16, u16, u16,
|
||||
u16, u16, u16, u16, u16, u16, u16, u16
|
||||
);
|
||||
|
||||
/// 512-bit wide set of 32 'u16' types, x86-specific
|
||||
///
|
||||
/// This type is the same as the `__m128bh` type defined by Intel,
|
||||
/// representing a 512-bit SIMD register which internally is consisted of
|
||||
/// 32 packed `u16` instances. It's purpose is for bf16 related intrinsic
|
||||
/// implementations.
|
||||
pub struct __m512bh(
|
||||
u16, u16, u16, u16, u16, u16, u16, u16,
|
||||
u16, u16, u16, u16, u16, u16, u16, u16,
|
||||
u16, u16, u16, u16, u16, u16, u16, u16,
|
||||
u16, u16, u16, u16, u16, u16, u16, u16
|
||||
);
|
||||
}
|
||||
|
||||
/// The `__mmask64` type used in AVX-512 intrinsics, a 64-bit integer
|
||||
|
|
@ -602,6 +633,105 @@ impl m512dExt for __m512d {
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
#[unstable(feature = "stdsimd_internal", issue = "none")]
|
||||
pub(crate) trait m128bhExt: Sized {
|
||||
fn as_m128bh(self) -> __m128bh;
|
||||
|
||||
#[inline]
|
||||
fn as_u16x8(self) -> crate::core_arch::simd::u16x8 {
|
||||
unsafe { transmute(self.as_m128bh()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i16x8(self) -> crate::core_arch::simd::i16x8 {
|
||||
unsafe { transmute(self.as_m128bh()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_u32x4(self) -> crate::core_arch::simd::u32x4 {
|
||||
unsafe { transmute(self.as_m128bh()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i32x4(self) -> crate::core_arch::simd::i32x4 {
|
||||
unsafe { transmute(self.as_m128bh()) }
|
||||
}
|
||||
}
|
||||
|
||||
impl m128bhExt for __m128bh {
|
||||
#[inline]
|
||||
fn as_m128bh(self) -> Self {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
#[unstable(feature = "stdsimd_internal", issue = "none")]
|
||||
pub(crate) trait m256bhExt: Sized {
|
||||
fn as_m256bh(self) -> __m256bh;
|
||||
|
||||
#[inline]
|
||||
fn as_u16x16(self) -> crate::core_arch::simd::u16x16 {
|
||||
unsafe { transmute(self.as_m256bh()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i16x16(self) -> crate::core_arch::simd::i16x16 {
|
||||
unsafe { transmute(self.as_m256bh()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_u32x8(self) -> crate::core_arch::simd::u32x8 {
|
||||
unsafe { transmute(self.as_m256bh()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i32x8(self) -> crate::core_arch::simd::i32x8 {
|
||||
unsafe { transmute(self.as_m256bh()) }
|
||||
}
|
||||
}
|
||||
|
||||
impl m256bhExt for __m256bh {
|
||||
#[inline]
|
||||
fn as_m256bh(self) -> Self {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
#[unstable(feature = "stdsimd_internal", issue = "none")]
|
||||
pub(crate) trait m512bhExt: Sized {
|
||||
fn as_m512bh(self) -> __m512bh;
|
||||
|
||||
#[inline]
|
||||
fn as_u16x32(self) -> crate::core_arch::simd::u16x32 {
|
||||
unsafe { transmute(self.as_m512bh()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i16x32(self) -> crate::core_arch::simd::i16x32 {
|
||||
unsafe { transmute(self.as_m512bh()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_u32x16(self) -> crate::core_arch::simd::u32x16 {
|
||||
unsafe { transmute(self.as_m512bh()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn as_i32x16(self) -> crate::core_arch::simd::i32x16 {
|
||||
unsafe { transmute(self.as_m512bh()) }
|
||||
}
|
||||
}
|
||||
|
||||
impl m512bhExt for __m512bh {
|
||||
#[inline]
|
||||
fn as_m512bh(self) -> Self {
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
mod eflags;
|
||||
pub use self::eflags::*;
|
||||
|
||||
|
|
@ -725,3 +855,6 @@ pub use self::rtm::*;
|
|||
|
||||
mod f16c;
|
||||
pub use self::f16c::*;
|
||||
|
||||
mod avx512bf16;
|
||||
pub use self::avx512bf16::*;
|
||||
|
|
|
|||
|
|
@ -137,12 +137,15 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
|
|||
syn::Type::Path(ref p) => match extract_path_ident(&p.path).to_string().as_ref() {
|
||||
// x86 ...
|
||||
"__m128" => quote! { &M128 },
|
||||
"__m128bh" => quote! { &M128BH },
|
||||
"__m128d" => quote! { &M128D },
|
||||
"__m128i" => quote! { &M128I },
|
||||
"__m256" => quote! { &M256 },
|
||||
"__m256bh" => quote! { &M256BH },
|
||||
"__m256d" => quote! { &M256D },
|
||||
"__m256i" => quote! { &M256I },
|
||||
"__m512" => quote! { &M512 },
|
||||
"__m512bh" => quote! { &M512BH },
|
||||
"__m512d" => quote! { &M512D },
|
||||
"__m512i" => quote! { &M512I },
|
||||
"__mmask8" => quote! { &MMASK8 },
|
||||
|
|
|
|||
|
|
@ -45,12 +45,15 @@ static ORDERING: Type = Type::Ordering;
|
|||
|
||||
static M64: Type = Type::M64;
|
||||
static M128: Type = Type::M128;
|
||||
static M128BH: Type = Type::M128BH;
|
||||
static M128I: Type = Type::M128I;
|
||||
static M128D: Type = Type::M128D;
|
||||
static M256: Type = Type::M256;
|
||||
static M256BH: Type = Type::M256BH;
|
||||
static M256I: Type = Type::M256I;
|
||||
static M256D: Type = Type::M256D;
|
||||
static M512: Type = Type::M512;
|
||||
static M512BH: Type = Type::M512BH;
|
||||
static M512I: Type = Type::M512I;
|
||||
static M512D: Type = Type::M512D;
|
||||
static MMASK8: Type = Type::MMASK8;
|
||||
|
|
@ -75,12 +78,15 @@ enum Type {
|
|||
ConstPtr(&'static Type),
|
||||
M64,
|
||||
M128,
|
||||
M128BH,
|
||||
M128D,
|
||||
M128I,
|
||||
M256,
|
||||
M256BH,
|
||||
M256D,
|
||||
M256I,
|
||||
M512,
|
||||
M512BH,
|
||||
M512D,
|
||||
M512I,
|
||||
MMASK8,
|
||||
|
|
@ -493,6 +499,9 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
|
|||
// The intrinsics guide calls `f16c` `fp16c` in disagreement with
|
||||
// Intel's architecture manuals.
|
||||
"fp16c" => String::from("f16c"),
|
||||
"avx512_bf16" => String::from("avx512bf16"),
|
||||
// The XML file names VNNI as "avx512_bf16", while Rust calls
|
||||
// it "avx512bf16".
|
||||
_ => cpuid,
|
||||
};
|
||||
let fixed_cpuid = fixup_cpuid(cpuid);
|
||||
|
|
@ -693,12 +702,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
|
|||
(&Type::PrimUnsigned(8), "unsigned char") => {}
|
||||
(&Type::M64, "__m64") => {}
|
||||
(&Type::M128, "__m128") => {}
|
||||
(&Type::M128BH, "__m128bh") => {}
|
||||
(&Type::M128I, "__m128i") => {}
|
||||
(&Type::M128D, "__m128d") => {}
|
||||
(&Type::M256, "__m256") => {}
|
||||
(&Type::M256BH, "__m256bh") => {}
|
||||
(&Type::M256I, "__m256i") => {}
|
||||
(&Type::M256D, "__m256d") => {}
|
||||
(&Type::M512, "__m512") => {}
|
||||
(&Type::M512BH, "__m512bh") => {}
|
||||
(&Type::M512I, "__m512i") => {}
|
||||
(&Type::M512D, "__m512d") => {}
|
||||
(&Type::MMASK64, "__mmask64") => {}
|
||||
|
|
@ -726,12 +738,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
|
|||
(&Type::MutPtr(&Type::PrimUnsigned(64)), "__mmask64*") => {}
|
||||
(&Type::MutPtr(&Type::M64), "__m64*") => {}
|
||||
(&Type::MutPtr(&Type::M128), "__m128*") => {}
|
||||
(&Type::MutPtr(&Type::M128BH), "__m128bh*") => {}
|
||||
(&Type::MutPtr(&Type::M128I), "__m128i*") => {}
|
||||
(&Type::MutPtr(&Type::M128D), "__m128d*") => {}
|
||||
(&Type::MutPtr(&Type::M256), "__m256*") => {}
|
||||
(&Type::MutPtr(&Type::M256BH), "__m256bh*") => {}
|
||||
(&Type::MutPtr(&Type::M256I), "__m256i*") => {}
|
||||
(&Type::MutPtr(&Type::M256D), "__m256d*") => {}
|
||||
(&Type::MutPtr(&Type::M512), "__m512*") => {}
|
||||
(&Type::MutPtr(&Type::M512BH), "__m512bh*") => {}
|
||||
(&Type::MutPtr(&Type::M512I), "__m512i*") => {}
|
||||
(&Type::MutPtr(&Type::M512D), "__m512d*") => {}
|
||||
|
||||
|
|
@ -754,12 +769,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
|
|||
(&Type::ConstPtr(&Type::PrimUnsigned(32)), "void const*") => {}
|
||||
(&Type::ConstPtr(&Type::M64), "__m64 const*") => {}
|
||||
(&Type::ConstPtr(&Type::M128), "__m128 const*") => {}
|
||||
(&Type::ConstPtr(&Type::M128BH), "__m128bh const*") => {}
|
||||
(&Type::ConstPtr(&Type::M128I), "__m128i const*") => {}
|
||||
(&Type::ConstPtr(&Type::M128D), "__m128d const*") => {}
|
||||
(&Type::ConstPtr(&Type::M256), "__m256 const*") => {}
|
||||
(&Type::ConstPtr(&Type::M256BH), "__m256bh const*") => {}
|
||||
(&Type::ConstPtr(&Type::M256I), "__m256i const*") => {}
|
||||
(&Type::ConstPtr(&Type::M256D), "__m256d const*") => {}
|
||||
(&Type::ConstPtr(&Type::M512), "__m512 const*") => {}
|
||||
(&Type::ConstPtr(&Type::M512BH), "__m512bh const*") => {}
|
||||
(&Type::ConstPtr(&Type::M512I), "__m512i const*") => {}
|
||||
(&Type::ConstPtr(&Type::M512D), "__m512d const*") => {}
|
||||
(&Type::ConstPtr(&Type::PrimUnsigned(32)), "__mmask32*") => {}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue