Implement avx512bf16 intrinsics (#998)

This commit is contained in:
kangshan1157 2021-02-11 07:29:27 +08:00 committed by GitHub
parent 54ce616d6b
commit 936e1add97
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 1180 additions and 4 deletions

View file

@ -1,4 +1,4 @@
FROM ubuntu:18.04
FROM ubuntu:20.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc-multilib \
libc6-dev \

View file

@ -1,4 +1,4 @@
FROM ubuntu:18.04
FROM ubuntu:20.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc-multilib \
libc6-dev \

View file

@ -1,4 +1,4 @@
FROM ubuntu:18.04
FROM ubuntu:20.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
libc6-dev \
@ -10,4 +10,5 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
RUN wget https://github.com/gnzlbg/intel_sde/raw/master/sde-external-8.35.0-2019-03-11-lin.tar.bz2
RUN tar -xjf sde-external-8.35.0-2019-03-11-lin.tar.bz2
ENV SKIP_TESTS="avx512bf16"
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-8.35.0-2019-03-11-lin/sde64 -rtm_mode full --"

View file

@ -1,4 +1,4 @@
FROM ubuntu:18.04
FROM ubuntu:20.04
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
libc6-dev \

View file

@ -56,6 +56,9 @@ cargo_test() {
;;
esac
if [ "$SKIP_TESTS" != "" ]; then
cmd="$cmd --skip "$SKIP_TESTS
fi
$cmd
}

File diff suppressed because it is too large Load diff

View file

@ -296,6 +296,37 @@ types! {
/// suffixed with "pd" (or otherwise contain "pd"). Not to be confused with
/// "ps" which is used for `__m512`.
pub struct __m512d(f64, f64, f64, f64, f64, f64, f64, f64);
/// 128-bit wide set of eight 'u16' types, x86-specific
///
/// This type is representing a 128-bit SIMD register which internally is consisted of
/// eight packed `u16` instances. It's purpose is for bf16 related intrinsic
/// implementations.
pub struct __m128bh(u16, u16, u16, u16, u16, u16, u16, u16);
/// 256-bit wide set of 16 'u16' types, x86-specific
///
/// This type is the same as the `__m128bh` type defined by Intel,
/// representing a 256-bit SIMD register which internally is consisted of
/// 16 packed `u16` instances. It's purpose is for bf16 related intrinsic
/// implementations.
pub struct __m256bh(
u16, u16, u16, u16, u16, u16, u16, u16,
u16, u16, u16, u16, u16, u16, u16, u16
);
/// 512-bit wide set of 32 'u16' types, x86-specific
///
/// This type is the same as the `__m128bh` type defined by Intel,
/// representing a 512-bit SIMD register which internally is consisted of
/// 32 packed `u16` instances. It's purpose is for bf16 related intrinsic
/// implementations.
pub struct __m512bh(
u16, u16, u16, u16, u16, u16, u16, u16,
u16, u16, u16, u16, u16, u16, u16, u16,
u16, u16, u16, u16, u16, u16, u16, u16,
u16, u16, u16, u16, u16, u16, u16, u16
);
}
/// The `__mmask64` type used in AVX-512 intrinsics, a 64-bit integer
@ -602,6 +633,105 @@ impl m512dExt for __m512d {
}
}
#[allow(non_camel_case_types)]
#[unstable(feature = "stdsimd_internal", issue = "none")]
pub(crate) trait m128bhExt: Sized {
fn as_m128bh(self) -> __m128bh;
#[inline]
fn as_u16x8(self) -> crate::core_arch::simd::u16x8 {
unsafe { transmute(self.as_m128bh()) }
}
#[inline]
fn as_i16x8(self) -> crate::core_arch::simd::i16x8 {
unsafe { transmute(self.as_m128bh()) }
}
#[inline]
fn as_u32x4(self) -> crate::core_arch::simd::u32x4 {
unsafe { transmute(self.as_m128bh()) }
}
#[inline]
fn as_i32x4(self) -> crate::core_arch::simd::i32x4 {
unsafe { transmute(self.as_m128bh()) }
}
}
impl m128bhExt for __m128bh {
#[inline]
fn as_m128bh(self) -> Self {
self
}
}
#[allow(non_camel_case_types)]
#[unstable(feature = "stdsimd_internal", issue = "none")]
pub(crate) trait m256bhExt: Sized {
fn as_m256bh(self) -> __m256bh;
#[inline]
fn as_u16x16(self) -> crate::core_arch::simd::u16x16 {
unsafe { transmute(self.as_m256bh()) }
}
#[inline]
fn as_i16x16(self) -> crate::core_arch::simd::i16x16 {
unsafe { transmute(self.as_m256bh()) }
}
#[inline]
fn as_u32x8(self) -> crate::core_arch::simd::u32x8 {
unsafe { transmute(self.as_m256bh()) }
}
#[inline]
fn as_i32x8(self) -> crate::core_arch::simd::i32x8 {
unsafe { transmute(self.as_m256bh()) }
}
}
impl m256bhExt for __m256bh {
#[inline]
fn as_m256bh(self) -> Self {
self
}
}
#[allow(non_camel_case_types)]
#[unstable(feature = "stdsimd_internal", issue = "none")]
pub(crate) trait m512bhExt: Sized {
fn as_m512bh(self) -> __m512bh;
#[inline]
fn as_u16x32(self) -> crate::core_arch::simd::u16x32 {
unsafe { transmute(self.as_m512bh()) }
}
#[inline]
fn as_i16x32(self) -> crate::core_arch::simd::i16x32 {
unsafe { transmute(self.as_m512bh()) }
}
#[inline]
fn as_u32x16(self) -> crate::core_arch::simd::u32x16 {
unsafe { transmute(self.as_m512bh()) }
}
#[inline]
fn as_i32x16(self) -> crate::core_arch::simd::i32x16 {
unsafe { transmute(self.as_m512bh()) }
}
}
impl m512bhExt for __m512bh {
#[inline]
fn as_m512bh(self) -> Self {
self
}
}
mod eflags;
pub use self::eflags::*;
@ -725,3 +855,6 @@ pub use self::rtm::*;
mod f16c;
pub use self::f16c::*;
mod avx512bf16;
pub use self::avx512bf16::*;

View file

@ -137,12 +137,15 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
syn::Type::Path(ref p) => match extract_path_ident(&p.path).to_string().as_ref() {
// x86 ...
"__m128" => quote! { &M128 },
"__m128bh" => quote! { &M128BH },
"__m128d" => quote! { &M128D },
"__m128i" => quote! { &M128I },
"__m256" => quote! { &M256 },
"__m256bh" => quote! { &M256BH },
"__m256d" => quote! { &M256D },
"__m256i" => quote! { &M256I },
"__m512" => quote! { &M512 },
"__m512bh" => quote! { &M512BH },
"__m512d" => quote! { &M512D },
"__m512i" => quote! { &M512I },
"__mmask8" => quote! { &MMASK8 },

View file

@ -45,12 +45,15 @@ static ORDERING: Type = Type::Ordering;
static M64: Type = Type::M64;
static M128: Type = Type::M128;
static M128BH: Type = Type::M128BH;
static M128I: Type = Type::M128I;
static M128D: Type = Type::M128D;
static M256: Type = Type::M256;
static M256BH: Type = Type::M256BH;
static M256I: Type = Type::M256I;
static M256D: Type = Type::M256D;
static M512: Type = Type::M512;
static M512BH: Type = Type::M512BH;
static M512I: Type = Type::M512I;
static M512D: Type = Type::M512D;
static MMASK8: Type = Type::MMASK8;
@ -75,12 +78,15 @@ enum Type {
ConstPtr(&'static Type),
M64,
M128,
M128BH,
M128D,
M128I,
M256,
M256BH,
M256D,
M256I,
M512,
M512BH,
M512D,
M512I,
MMASK8,
@ -493,6 +499,9 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> {
// The intrinsics guide calls `f16c` `fp16c` in disagreement with
// Intel's architecture manuals.
"fp16c" => String::from("f16c"),
"avx512_bf16" => String::from("avx512bf16"),
// The XML file names VNNI as "avx512_bf16", while Rust calls
// it "avx512bf16".
_ => cpuid,
};
let fixed_cpuid = fixup_cpuid(cpuid);
@ -693,12 +702,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
(&Type::PrimUnsigned(8), "unsigned char") => {}
(&Type::M64, "__m64") => {}
(&Type::M128, "__m128") => {}
(&Type::M128BH, "__m128bh") => {}
(&Type::M128I, "__m128i") => {}
(&Type::M128D, "__m128d") => {}
(&Type::M256, "__m256") => {}
(&Type::M256BH, "__m256bh") => {}
(&Type::M256I, "__m256i") => {}
(&Type::M256D, "__m256d") => {}
(&Type::M512, "__m512") => {}
(&Type::M512BH, "__m512bh") => {}
(&Type::M512I, "__m512i") => {}
(&Type::M512D, "__m512d") => {}
(&Type::MMASK64, "__mmask64") => {}
@ -726,12 +738,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
(&Type::MutPtr(&Type::PrimUnsigned(64)), "__mmask64*") => {}
(&Type::MutPtr(&Type::M64), "__m64*") => {}
(&Type::MutPtr(&Type::M128), "__m128*") => {}
(&Type::MutPtr(&Type::M128BH), "__m128bh*") => {}
(&Type::MutPtr(&Type::M128I), "__m128i*") => {}
(&Type::MutPtr(&Type::M128D), "__m128d*") => {}
(&Type::MutPtr(&Type::M256), "__m256*") => {}
(&Type::MutPtr(&Type::M256BH), "__m256bh*") => {}
(&Type::MutPtr(&Type::M256I), "__m256i*") => {}
(&Type::MutPtr(&Type::M256D), "__m256d*") => {}
(&Type::MutPtr(&Type::M512), "__m512*") => {}
(&Type::MutPtr(&Type::M512BH), "__m512bh*") => {}
(&Type::MutPtr(&Type::M512I), "__m512i*") => {}
(&Type::MutPtr(&Type::M512D), "__m512d*") => {}
@ -754,12 +769,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
(&Type::ConstPtr(&Type::PrimUnsigned(32)), "void const*") => {}
(&Type::ConstPtr(&Type::M64), "__m64 const*") => {}
(&Type::ConstPtr(&Type::M128), "__m128 const*") => {}
(&Type::ConstPtr(&Type::M128BH), "__m128bh const*") => {}
(&Type::ConstPtr(&Type::M128I), "__m128i const*") => {}
(&Type::ConstPtr(&Type::M128D), "__m128d const*") => {}
(&Type::ConstPtr(&Type::M256), "__m256 const*") => {}
(&Type::ConstPtr(&Type::M256BH), "__m256bh const*") => {}
(&Type::ConstPtr(&Type::M256I), "__m256i const*") => {}
(&Type::ConstPtr(&Type::M256D), "__m256d const*") => {}
(&Type::ConstPtr(&Type::M512), "__m512 const*") => {}
(&Type::ConstPtr(&Type::M512BH), "__m512bh const*") => {}
(&Type::ConstPtr(&Type::M512I), "__m512i const*") => {}
(&Type::ConstPtr(&Type::M512D), "__m512d const*") => {}
(&Type::ConstPtr(&Type::PrimUnsigned(32)), "__mmask32*") => {}