Add feature detection for new amx variants and movrs
This commit is contained in:
parent
97606212ea
commit
fbd13bd08c
3 changed files with 53 additions and 10 deletions
|
|
@ -89,6 +89,11 @@ features! {
|
|||
/// * `"amx-bf16"`
|
||||
/// * `"amx-fp16"`
|
||||
/// * `"amx-complex"`
|
||||
/// * `"amx-avx512"`
|
||||
/// * `"amx-fp8"`
|
||||
/// * `"amx-movrs"`
|
||||
/// * `"amx-tf32"`
|
||||
/// * `"amx-transpose"`
|
||||
/// * `"f16c"`
|
||||
/// * `"fma"`
|
||||
/// * `"bmi1"`
|
||||
|
|
@ -109,6 +114,8 @@ features! {
|
|||
/// * `"rtm"`
|
||||
/// * `"movbe"`
|
||||
/// * `"ermsb"`
|
||||
/// * `"movrs"`
|
||||
/// * `"xop"`
|
||||
///
|
||||
/// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide
|
||||
#[stable(feature = "simd_x86", since = "1.27.0")]
|
||||
|
|
@ -177,8 +184,7 @@ features! {
|
|||
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi: "avx512vbmi";
|
||||
/// AVX-512 VBMI (Vector Byte Manipulation Instructions)
|
||||
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vpopcntdq: "avx512vpopcntdq";
|
||||
/// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and
|
||||
/// Quadword)
|
||||
/// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and Quadword)
|
||||
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi2: "avx512vbmi2";
|
||||
/// AVX-512 VBMI2 (Additional byte, word, dword and qword capabilities)
|
||||
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] gfni: "gfni";
|
||||
|
|
@ -217,6 +223,16 @@ features! {
|
|||
/// AMX-FP16 (Float16 Operations)
|
||||
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_complex: "amx-complex";
|
||||
/// AMX-COMPLEX (Complex number Operations)
|
||||
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_avx512: "amx-avx512";
|
||||
/// AMX-AVX512 (AVX512 operations extended to matrices)
|
||||
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_fp8: "amx-fp8";
|
||||
/// AMX-FP8 (Float8 Operations)
|
||||
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_movrs: "amx-movrs";
|
||||
/// AMX-MOVRS (Matrix MOVRS operations)
|
||||
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_tf32: "amx-tf32";
|
||||
/// AMX-TF32 (TensorFloat32 Operations)
|
||||
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_transpose: "amx-transpose";
|
||||
/// AMX-TRANSPOSE (Matrix Transpose Operations)
|
||||
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] f16c: "f16c";
|
||||
/// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats)
|
||||
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fma: "fma";
|
||||
|
|
@ -253,6 +269,8 @@ features! {
|
|||
/// RTM, Intel (Restricted Transactional Memory)
|
||||
@FEATURE: #[stable(feature = "movbe_target_feature", since = "1.67.0")] movbe: "movbe";
|
||||
/// MOVBE (Move Data After Swapping Bytes)
|
||||
@FEATURE: #[unstable(feature = "movrs_target_feature", issue = "137976")] movrs: "movrs";
|
||||
/// MOVRS (Move data with the read-shared hint)
|
||||
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] ermsb: "ermsb";
|
||||
/// ERMSB, Enhanced REP MOVSB and STOSB
|
||||
@FEATURE: #[unstable(feature = "xop_target_feature", issue = "127208")] xop: "xop";
|
||||
|
|
|
|||
|
|
@ -141,6 +141,8 @@ pub(crate) fn detect_features() -> cache::Initializer {
|
|||
|
||||
enable(extended_features_ebx, 9, Feature::ermsb);
|
||||
|
||||
enable(extended_features_eax_leaf_1, 31, Feature::movrs);
|
||||
|
||||
// Detect if CPUID.19h is available
|
||||
if bit::test(extended_features_ecx as usize, 23) {
|
||||
let CpuidResult { ebx, .. } = unsafe { __cpuid(0x19) };
|
||||
|
|
@ -250,14 +252,27 @@ pub(crate) fn detect_features() -> cache::Initializer {
|
|||
enable(extended_features_edx, 8, Feature::avx512vp2intersect);
|
||||
enable(extended_features_edx, 23, Feature::avx512fp16);
|
||||
enable(extended_features_eax_leaf_1, 5, Feature::avx512bf16);
|
||||
}
|
||||
}
|
||||
|
||||
if os_amx_support {
|
||||
enable(extended_features_edx, 24, Feature::amx_tile);
|
||||
enable(extended_features_edx, 25, Feature::amx_int8);
|
||||
enable(extended_features_edx, 22, Feature::amx_bf16);
|
||||
enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16);
|
||||
enable(extended_features_edx_leaf_1, 8, Feature::amx_complex);
|
||||
}
|
||||
if os_amx_support {
|
||||
enable(extended_features_edx, 24, Feature::amx_tile);
|
||||
enable(extended_features_edx, 25, Feature::amx_int8);
|
||||
enable(extended_features_edx, 22, Feature::amx_bf16);
|
||||
enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16);
|
||||
enable(extended_features_edx_leaf_1, 8, Feature::amx_complex);
|
||||
|
||||
if max_basic_leaf >= 0x1e {
|
||||
let CpuidResult {
|
||||
eax: amx_feature_flags_eax,
|
||||
..
|
||||
} = unsafe { __cpuid_count(0x1e_u32, 1) };
|
||||
|
||||
enable(amx_feature_flags_eax, 4, Feature::amx_fp8);
|
||||
enable(amx_feature_flags_eax, 5, Feature::amx_transpose);
|
||||
enable(amx_feature_flags_eax, 6, Feature::amx_tf32);
|
||||
enable(amx_feature_flags_eax, 7, Feature::amx_avx512);
|
||||
enable(amx_feature_flags_eax, 8, Feature::amx_movrs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,8 @@
|
|||
sha512_sm_x86,
|
||||
x86_amx_intrinsics,
|
||||
xop_target_feature,
|
||||
keylocker_x86
|
||||
keylocker_x86,
|
||||
movrs_target_feature
|
||||
)]
|
||||
|
||||
extern crate cupid;
|
||||
|
|
@ -97,6 +98,15 @@ fn dump() {
|
|||
println!("xop: {:?}", is_x86_feature_detected!("xop"));
|
||||
println!("kl: {:?}", is_x86_feature_detected!("kl"));
|
||||
println!("widekl: {:?}", is_x86_feature_detected!("widekl"));
|
||||
println!("movrs: {:?}", is_x86_feature_detected!("movrs"));
|
||||
println!("amx-fp8: {:?}", is_x86_feature_detected!("amx-fp8"));
|
||||
println!(
|
||||
"amx-transpose: {:?}",
|
||||
is_x86_feature_detected!("amx-transpose")
|
||||
);
|
||||
println!("amx-tf32: {:?}", is_x86_feature_detected!("amx-tf32"));
|
||||
println!("amx-avx512: {:?}", is_x86_feature_detected!("amx-avx512"));
|
||||
println!("amx-movrs: {:?}", is_x86_feature_detected!("amx-movrs"));
|
||||
}
|
||||
|
||||
#[cfg(feature = "std_detect_env_override")]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue