std_detect: Add aarch64/linux/LLVM SME features

Add detection for the SME features that are supported by both LLVM and the Linux kernel.
Hwcap fields for features that Linux reports but LLVM does not yet support are included as commented-out code.

This commit adds detection for the following features:

- FEAT_SME
- FEAT_SME_F16F16
- FEAT_SME_F64F64
- FEAT_SME_F8F16
- FEAT_SME_F8F32
- FEAT_SME_FA64
- FEAT_SME_I16I64
- FEAT_SME_LUTv2
- FEAT_SME2
- FEAT_SME2p1
- FEAT_SSVE_FP8DOT2
- FEAT_SSVE_FP8DOT4
- FEAT_SSVE_FP8FMA

Linux features: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
LLVM features: llvm-project/llvm/lib/Target/AArch64/AArch64.td
This commit is contained in:
Kajetan Puchalski 2024-06-17 14:25:16 +01:00 committed by Amanieu d'Antras
parent dfc5dfc8ef
commit ef538bc614
3 changed files with 137 additions and 0 deletions

View file

@ -72,6 +72,19 @@ features! {
/// * `"fp8dot4"` - FEAT_FP8DOT4
/// * `"fp8dot2"` - FEAT_FP8DOT2
/// * `"wfxt"` - FEAT_WFxT
/// * `"sme"` - FEAT_SME
/// * `"sme-i16i64"` - FEAT_SME_I16I64
/// * `"sme-f64f64"` - FEAT_SME_F64F64
/// * `"sme-fa64"` - FEAT_SME_FA64
/// * `"sme2"` - FEAT_SME2
/// * `"sme2p1"` - FEAT_SME2p1
/// * `"sme-f16f16"` - FEAT_SME_F16F16
/// * `"sme-lutv2"` - FEAT_SME_LUTv2
/// * `"sme-f8f16"` - FEAT_SME_F8F16
/// * `"sme-f8f32"` - FEAT_SME_F8F32
/// * `"ssve-fp8fma"` - FEAT_SSVE_FP8FMA
/// * `"ssve-fp8dot4"` - FEAT_SSVE_FP8DOT4
/// * `"ssve-fp8dot2"` - FEAT_SSVE_FP8DOT2
///
/// [docs]: https://developer.arm.com/documentation/ddi0487/latest
#[stable(feature = "simd_aarch64", since = "1.60.0")]
@ -210,4 +223,30 @@ features! {
/// FEAT_FP8DOT2 (F8DP2 Instructions)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] wfxt: "wfxt";
/// FEAT_WFxT (WFET and WFIT Instructions)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme: "sme";
/// FEAT_SME (Scalable Matrix Extension)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_i16i64: "sme-i16i64";
/// FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product instructions)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f64f64: "sme-f64f64";
/// FEAT_SME_F64F64 (Double-precision floating-point outer product instructions)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_fa64: "sme-fa64";
/// FEAT_SME_FA64 (Full A64 instruction set support in Streaming SVE mode)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme2: "sme2";
/// FEAT_SME2 (SME Version 2)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme2p1: "sme2p1";
/// FEAT_SME2p1 (SME Version 2.1)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f16f16: "sme-f16f16";
/// FEAT_SME_F16F16 (Non-widening half-precision FP16 to FP16 arithmetic for SME2)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_lutv2: "sme-lutv2";
/// FEAT_SME_LUTv2 (LUTI4 Instruction)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f8f16: "sme-f8f16";
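/// FEAT_SME_F8F16 (SME2 ZA-targeting FP8 multiply-accumulate, dot product, and outer product to half-precision instructions)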
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sme_f8f32: "sme-f8f32";
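/// FEAT_SME_F8F32 (SME2 ZA-targeting FP8 multiply-accumulate, dot product, and outer product to single-precision instructions)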
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8fma: "ssve-fp8fma";
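/// FEAT_SSVE_FP8FMA (SVE2 FP8 multiply-accumulate to half-precision and single-precision instructions in Streaming SVE mode)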
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8dot4: "ssve-fp8dot4";
/// FEAT_SSVE_FP8DOT4 (SVE2 FP8 4-way dot product to single-precision instructions in Streaming SVE mode)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ssve_fp8dot2: "ssve-fp8dot2";
/// FEAT_SSVE_FP8DOT2 (SVE2 FP8 2-way dot product to half-precision instructions in Streaming SVE mode)
}

View file

@ -103,13 +103,26 @@ struct AtHwcap {
// afp: bool,
// rpres: bool,
// mte3: bool,
sme: bool,
smei16i64: bool,
smef64f64: bool,
// smei8i32: bool,
// smef16f32: bool,
// smeb16f32: bool,
// smef32f32: bool,
smefa64: bool,
wfxt: bool,
// ebf16: bool,
// sveebf16: bool,
cssc: bool,
// rprfm: bool,
sve2p1: bool,
sme2: bool,
sme2p1: bool,
// smei16i32: bool,
// smebi32i32: bool,
smeb16b16: bool,
smef16f16: bool,
mops: bool,
hbc: bool,
sveb16b16: bool,
@ -124,6 +137,12 @@ struct AtHwcap {
f8dp2: bool,
f8e4m3: bool,
f8e5m2: bool,
smelutv2: bool,
smef8f16: bool,
smef8f32: bool,
smesf8fma: bool,
smesf8dp4: bool,
smesf8dp2: bool,
}
impl From<auxvec::AuxVec> for AtHwcap {
@ -187,13 +206,26 @@ impl From<auxvec::AuxVec> for AtHwcap {
// afp: bit::test(auxv.hwcap2, 20),
// rpres: bit::test(auxv.hwcap2, 21),
// mte3: bit::test(auxv.hwcap2, 22),
sme: bit::test(auxv.hwcap2, 23),
smei16i64: bit::test(auxv.hwcap2, 24),
smef64f64: bit::test(auxv.hwcap2, 25),
// smei8i32: bit::test(auxv.hwcap2, 26),
// smef16f32: bit::test(auxv.hwcap2, 27),
// smeb16f32: bit::test(auxv.hwcap2, 28),
// smef32f32: bit::test(auxv.hwcap2, 29),
smefa64: bit::test(auxv.hwcap2, 30),
wfxt: bit::test(auxv.hwcap2, 31),
// ebf16: bit::test(auxv.hwcap2, 32),
// sveebf16: bit::test(auxv.hwcap2, 33),
cssc: bit::test(auxv.hwcap2, 34),
// rprfm: bit::test(auxv.hwcap2, 35),
sve2p1: bit::test(auxv.hwcap2, 36),
sme2: bit::test(auxv.hwcap2, 37),
sme2p1: bit::test(auxv.hwcap2, 38),
// smei16i32: bit::test(auxv.hwcap2, 39),
// smebi32i32: bit::test(auxv.hwcap2, 40),
smeb16b16: bit::test(auxv.hwcap2, 41),
smef16f16: bit::test(auxv.hwcap2, 42),
mops: bit::test(auxv.hwcap2, 43),
hbc: bit::test(auxv.hwcap2, 44),
sveb16b16: bit::test(auxv.hwcap2, 45),
@ -208,6 +240,12 @@ impl From<auxvec::AuxVec> for AtHwcap {
f8dp2: bit::test(auxv.hwcap2, 54),
f8e4m3: bit::test(auxv.hwcap2, 55),
f8e5m2: bit::test(auxv.hwcap2, 56),
smelutv2: bit::test(auxv.hwcap2, 57),
smef8f16: bit::test(auxv.hwcap2, 58),
smef8f32: bit::test(auxv.hwcap2, 59),
smesf8fma: bit::test(auxv.hwcap2, 60),
smesf8dp4: bit::test(auxv.hwcap2, 61),
smesf8dp2: bit::test(auxv.hwcap2, 62),
}
}
}
@ -278,13 +316,26 @@ impl From<super::cpuinfo::CpuInfo> for AtHwcap {
// afp: f.has("afp"),
// rpres: f.has("rpres"),
// mte3: f.has("mte3"),
sme: f.has("sme"),
smei16i64: f.has("smei16i64"),
smef64f64: f.has("smef64f64"),
// smei8i32: f.has("smei8i32"),
// smef16f32: f.has("smef16f32"),
// smeb16f32: f.has("smeb16f32"),
// smef32f32: f.has("smef32f32"),
smefa64: f.has("smefa64"),
wfxt: f.has("wfxt"),
// ebf16: f.has("ebf16"),
// sveebf16: f.has("sveebf16"),
cssc: f.has("cssc"),
// rprfm: f.has("rprfm"),
sve2p1: f.has("sve2p1"),
sme2: f.has("sme2"),
sme2p1: f.has("sme2p1"),
// smei16i32: f.has("smei16i32"),
// smebi32i32: f.has("smebi32i32"),
smeb16b16: f.has("smeb16b16"),
smef16f16: f.has("smef16f16"),
mops: f.has("mops"),
hbc: f.has("hbc"),
sveb16b16: f.has("sveb16b16"),
@ -299,6 +350,12 @@ impl From<super::cpuinfo::CpuInfo> for AtHwcap {
f8dp2: f.has("f8dp2"),
f8e4m3: f.has("f8e4m3"),
f8e5m2: f.has("f8e5m2"),
smelutv2: f.has("smelutv2"),
smef8f16: f.has("smef8f16"),
smef8f32: f.has("smef8f32"),
smesf8fma: f.has("smesf8fma"),
smesf8dp4: f.has("smesf8dp4"),
smesf8dp2: f.has("smesf8dp2"),
}
}
}
@ -431,6 +488,25 @@ impl AtHwcap {
enable_feature(Feature::fp8dot4, self.f8dp4);
enable_feature(Feature::fp8dot2, self.f8dp2);
enable_feature(Feature::wfxt, self.wfxt);
enable_feature(Feature::sme, self.sme && self.bf16);
enable_feature(Feature::sme_i16i64, self.smei16i64 && self.sme);
enable_feature(Feature::sme_f64f64, self.smef64f64 && self.sme);
// enable_feature(Feature::sme_i8i32, self.smei8i32);
// enable_feature(Feature::sme_f16f32, self.smef16f32);
// enable_feature(Feature::sme_b16f32, self.smeb16f32);
// enable_feature(Feature::sme_f32f32, self.smef32f32);
enable_feature(Feature::sme_fa64, self.smefa64 && self.sme && sve2);
enable_feature(Feature::sme2, self.sme2 && self.sme);
enable_feature(Feature::sme2p1, self.sme2p1 && self.sme2 && self.sme);
// enable_feature(Feature::sme_i16i32, self.smei16i32);
// enable_feature(Feature::sme_bi32i32, self.smebi32i32);
enable_feature(Feature::sme_f16f16, self.smef16f16);
enable_feature(Feature::sme_lutv2, self.smelutv2);
enable_feature(Feature::sme_f8f16, self.smef8f16 && self.sme2 && self.f8cvt);
enable_feature(Feature::sme_f8f32, self.smef8f32 && self.sme2 && self.f8cvt);
enable_feature(Feature::ssve_fp8fma, self.smesf8fma && self.sme2);
enable_feature(Feature::ssve_fp8dot4, self.smesf8dp4 && self.sme2);
enable_feature(Feature::ssve_fp8dot2, self.smesf8dp2 && self.sme2);
}
value
}

View file

@ -121,6 +121,28 @@ fn aarch64_linux() {
println!("fp8dot4: {}", is_aarch64_feature_detected!("fp8dot4"));
println!("fp8dot2: {}", is_aarch64_feature_detected!("fp8dot2"));
println!("wfxt: {}", is_aarch64_feature_detected!("wfxt"));
println!("sme: {}", is_aarch64_feature_detected!("sme"));
println!("sme-i16i64: {}", is_aarch64_feature_detected!("sme-i16i64"));
println!("sme-f64f64: {}", is_aarch64_feature_detected!("sme-f64f64"));
println!("sme-fa64: {}", is_aarch64_feature_detected!("sme-fa64"));
println!("sme2: {}", is_aarch64_feature_detected!("sme2"));
println!("sme2p1: {}", is_aarch64_feature_detected!("sme2p1"));
println!("sme-f16f16: {}", is_aarch64_feature_detected!("sme-f16f16"));
println!("sme-lutv2: {}", is_aarch64_feature_detected!("sme-lutv2"));
println!("sme-f8f16: {}", is_aarch64_feature_detected!("sme-f8f16"));
println!("sme-f8f32: {}", is_aarch64_feature_detected!("sme-f8f32"));
println!(
"ssve-fp8fma: {}",
is_aarch64_feature_detected!("ssve-fp8fma")
);
println!(
"ssve-fp8dot4: {}",
is_aarch64_feature_detected!("ssve-fp8dot4")
);
println!(
"ssve-fp8dot2: {}",
is_aarch64_feature_detected!("ssve-fp8dot2")
);
}
#[test]