std_detect: Add aarch64/linux/LLVM features

Add detection for various aarch64 CPU features already supported by LLVM and Linux.

This commit adds feature detection for the following features:

- FEAT_CSSC
- FEAT_ECV
- FEAT_FAMINMAX
- FEAT_FLAGM2
- FEAT_FP8
- FEAT_FP8DOT2
- FEAT_FP8DOT4
- FEAT_FP8FMA
- FEAT_HBC
- FEAT_LSE128
- FEAT_LUT
- FEAT_MOPS
- FEAT_LRCPC3
- FEAT_SVE_B16B16
- FEAT_SVE2p1
- FEAT_WFxT

It also adds feature detection for FEAT_FPMR. This is somewhat of a
special case: FPMR only exists as a feature in LLVM 18 and has since
been removed from upstream LLVM. For that reason, the intention is
for it to be detectable at runtime through stdarch but not to have a
corresponding compile-time Rust target feature.

Linux features: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
LLVM features: llvm-project/llvm/lib/Target/AArch64/AArch64.td
This commit is contained in:
Kajetan Puchalski 2024-06-14 17:45:59 +01:00 committed by Amanieu d'Antras
parent aa84427fd4
commit dfc5dfc8ef
4 changed files with 190 additions and 9 deletions

View file

@ -22,22 +22,27 @@ features! {
/// * `"crc"` - FEAT_CRC
/// * `"lse"` - FEAT_LSE
/// * `"lse2"` - FEAT_LSE2
/// * `"lse128"` - FEAT_LSE128
/// * `"rdm"` - FEAT_RDM
/// * `"rcpc"` - FEAT_LRCPC
/// * `"rcpc2"` - FEAT_LRCPC2
/// * `"rcpc3"` - FEAT_LRCPC3
/// * `"dotprod"` - FEAT_DotProd
/// * `"tme"` - FEAT_TME
/// * `"fhm"` - FEAT_FHM
/// * `"dit"` - FEAT_DIT
/// * `"flagm"` - FEAT_FLAGM
/// * `"flagm2"` - FEAT_FLAGM2
/// * `"ssbs"` - FEAT_SSBS & FEAT_SSBS2
/// * `"sb"` - FEAT_SB
/// * `"paca"` - FEAT_PAuth (address authentication)
/// * `"pacg"` - FEAT_PAuth (generic authentication)
/// * `"dpb"` - FEAT_DPB
/// * `"dpb2"` - FEAT_DPB2
/// * `"sve-b16b16"` - FEAT_SVE_B16B16
/// * `"sve2"` - FEAT_SVE2
/// * `"sve2-aes"` - FEAT_SVE2_AES
/// * `"sve2p1"` - FEAT_SVE2p1
/// * `"sve2-aes"` - FEAT_SVE_AES & FEAT_SVE_PMULL128 (SVE2 AES crypto)
/// * `"sve2-sm4"` - FEAT_SVE2_SM4
/// * `"sve2-sha3"` - FEAT_SVE2_SHA3
/// * `"sve2-bitperm"` - FEAT_SVE2_BitPerm
@ -55,6 +60,18 @@ features! {
/// * `"sha2"` - FEAT_SHA1 & FEAT_SHA256
/// * `"sha3"` - FEAT_SHA512 & FEAT_SHA3
/// * `"sm4"` - FEAT_SM3 & FEAT_SM4
/// * `"hbc"` - FEAT_HBC
/// * `"mops"` - FEAT_MOPS
/// * `"ecv"` - FEAT_ECV
/// * `"cssc"` - FEAT_CSSC
/// * `"fpmr"` - FEAT_FPMR
/// * `"lut"` - FEAT_LUT
/// * `"faminmax"` - FEAT_FAMINMAX
/// * `"fp8"` - FEAT_FP8
/// * `"fp8fma"` - FEAT_FP8FMA
/// * `"fp8dot4"` - FEAT_FP8DOT4
/// * `"fp8dot2"` - FEAT_FP8DOT2
/// * `"wfxt"` - FEAT_WFxT
///
/// [docs]: https://developer.arm.com/documentation/ddi0487/latest
#[stable(feature = "simd_aarch64", since = "1.60.0")]
@ -67,6 +84,14 @@ features! {
@NO_RUNTIME_DETECTION: "v8.5a";
@NO_RUNTIME_DETECTION: "v8.6a";
@NO_RUNTIME_DETECTION: "v8.7a";
@NO_RUNTIME_DETECTION: "v8.8a";
@NO_RUNTIME_DETECTION: "v8.9a";
@NO_RUNTIME_DETECTION: "v9.1a";
@NO_RUNTIME_DETECTION: "v9.2a";
@NO_RUNTIME_DETECTION: "v9.3a";
@NO_RUNTIME_DETECTION: "v9.4a";
@NO_RUNTIME_DETECTION: "v9.5a";
@NO_RUNTIME_DETECTION: "v9a";
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] asimd: "neon";
/// FEAT_AdvSIMD (Advanced SIMD/NEON)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] pmull: "pmull";
@ -85,12 +110,16 @@ features! {
/// FEAT_LSE (Large System Extension - atomics)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] lse2: "lse2";
/// FEAT_LSE2 (unaligned and register-pair atomics)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] lse128: "lse128";
/// FEAT_LSE128 (128-bit atomics)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rdm: "rdm";
/// FEAT_RDM (Rounding Doubling Multiply - ASIMDRDM)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc: "rcpc";
/// FEAT_LRCPC (Release consistent Processor consistent)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc2: "rcpc2";
/// FEAT_LRCPC2 (RCPC with immediate offsets)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] rcpc3: "rcpc3";
/// FEAT_LRCPC3 (RCPC Instructions v3)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dotprod: "dotprod";
/// FEAT_DotProd (Vector Dot-Product - ASIMDDP)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] tme: "tme";
@ -101,6 +130,8 @@ features! {
/// FEAT_DIT (Data Independent Timing instructions)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] flagm: "flagm";
/// FEAT_FLAGM (flag manipulation instructions)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] flagm2: "flagm2";
/// FEAT_FLAGM2 (flag manipulation instructions)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] ssbs: "ssbs";
/// FEAT_SSBS & FEAT_SSBS2 (speculative store bypass safe)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sb: "sb";
@ -115,14 +146,18 @@ features! {
/// FEAT_DPB2 (aka dcpodp - data cache clean to point of deep persistence)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2: "sve2";
/// FEAT_SVE2 (Scalable Vector Extension 2)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sve2p1: "sve2p1";
/// FEAT_SVE2p1 (Scalable Vector Extension 2.1)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_aes: "sve2-aes";
/// FEAT_SVE_AES (SVE2 AES crypto)
/// FEAT_SVE_AES & FEAT_SVE_PMULL128 (SVE2 AES crypto)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sm4: "sve2-sm4";
/// FEAT_SVE_SM4 (SVE2 SM4 crypto)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sha3: "sve2-sha3";
/// FEAT_SVE_SHA3 (SVE2 SHA3 crypto)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_bitperm: "sve2-bitperm";
/// FEAT_SVE_BitPerm (SVE2 bit permutation instructions)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] sve_b16b16: "sve-b16b16";
/// FEAT_SVE_B16B16 (SVE or SME Instructions)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] frintts: "frintts";
/// FEAT_FRINTTS (float to integer rounding instructions)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] i8mm: "i8mm";
@ -151,4 +186,28 @@ features! {
/// FEAT_SHA512 & FEAT_SHA3 (SHA2-512 & SHA3 instructions)
@FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sm4: "sm4";
/// FEAT_SM3 & FEAT_SM4 (SM3 & SM4 instructions)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] hbc: "hbc";
/// FEAT_HBC (Hinted conditional branches)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] mops: "mops";
/// FEAT_MOPS (Standardization of memory operations)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] ecv: "ecv";
/// FEAT_ECV (Enhanced Counter Virtualization)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] cssc: "cssc";
/// FEAT_CSSC (Common Short Sequence Compression instructions)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fpmr: "fpmr";
/// FEAT_FPMR (Special-purpose AArch64-FPMR register)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] lut: "lut";
/// FEAT_LUT (Lookup Table Instructions)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] faminmax: "faminmax";
/// FEAT_FAMINMAX (FAMIN and FAMAX SIMD/SVE/SME instructions)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8: "fp8";
/// FEAT_FP8 (F8CVT Instructions)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8fma: "fp8fma";
/// FEAT_FP8FMA (F8FMA Instructions)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8dot4: "fp8dot4";
/// FEAT_FP8DOT4 (F8DP4 Instructions)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] fp8dot2: "fp8dot2";
/// FEAT_FP8DOT2 (F8DP2 Instructions)
@FEATURE: #[unstable(feature = "stdarch_aarch64_feature_detection", issue = "127764")] wfxt: "wfxt";
/// FEAT_WFxT (WFET and WFIT Instructions)
}

View file

@ -83,11 +83,11 @@ struct AtHwcap {
dcpodp: bool,
sve2: bool,
sveaes: bool,
// svepmull: No LLVM support.
svepmull: bool,
svebitperm: bool,
svesha3: bool,
svesm4: bool,
// flagm2: No LLVM support.
flagm2: bool,
frint: bool,
// svei8mm: See i8mm feature.
svef32mm: bool,
@ -99,6 +99,31 @@ struct AtHwcap {
rng: bool,
bti: bool,
mte: bool,
ecv: bool,
// afp: bool,
// rpres: bool,
// mte3: bool,
wfxt: bool,
// ebf16: bool,
// sveebf16: bool,
cssc: bool,
// rprfm: bool,
sve2p1: bool,
smeb16b16: bool,
mops: bool,
hbc: bool,
sveb16b16: bool,
lrcpc3: bool,
lse128: bool,
fpmr: bool,
lut: bool,
faminmax: bool,
f8cvt: bool,
f8fma: bool,
f8dp4: bool,
f8dp2: bool,
f8e4m3: bool,
f8e5m2: bool,
}
impl From<auxvec::AuxVec> for AtHwcap {
@ -137,14 +162,16 @@ impl From<auxvec::AuxVec> for AtHwcap {
sb: bit::test(auxv.hwcap, 29),
paca: bit::test(auxv.hwcap, 30),
pacg: bit::test(auxv.hwcap, 31),
// AT_HWCAP2
dcpodp: bit::test(auxv.hwcap2, 0),
sve2: bit::test(auxv.hwcap2, 1),
sveaes: bit::test(auxv.hwcap2, 2),
// svepmull: bit::test(auxv.hwcap2, 3),
svepmull: bit::test(auxv.hwcap2, 3),
svebitperm: bit::test(auxv.hwcap2, 4),
svesha3: bit::test(auxv.hwcap2, 5),
svesm4: bit::test(auxv.hwcap2, 6),
// flagm2: bit::test(auxv.hwcap2, 7),
flagm2: bit::test(auxv.hwcap2, 7),
frint: bit::test(auxv.hwcap2, 8),
// svei8mm: bit::test(auxv.hwcap2, 9),
svef32mm: bit::test(auxv.hwcap2, 10),
@ -156,6 +183,31 @@ impl From<auxvec::AuxVec> for AtHwcap {
rng: bit::test(auxv.hwcap2, 16),
bti: bit::test(auxv.hwcap2, 17),
mte: bit::test(auxv.hwcap2, 18),
ecv: bit::test(auxv.hwcap2, 19),
// afp: bit::test(auxv.hwcap2, 20),
// rpres: bit::test(auxv.hwcap2, 21),
// mte3: bit::test(auxv.hwcap2, 22),
wfxt: bit::test(auxv.hwcap2, 31),
// ebf16: bit::test(auxv.hwcap2, 32),
// sveebf16: bit::test(auxv.hwcap2, 33),
cssc: bit::test(auxv.hwcap2, 34),
// rprfm: bit::test(auxv.hwcap2, 35),
sve2p1: bit::test(auxv.hwcap2, 36),
smeb16b16: bit::test(auxv.hwcap2, 41),
mops: bit::test(auxv.hwcap2, 43),
hbc: bit::test(auxv.hwcap2, 44),
sveb16b16: bit::test(auxv.hwcap2, 45),
lrcpc3: bit::test(auxv.hwcap2, 46),
lse128: bit::test(auxv.hwcap2, 47),
fpmr: bit::test(auxv.hwcap2, 48),
lut: bit::test(auxv.hwcap2, 49),
faminmax: bit::test(auxv.hwcap2, 50),
f8cvt: bit::test(auxv.hwcap2, 51),
f8fma: bit::test(auxv.hwcap2, 52),
f8dp4: bit::test(auxv.hwcap2, 53),
f8dp2: bit::test(auxv.hwcap2, 54),
f8e4m3: bit::test(auxv.hwcap2, 55),
f8e5m2: bit::test(auxv.hwcap2, 56),
}
}
}
@ -201,14 +253,16 @@ impl From<super::cpuinfo::CpuInfo> for AtHwcap {
sb: f.has("sb"),
paca: f.has("paca"),
pacg: f.has("pacg"),
// AT_HWCAP2
dcpodp: f.has("dcpodp"),
sve2: f.has("sve2"),
sveaes: f.has("sveaes"),
// svepmull: f.has("svepmull"),
svepmull: f.has("svepmull"),
svebitperm: f.has("svebitperm"),
svesha3: f.has("svesha3"),
svesm4: f.has("svesm4"),
// flagm2: f.has("flagm2"),
flagm2: f.has("flagm2"),
frint: f.has("frint"),
// svei8mm: f.has("svei8mm"),
svef32mm: f.has("svef32mm"),
@ -220,6 +274,31 @@ impl From<super::cpuinfo::CpuInfo> for AtHwcap {
rng: f.has("rng"),
bti: f.has("bti"),
mte: f.has("mte"),
ecv: f.has("ecv"),
// afp: f.has("afp"),
// rpres: f.has("rpres"),
// mte3: f.has("mte3"),
wfxt: f.has("wfxt"),
// ebf16: f.has("ebf16"),
// sveebf16: f.has("sveebf16"),
cssc: f.has("cssc"),
// rprfm: f.has("rprfm"),
sve2p1: f.has("sve2p1"),
smeb16b16: f.has("smeb16b16"),
mops: f.has("mops"),
hbc: f.has("hbc"),
sveb16b16: f.has("sveb16b16"),
lrcpc3: f.has("lrcpc3"),
lse128: f.has("lse128"),
fpmr: f.has("fpmr"),
lut: f.has("lut"),
faminmax: f.has("faminmax"),
f8cvt: f.has("f8cvt"),
f8fma: f.has("f8fma"),
f8dp4: f.has("f8dp4"),
f8dp2: f.has("f8dp2"),
f8e4m3: f.has("f8e4m3"),
f8e5m2: f.has("f8e5m2"),
}
}
}
@ -267,11 +346,14 @@ impl AtHwcap {
enable_feature(Feature::crc, self.crc32);
enable_feature(Feature::lse, self.atomics);
enable_feature(Feature::lse2, self.uscat);
enable_feature(Feature::lse128, self.lse128);
enable_feature(Feature::rcpc, self.lrcpc);
// RCPC2 (rcpc-immo in LLVM) requires RCPC support
enable_feature(Feature::rcpc2, self.ilrcpc && self.lrcpc);
enable_feature(Feature::rcpc3, self.lrcpc3);
enable_feature(Feature::dit, self.dit);
enable_feature(Feature::flagm, self.flagm);
enable_feature(Feature::flagm2, self.flagm2);
enable_feature(Feature::ssbs, self.ssbs);
enable_feature(Feature::sb, self.sb);
enable_feature(Feature::paca, self.paca);
@ -317,8 +399,12 @@ impl AtHwcap {
// SVE2 requires SVE
let sve2 = self.sve2 && self.sve && asimd;
enable_feature(Feature::sve2, sve2);
enable_feature(Feature::sve2p1, self.sve2p1);
// SVE2 extensions require SVE2 and crypto features
enable_feature(Feature::sve2_aes, self.sveaes && sve2 && self.aes);
enable_feature(
Feature::sve2_aes,
self.sveaes && self.svepmull && sve2 && self.aes,
);
enable_feature(
Feature::sve2_sm4,
self.svesm4 && sve2 && self.sm3 && self.sm4,
@ -328,6 +414,23 @@ impl AtHwcap {
self.svesha3 && sve2 && self.sha512 && self.sha3 && self.sha1 && self.sha2,
);
enable_feature(Feature::sve2_bitperm, self.svebitperm && self.sve2);
// SVE_B16B16 can be implemented either for SVE or SME
enable_feature(
Feature::sve_b16b16,
self.bf16 && (self.sveb16b16 || self.smeb16b16),
);
enable_feature(Feature::hbc, self.hbc);
enable_feature(Feature::mops, self.mops);
enable_feature(Feature::ecv, self.ecv);
enable_feature(Feature::lut, self.lut);
enable_feature(Feature::cssc, self.cssc);
enable_feature(Feature::fpmr, self.fpmr);
enable_feature(Feature::faminmax, self.faminmax);
enable_feature(Feature::fp8, self.f8cvt);
enable_feature(Feature::fp8fma, self.f8fma);
enable_feature(Feature::fp8dot4, self.f8dp4);
enable_feature(Feature::fp8dot2, self.f8dp2);
enable_feature(Feature::wfxt, self.wfxt);
}
value
}

View file

@ -1,6 +1,7 @@
#![allow(internal_features)]
#![feature(stdarch_internal)]
#![cfg_attr(target_arch = "arm", feature(stdarch_arm_feature_detection))]
#![cfg_attr(target_arch = "aarch64", feature(stdarch_aarch64_feature_detection))]
#![cfg_attr(target_arch = "powerpc", feature(stdarch_powerpc_feature_detection))]
#![cfg_attr(target_arch = "powerpc64", feature(stdarch_powerpc_feature_detection))]
#![cfg_attr(
@ -67,21 +68,26 @@ fn aarch64_linux() {
println!("crc: {}", is_aarch64_feature_detected!("crc"));
println!("lse: {}", is_aarch64_feature_detected!("lse"));
println!("lse2: {}", is_aarch64_feature_detected!("lse2"));
println!("lse128: {}", is_aarch64_feature_detected!("lse128"));
println!("rdm: {}", is_aarch64_feature_detected!("rdm"));
println!("rcpc: {}", is_aarch64_feature_detected!("rcpc"));
println!("rcpc2: {}", is_aarch64_feature_detected!("rcpc2"));
println!("rcpc3: {}", is_aarch64_feature_detected!("rcpc3"));
println!("dotprod: {}", is_aarch64_feature_detected!("dotprod"));
println!("tme: {}", is_aarch64_feature_detected!("tme"));
println!("fhm: {}", is_aarch64_feature_detected!("fhm"));
println!("dit: {}", is_aarch64_feature_detected!("dit"));
println!("flagm: {}", is_aarch64_feature_detected!("flagm"));
println!("flagm2: {}", is_aarch64_feature_detected!("flagm2"));
println!("ssbs: {}", is_aarch64_feature_detected!("ssbs"));
println!("sb: {}", is_aarch64_feature_detected!("sb"));
println!("paca: {}", is_aarch64_feature_detected!("paca"));
println!("pacg: {}", is_aarch64_feature_detected!("pacg"));
println!("dpb: {}", is_aarch64_feature_detected!("dpb"));
println!("dpb2: {}", is_aarch64_feature_detected!("dpb2"));
println!("sve-b16b16: {}", is_aarch64_feature_detected!("sve-b16b16"));
println!("sve2: {}", is_aarch64_feature_detected!("sve2"));
println!("sve2p1: {}", is_aarch64_feature_detected!("sve2p1"));
println!("sve2-aes: {}", is_aarch64_feature_detected!("sve2-aes"));
println!("sve2-sm4: {}", is_aarch64_feature_detected!("sve2-sm4"));
println!("sve2-sha3: {}", is_aarch64_feature_detected!("sve2-sha3"));
@ -103,6 +109,18 @@ fn aarch64_linux() {
println!("sha2: {}", is_aarch64_feature_detected!("sha2"));
println!("sha3: {}", is_aarch64_feature_detected!("sha3"));
println!("sm4: {}", is_aarch64_feature_detected!("sm4"));
println!("hbc: {}", is_aarch64_feature_detected!("hbc"));
println!("mops: {}", is_aarch64_feature_detected!("mops"));
println!("ecv: {}", is_aarch64_feature_detected!("ecv"));
println!("cssc: {}", is_aarch64_feature_detected!("cssc"));
println!("fpmr: {}", is_aarch64_feature_detected!("fpmr"));
println!("lut: {}", is_aarch64_feature_detected!("lut"));
println!("faminmax: {}", is_aarch64_feature_detected!("faminmax"));
println!("fp8: {}", is_aarch64_feature_detected!("fp8"));
println!("fp8fma: {}", is_aarch64_feature_detected!("fp8fma"));
println!("fp8dot4: {}", is_aarch64_feature_detected!("fp8dot4"));
println!("fp8dot2: {}", is_aarch64_feature_detected!("fp8dot2"));
println!("wfxt: {}", is_aarch64_feature_detected!("wfxt"));
}
#[test]

View file

@ -12,6 +12,7 @@
feature(stdarch_internal)
)]
#![cfg_attr(target_arch = "arm", feature(stdarch_arm_feature_detection))]
#![cfg_attr(target_arch = "aarch64", feature(stdarch_aarch64_feature_detection))]
#![cfg_attr(target_arch = "powerpc", feature(stdarch_powerpc_feature_detection))]
#![cfg_attr(target_arch = "powerpc64", feature(stdarch_powerpc_feature_detection))]
#![allow(clippy::unwrap_used, clippy::use_debug, clippy::print_stdout)]