Add HWCAP2 support for AArch64 Linux. (#1335)

This commit is contained in:
Jacob Bramley 2022-09-22 05:31:46 +01:00 committed by GitHub
parent e0e9e96c1d
commit 8a944e5a5f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 211 additions and 101 deletions

View file

@ -12461,30 +12461,30 @@ mod tests {
}
// Checks `vmmlaq_s32` against hand-computed lanes. The operands deliberately
// include the signed 8-bit extremes (-128 and -1) and a large negative
// accumulator lane (-0x10000).
#[simd_test(enable = "neon,i8mm")]
unsafe fn test_vmmlaq_s32() {
// NOTE(review): the four explicitly typed bindings below are shadowed by the
// untyped `a`/`b`/`c`/`e` that follow and are never read. They look like the
// pre-change side of a diff shown together with the new lines — confirm.
let a: i32x4 = i32x4::new(1, 3, 4, 9);
let b: i8x16 = i8x16::new(1, 21, 31, 14, 5, 6, 17, 8, 9, 13, 15, 12, 13, 19, 20, 16);
let c: i8x16 = i8x16::new(12, 22, 3, 4, 5, 56, 7, 8, 91, 10, 11, 15, 13, 14, 17, 16);
let e: i32x4 = i32x4::new(1, 2, 3, 4);
// Operands that actually reach the intrinsic: `a` is the accumulator, `b` and
// `c` are both signed bytes.
let a = i32x4::new(1, 3, 4, -0x10000);
let b = i8x16::new(1, 21, 31, 14, 5, 6, -128, 8, 9, 13, 15, 12, 13, -1, 20, 16);
let c = i8x16::new(12, 22, 3, 4, -1, 56, 7, 8, 91, 10, -128, 15, 13, 14, 17, 16);
// Every expected lane is consistent with
//   e[2*i + j] = a[2*i + j] + sum over k in 0..8 of b[8*i + k] * c[8*j + k]
// e.g. lane 0: 1 + (12 + 462 + 93 + 56 - 5 + 336 - 896 + 64) = 123
//      lane 3: -65536 + (-40) = -65576
let e = i32x4::new(123, -5353, 690, -65576);
// `transmute` reinterprets the test vectors as whatever argument types the
// intrinsic takes, and the result back into `i32x4` for comparison.
let r: i32x4 = transmute(vmmlaq_s32(transmute(a), transmute(b), transmute(c)));
assert_eq!(r, e);
}
// Checks `vmmlaq_u32` against hand-computed lanes. The operands include the
// unsigned 8-bit values 128 and 255 (which would be negative if misread as
// signed) and an accumulator lane close to the top of the `u32` range.
#[simd_test(enable = "neon,i8mm")]
unsafe fn test_vmmlaq_u32() {
// NOTE(review): the four explicitly typed bindings below are shadowed by the
// untyped `a`/`b`/`c`/`e` that follow and are never read. They look like the
// pre-change side of a diff shown together with the new lines — confirm.
let a: u32x4 = u32x4::new(1, 3, 4, 9);
let b: i8x16 = i8x16::new(1, 21, 31, 14, 5, 6, 17, 8, 9, 13, 15, 12, 13, 19, 20, 16);
let c: i8x16 = i8x16::new(12, 22, 3, 4, 5, 56, 7, 8, 91, 10, 11, 15, 13, 14, 17, 16);
let e: u32x4 = u32x4::new(1, 2, 3, 4);
// Operands that actually reach the intrinsic: `a` is the accumulator, `b` and
// `c` are both unsigned bytes.
let a = u32x4::new(1, 3, 4, 0xffff0000);
let b = u8x16::new(1, 21, 31, 14, 5, 6, 128, 8, 9, 13, 15, 12, 13, 255, 20, 16);
let c = u8x16::new(12, 22, 3, 4, 255, 56, 7, 8, 91, 10, 128, 15, 13, 14, 17, 16);
// Every expected lane is consistent with
//   e[2*i + j] = a[2*i + j] + sum over k in 0..8 of b[8*i + k] * c[8*j + k]
// e.g. lane 0: 1 + (12 + 462 + 93 + 56 + 1275 + 336 + 896 + 64) = 3195
//      lane 3: 0xffff0000 + 7384 = 4294909144 (no wrap-around)
let e = u32x4::new(3195, 6935, 18354, 4294909144);
// `transmute` reinterprets the test vectors as whatever argument types the
// intrinsic takes, and the result back into `u32x4` for comparison.
let r: u32x4 = transmute(vmmlaq_u32(transmute(a), transmute(b), transmute(c)));
assert_eq!(r, e);
}
// Checks `vusmmlaq_s32` against hand-computed lanes. Here `b` is unsigned and
// `c` is signed, so the byte values 128/255 in `b` and -128/-1 in `c` make a
// mix-up of the two signednesses change the result.
#[simd_test(enable = "neon,i8mm")]
unsafe fn test_vusmmlaq_s32() {
// NOTE(review): the four explicitly typed bindings below are shadowed by the
// untyped `a`/`b`/`c`/`e` that follow and are never read. They look like the
// pre-change side of a diff shown together with the new lines — confirm.
let a: i32x4 = i32x4::new(1, 3, 4, 9);
let b: i8x16 = i8x16::new(1, 21, 31, 14, 5, 6, 17, 8, 9, 13, 15, 12, 13, 19, 20, 16);
let c: i8x16 = i8x16::new(12, 22, 3, 4, 5, 56, 7, 8, 91, 10, 11, 15, 13, 14, 17, 16);
let e: i32x4 = i32x4::new(1, 2, 3, 4);
// Operands that actually reach the intrinsic: signed accumulator `a`,
// unsigned bytes `b`, signed bytes `c`.
let a = i32x4::new(1, 3, 4, -0x10000);
let b = u8x16::new(1, 21, 31, 14, 5, 6, 128, 8, 9, 13, 15, 12, 13, 255, 20, 16);
let c = i8x16::new(12, 22, 3, 4, -1, 56, 7, 8, 91, 10, -128, 15, 13, 14, 17, 16);
// Every expected lane is consistent with
//   e[2*i + j] = a[2*i + j] + sum over k in 0..8 of b[8*i + k] * c[8*j + k]
// e.g. lane 0: 1 + (12 + 462 + 93 + 56 - 5 + 336 + 896 + 64) = 1915
//      lane 3: -65536 + 3544 = -61992
let e = i32x4::new(1915, -1001, 15026, -61992);
// `transmute` reinterprets the test vectors as whatever argument types the
// intrinsic takes, and the result back into `i32x4` for comparison.
let r: i32x4 = transmute(vusmmlaq_s32(transmute(a), transmute(b), transmute(c)));
assert_eq!(r, e);
}

View file

@ -23,58 +23,62 @@ pub(crate) fn detect_features() -> cache::Initializer {
/// The names match those used for cpuinfo.
///
/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
#[derive(Debug, Default, PartialEq)]
struct AtHwcap {
// NOTE(review): the numbered field list directly below repeats every field of
// the unnumbered `AT_HWCAP` / `AT_HWCAP2` list further down. This looks like
// both sides of a diff shown together — confirm which list is current.
// The trailing numbers index one combined bit range (0..=50).
fp: bool, // 0
asimd: bool, // 1
// evtstrm: bool, // 2 No LLVM support
aes: bool, // 3
pmull: bool, // 4
sha1: bool, // 5
sha2: bool, // 6
crc32: bool, // 7
atomics: bool, // 8
fphp: bool, // 9
asimdhp: bool, // 10
// cpuid: bool, // 11 No LLVM support
asimdrdm: bool, // 12
jscvt: bool, // 13
fcma: bool, // 14
lrcpc: bool, // 15
dcpop: bool, // 16
sha3: bool, // 17
sm3: bool, // 18
sm4: bool, // 19
asimddp: bool, // 20
sha512: bool, // 21
sve: bool, // 22
fhm: bool, // 23
dit: bool, // 24
uscat: bool, // 25
ilrcpc: bool, // 26
flagm: bool, // 27
ssbs: bool, // 28
sb: bool, // 29
paca: bool, // 30
pacg: bool, // 31
dcpodp: bool, // 32
sve2: bool, // 33
sveaes: bool, // 34
// svepmull: bool, // 35 No LLVM support
svebitperm: bool, // 36
svesha3: bool, // 37
svesm4: bool, // 38
// flagm2: bool, // 39 No LLVM support
frint: bool, // 40
// svei8mm: bool, // 41 See i8mm feature
svef32mm: bool, // 42
svef64mm: bool, // 43
// svebf16: bool, // 44 See bf16 feature
i8mm: bool, // 45
bf16: bool, // 46
// dgh: bool, // 47 No LLVM support
rng: bool, // 48
bti: bool, // 49
mte: bool, // 50
// AT_HWCAP
// Flags decoded from `auxv.hwcap`. Only the `sb`, `paca` and `pacg`
// initializers (bits 29..=31) are visible in this view; the earlier fields are
// presumably read from the same word — confirm against the `From` impl.
fp: bool,
asimd: bool,
// evtstrm: No LLVM support.
aes: bool,
pmull: bool,
sha1: bool,
sha2: bool,
crc32: bool,
atomics: bool,
fphp: bool,
asimdhp: bool,
// cpuid: No LLVM support.
asimdrdm: bool,
jscvt: bool,
fcma: bool,
lrcpc: bool,
dcpop: bool,
sha3: bool,
sm3: bool,
sm4: bool,
asimddp: bool,
sha512: bool,
sve: bool,
fhm: bool,
dit: bool,
uscat: bool,
ilrcpc: bool,
flagm: bool,
ssbs: bool,
sb: bool,
paca: bool,
pacg: bool,
// AT_HWCAP2
// Flags decoded from `auxv.hwcap2`. Bit positions restart at zero in this
// word: `dcpodp` is bit 0 and `mte` is bit 18, i.e. the former combined index
// minus 32.
dcpodp: bool,
sve2: bool,
sveaes: bool,
// svepmull: No LLVM support.
svebitperm: bool,
svesha3: bool,
svesm4: bool,
// flagm2: No LLVM support.
frint: bool,
// svei8mm: See i8mm feature.
svef32mm: bool,
svef64mm: bool,
// svebf16: See bf16 feature.
i8mm: bool,
bf16: bool,
// dgh: No LLVM support.
rng: bool,
bti: bool,
mte: bool,
}
impl From<auxvec::AuxVec> for AtHwcap {
@ -113,25 +117,25 @@ impl From<auxvec::AuxVec> for AtHwcap {
sb: bit::test(auxv.hwcap, 29),
paca: bit::test(auxv.hwcap, 30),
pacg: bit::test(auxv.hwcap, 31),
dcpodp: bit::test(auxv.hwcap, 32),
sve2: bit::test(auxv.hwcap, 33),
sveaes: bit::test(auxv.hwcap, 34),
// svepmull: bit::test(auxv.hwcap, 35),
svebitperm: bit::test(auxv.hwcap, 36),
svesha3: bit::test(auxv.hwcap, 37),
svesm4: bit::test(auxv.hwcap, 38),
// flagm2: bit::test(auxv.hwcap, 39),
frint: bit::test(auxv.hwcap, 40),
// svei8mm: bit::test(auxv.hwcap, 41),
svef32mm: bit::test(auxv.hwcap, 42),
svef64mm: bit::test(auxv.hwcap, 43),
// svebf16: bit::test(auxv.hwcap, 44),
i8mm: bit::test(auxv.hwcap, 45),
bf16: bit::test(auxv.hwcap, 46),
// dgh: bit::test(auxv.hwcap, 47),
rng: bit::test(auxv.hwcap, 48),
bti: bit::test(auxv.hwcap, 49),
mte: bit::test(auxv.hwcap, 50),
dcpodp: bit::test(auxv.hwcap2, 0),
sve2: bit::test(auxv.hwcap2, 1),
sveaes: bit::test(auxv.hwcap2, 2),
// svepmull: bit::test(auxv.hwcap2, 3),
svebitperm: bit::test(auxv.hwcap2, 4),
svesha3: bit::test(auxv.hwcap2, 5),
svesm4: bit::test(auxv.hwcap2, 6),
// flagm2: bit::test(auxv.hwcap2, 7),
frint: bit::test(auxv.hwcap2, 8),
// svei8mm: bit::test(auxv.hwcap2, 9),
svef32mm: bit::test(auxv.hwcap2, 10),
svef64mm: bit::test(auxv.hwcap2, 11),
// svebf16: bit::test(auxv.hwcap2, 12),
i8mm: bit::test(auxv.hwcap2, 13),
bf16: bit::test(auxv.hwcap2, 14),
// dgh: bit::test(auxv.hwcap2, 15),
rng: bit::test(auxv.hwcap2, 16),
bti: bit::test(auxv.hwcap2, 17),
mte: bit::test(auxv.hwcap2, 18),
}
}
}
@ -288,3 +292,86 @@ impl AtHwcap {
value
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Decoding tests driven by auxv dumps stored under `src/detect/test_data`.
    #[cfg(feature = "std_detect_file_io")]
    mod auxv_from_file {
        use super::auxvec::auxv_from_file;
        use super::*;

        /// Capability set that every (artificial) aarch64 auxv test file enables.
        fn baseline_hwcaps() -> AtHwcap {
            AtHwcap {
                fp: true,
                asimd: true,
                aes: true,
                pmull: true,
                sha1: true,
                sha2: true,
                crc32: true,
                atomics: true,
                fphp: true,
                asimdhp: true,
                asimdrdm: true,
                lrcpc: true,
                dcpop: true,
                asimddp: true,
                ssbs: true,
                ..AtHwcap::default()
            }
        }

        /// Reads the auxv dump at `path`, logs the path and both raw capability
        /// words, and returns the decoded flags. Panics if the file cannot be read.
        fn decode_auxv_file(path: &str) -> AtHwcap {
            println!("file: {}", path);
            let raw = auxv_from_file(path).unwrap();
            println!("HWCAP : 0x{:0x}", raw.hwcap);
            println!("HWCAP2: 0x{:0x}", raw.hwcap2);
            AtHwcap::from(raw)
        }

        /// An AT_HWCAP2 entry that is present but zero adds nothing to the baseline.
        #[test]
        fn linux_empty_hwcap2_aarch64() {
            let detected = decode_auxv_file(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/src/detect/test_data/linux-empty-hwcap2-aarch64.auxv"
            ));
            assert_eq!(detected, baseline_hwcaps());
        }

        /// A dump without any AT_HWCAP2 entry decodes to the baseline as well.
        #[test]
        fn linux_no_hwcap2_aarch64() {
            let detected = decode_auxv_file(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/src/detect/test_data/linux-no-hwcap2-aarch64.auxv"
            ));
            assert_eq!(detected, baseline_hwcaps());
        }

        /// A dump with a populated AT_HWCAP2 turns on the HWCAP2-only flags.
        #[test]
        fn linux_hwcap2_aarch64() {
            let detected = decode_auxv_file(concat!(
                env!("CARGO_MANIFEST_DIR"),
                "/src/detect/test_data/linux-hwcap2-aarch64.auxv"
            ));
            let expected = AtHwcap {
                // Some other HWCAP bits.
                paca: true,
                pacg: true,
                // HWCAP2-only bits.
                dcpodp: true,
                frint: true,
                rng: true,
                bti: true,
                mte: true,
                ..baseline_hwcaps()
            };
            assert_eq!(detected, expected);
        }
    }
}

View file

@ -7,6 +7,7 @@ pub(crate) const AT_NULL: usize = 0;
pub(crate) const AT_HWCAP: usize = 16;
/// Key to access the CPU Hardware capabilities 2 bitfield.
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "powerpc",
target_arch = "powerpc64"
@ -21,6 +22,7 @@ pub(crate) const AT_HWCAP2: usize = 26;
pub(crate) struct AuxVec {
pub hwcap: usize,
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "powerpc",
target_arch = "powerpc64"
@ -64,13 +66,14 @@ pub(crate) fn auxv() -> Result<AuxVec, ()> {
if let Ok(hwcap) = getauxval(AT_HWCAP) {
// Targets with only AT_HWCAP:
#[cfg(any(
target_arch = "aarch64",
target_arch = "riscv32",
target_arch = "riscv64",
target_arch = "mips",
target_arch = "mips64"
))]
{
// Zero could indicate that no features were detected, but it's also used to
// indicate an error. In either case, try the fallback.
if hwcap != 0 {
return Ok(AuxVec { hwcap });
}
@ -78,13 +81,18 @@ pub(crate) fn auxv() -> Result<AuxVec, ()> {
// Targets with AT_HWCAP and AT_HWCAP2:
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "powerpc",
target_arch = "powerpc64"
))]
{
if let Ok(hwcap2) = getauxval(AT_HWCAP2) {
if hwcap != 0 && hwcap2 != 0 {
// Zero could indicate that no features were detected, but it's also used to
// indicate an error. In particular, on many platforms AT_HWCAP2 will be
// legitimately zero, since it contains the most recent feature flags. Use the
// fallback only if no features were detected at all.
if hwcap != 0 || hwcap2 != 0 {
return Ok(AuxVec { hwcap, hwcap2 });
}
}
@ -97,7 +105,6 @@ pub(crate) fn auxv() -> Result<AuxVec, ()> {
{
// Targets with only AT_HWCAP:
#[cfg(any(
target_arch = "aarch64",
target_arch = "riscv32",
target_arch = "riscv64",
target_arch = "mips",
@ -105,6 +112,8 @@ pub(crate) fn auxv() -> Result<AuxVec, ()> {
))]
{
let hwcap = unsafe { libc::getauxval(AT_HWCAP as libc::c_ulong) as usize };
// Zero could indicate that no features were detected, but it's also used to indicate
// an error. In either case, try the fallback.
if hwcap != 0 {
return Ok(AuxVec { hwcap });
}
@ -112,6 +121,7 @@ pub(crate) fn auxv() -> Result<AuxVec, ()> {
// Targets with AT_HWCAP and AT_HWCAP2:
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "powerpc",
target_arch = "powerpc64"
@ -119,7 +129,11 @@ pub(crate) fn auxv() -> Result<AuxVec, ()> {
{
let hwcap = unsafe { libc::getauxval(AT_HWCAP as libc::c_ulong) as usize };
let hwcap2 = unsafe { libc::getauxval(AT_HWCAP2 as libc::c_ulong) as usize };
if hwcap != 0 && hwcap2 != 0 {
// Zero could indicate that no features were detected, but it's also used to indicate
// an error. In particular, on many platforms AT_HWCAP2 will be legitimately zero,
// since it contains the most recent feature flags. Use the fallback only if no
// features were detected at all.
if hwcap != 0 || hwcap2 != 0 {
return Ok(AuxVec { hwcap, hwcap2 });
}
}
@ -158,7 +172,7 @@ fn getauxval(key: usize) -> Result<usize, ()> {
/// Tries to read the auxiliary vector from the `file`. If this fails, this
/// function returns `Err`.
#[cfg(feature = "std_detect_file_io")]
fn auxv_from_file(file: &str) -> Result<AuxVec, ()> {
pub(super) fn auxv_from_file(file: &str) -> Result<AuxVec, ()> {
let file = super::read_file(file)?;
// See <https://github.com/torvalds/linux/blob/v3.19/include/uapi/linux/auxvec.h>.
@ -181,7 +195,6 @@ fn auxv_from_file(file: &str) -> Result<AuxVec, ()> {
fn auxv_from_buf(buf: &[usize; 64]) -> Result<AuxVec, ()> {
// Targets with only AT_HWCAP:
#[cfg(any(
target_arch = "aarch64",
target_arch = "riscv32",
target_arch = "riscv64",
target_arch = "mips",
@ -198,23 +211,25 @@ fn auxv_from_buf(buf: &[usize; 64]) -> Result<AuxVec, ()> {
}
// Targets with AT_HWCAP and AT_HWCAP2:
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "powerpc",
target_arch = "powerpc64"
))]
{
let mut hwcap = None;
let mut hwcap2 = None;
// For some platforms, AT_HWCAP2 was added recently, so let it default to zero.
let mut hwcap2 = 0;
for el in buf.chunks(2) {
match el[0] {
AT_NULL => break,
AT_HWCAP => hwcap = Some(el[1]),
AT_HWCAP2 => hwcap2 = Some(el[1]),
AT_HWCAP2 => hwcap2 = el[1],
_ => (),
}
}
if let (Some(hwcap), Some(hwcap2)) = (hwcap, hwcap2) {
if let Some(hwcap) = hwcap {
return Ok(AuxVec { hwcap, hwcap2 });
}
}
@ -256,7 +271,6 @@ mod tests {
// FIXME: on mips/mips64 getauxval returns 0, and /proc/self/auxv
// does not always contain the AT_HWCAP key under qemu.
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "powerpc",
target_arch = "powerpc64"
@ -271,6 +285,7 @@ mod tests {
// Targets with AT_HWCAP and AT_HWCAP2:
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "powerpc",
target_arch = "powerpc64"
@ -305,24 +320,31 @@ mod tests {
}
// Reads an auxv dump that has an AT_HWCAP entry but no AT_HWCAP2 entry.
//
// NOTE(review): this block carries two contradictory expectations. The
// `#[should_panic]` attribute, the "reading should fail" comment and
// `let _ = v;` describe a test that expects `auxv_from_file` to return `Err`.
// The newer comment and the two live assertions expect it to succeed with
// HWCAP2 treated as zero. As written, a successful read with matching values
// would not panic and `#[should_panic]` would fail the test. This looks like
// the before/after sides of one change shown together — confirm which applies.
#[test]
#[should_panic]
fn linux_macos_vb() {
let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv");
println!("file: {}", file);
// The file contains HWCAP but not HWCAP2. In that case, we treat HWCAP2 as zero.
let v = auxv_from_file(file).unwrap();
// this file is incomplete (contains hwcap but not hwcap2), we
// want to fall back to /proc/cpuinfo in this case, so
// reading should fail. assert_eq!(v.hwcap, 126614527);
// assert_eq!(v.hwcap2, 0);
let _ = v;
// Expected raw values for this dump: HWCAP as recorded, HWCAP2 defaulted to 0.
assert_eq!(v.hwcap, 126614527);
assert_eq!(v.hwcap2, 0);
}
} else if #[cfg(target_arch = "aarch64")] {
#[test]
fn linux_x64() {
let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-x64-i7-6850k.auxv");
fn linux_artificial_aarch64() {
let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-artificial-aarch64.auxv");
println!("file: {}", file);
let v = auxv_from_file(file).unwrap();
assert_eq!(v.hwcap, 3219913727);
assert_eq!(v.hwcap, 0x0123456789abcdef);
assert_eq!(v.hwcap2, 0x02468ace13579bdf);
}
// A dump that lacks an AT_HWCAP2 entry must still be accepted: HWCAP keeps its
// recorded (non-zero) value and HWCAP2 reads back as zero.
#[test]
fn linux_no_hwcap2_aarch64() {
    let dump_path = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-no-hwcap2-aarch64.auxv");
    println!("file: {}", dump_path);
    let parsed = auxv_from_file(dump_path).unwrap();
    // HWCAP itself is present in the dump, so it must not be zero.
    assert_ne!(parsed.hwcap, 0);
    // The missing HWCAP2 entry defaults to zero without rejecting the dump.
    assert_eq!(parsed.hwcap2, 0);
}
}
}
@ -353,6 +375,7 @@ mod tests {
// Targets with AT_HWCAP and AT_HWCAP2:
#[cfg(any(
target_arch = "aarch64",
target_arch = "arm",
target_arch = "powerpc",
target_arch = "powerpc64"