diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs index de89d77e5e2c..ae641871279b 100644 --- a/library/core/src/slice/ascii.rs +++ b/library/core/src/slice/ascii.rs @@ -465,6 +465,7 @@ const fn is_ascii(s: &[u8]) -> bool { const SSE2_CHUNK_SIZE: usize = 64; #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] +#[inline] fn is_ascii_sse2(bytes: &[u8]) -> bool { use crate::arch::x86_64::{__m128i, _mm_loadu_si128, _mm_movemask_epi8, _mm_or_si128}; diff --git a/tests/assembly-llvm/slice-is-ascii.rs b/tests/assembly-llvm/slice-is-ascii.rs index 00deb23e9a6c..b9a520505498 100644 --- a/tests/assembly-llvm/slice-is-ascii.rs +++ b/tests/assembly-llvm/slice-is-ascii.rs @@ -1,13 +1,28 @@ -//@ revisions: LA64 +//@ revisions: X86_64 LA64 //@ assembly-output: emit-asm //@ compile-flags: -C opt-level=3 // +//@ [X86_64] only-x86_64 +//@ [X86_64] compile-flags: -C target-cpu=znver4 +//@ [X86_64] compile-flags: -C llvm-args=-x86-asm-syntax=intel +// //@ [LA64] only-loongarch64 #![crate_type = "lib"] +/// Verify `is_ascii` generates efficient code on different architectures: +/// +/// - x86_64: Must NOT use `kshiftrd`/`kshiftrq` (broken AVX-512 auto-vectorization). +/// Good version uses explicit SSE2 intrinsics, emitting `pmovmskb`/`vpmovmskb`. +/// /// - loongarch64: Should use `vmskltz.b` instruction for the fast-path. +// X86_64-LABEL: test_is_ascii +// X86_64-NOT: kshiftrd +// X86_64-NOT: kshiftrq +// X86_64: {{vpor|por}} +// X86_64: {{vpmovmskb|pmovmskb}} + // LA64-LABEL: test_is_ascii // LA64: vmskltz.b