Rollup merge of #146699 - heiher:is-ascii-lsx, r=Mark-Simulacrum

Add `is_ascii` function optimized for LoongArch64 for [u8]

Similar to x86_64, on LoongArch64 we use the `vmskltz.b` instruction to test the high bit in a lane.

For longer input cases, the performance improvement is significant. For unaligned cases close to 32 bytes in length, there's some regression, but it seems acceptable.

| core benches (MB/s)                                    | Before | After  | %       |
|--------------------------------------------------------|--------|--------|---------|
| ascii::is_ascii::short::case00_libcore                 | 1000   | 1000   | 0.00    |
| ascii::is_ascii::medium::case00_libcore                | 8000   | 8000   | 0.00    |
| ascii::is_ascii::long::case00_libcore                  | 183947 | 436875 | +137.50 |
| ascii::is_ascii::unaligned_head_medium::case00_libcore | 7750   | 2818   | -63.64  |
| ascii::is_ascii::unaligned_head_long::case00_libcore   | 317681 | 436812 | +37.50  |
| ascii::is_ascii::unaligned_tail_medium::case00_libcore | 7750   | 3444   | -55.56  |
| ascii::is_ascii::unaligned_tail_long::case00_libcore   | 155311 | 436812 | +181.25 |
| ascii::is_ascii::unaligned_both_medium::case00_libcore | 7500   | 3333   | -55.56  |
| ascii::is_ascii::unaligned_both_long::case00_libcore   | 174700 | 436750 | +150.00 |
This commit is contained in:
Matthias Krüger 2025-11-02 20:21:01 +01:00 committed by GitHub
commit 4228b53618
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -3,7 +3,10 @@
use core::ascii::EscapeDefault;
use crate::fmt::{self, Write};
#[cfg(not(all(target_arch = "x86_64", target_feature = "sse2")))]
#[cfg(not(any(
all(target_arch = "x86_64", target_feature = "sse2"),
all(target_arch = "loongarch64", target_feature = "lsx")
)))]
use crate::intrinsics::const_eval_select;
use crate::{ascii, iter, ops};
@ -359,7 +362,10 @@ pub const fn is_ascii_simple(mut bytes: &[u8]) -> bool {
///
/// If any of these loads produces something for which `contains_nonascii`
/// (above) returns true, then we know the answer is false.
#[cfg(not(all(target_arch = "x86_64", target_feature = "sse2")))]
#[cfg(not(any(
all(target_arch = "x86_64", target_feature = "sse2"),
all(target_arch = "loongarch64", target_feature = "lsx")
)))]
#[inline]
#[rustc_allow_const_fn_unstable(const_eval_select)] // fallback impl has same behavior
const fn is_ascii(s: &[u8]) -> bool {
@ -457,12 +463,15 @@ const fn is_ascii(s: &[u8]) -> bool {
)
}
/// ASCII test optimized to use the `pmovmskb` instruction available on `x86-64`
/// platforms.
/// ASCII test optimized to use the `pmovmskb` instruction on `x86-64` and the
/// `vmskltz.b` instruction on `loongarch64`.
///
/// Other platforms are not likely to benefit from this code structure, so they
/// use SWAR techniques to test for ASCII in `usize`-sized chunks.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
#[cfg(any(
all(target_arch = "x86_64", target_feature = "sse2"),
all(target_arch = "loongarch64", target_feature = "lsx")
))]
#[inline]
const fn is_ascii(bytes: &[u8]) -> bool {
// Process chunks of 32 bytes at a time in the fast path to enable