LoongArch64 LSX fast-path for str.contains(&str)

Benchmark results with LLVM 21 on LA664:

```
OLD:
test bench_is_contained_in ... bench:          43.63 ns/iter (+/- 0.04)

NEW:
test bench_is_contained_in ... bench:          12.81 ns/iter (+/- 0.01)
```
This commit is contained in:
WANG Rui 2025-06-24 20:19:10 +08:00
parent ace6330903
commit 1ceacf55a0

View file

@ -996,7 +996,10 @@ impl<'b> Pattern for &'b str {
return haystack.as_bytes().contains(&self.as_bytes()[0]);
}
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
#[cfg(any(
all(target_arch = "x86_64", target_feature = "sse2"),
all(target_arch = "loongarch64", target_feature = "lsx")
))]
if self.len() <= 32 {
if let Some(result) = simd_contains(self, haystack) {
return result;
@ -1770,11 +1773,18 @@ impl TwoWayStrategy for RejectAndMatch {
/// If we ever ship std for x86-64-v3 or adapt this for other platforms then wider vectors
/// should be evaluated.
///
/// Similarly, on LoongArch the 128-bit LSX vector extension is the baseline,
/// so we also use `u8x16` there. Wider vector widths may be considered
/// for future LoongArch extensions (e.g., LASX).
///
/// For haystacks smaller than vector-size + needle length it falls back to
/// a naive O(n*m) search, so this implementation should not be called on larger needles.
///
/// [0]: http://0x80.pl/articles/simd-strfind.html#sse-avx2
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
#[cfg(any(
all(target_arch = "x86_64", target_feature = "sse2"),
all(target_arch = "loongarch64", target_feature = "lsx")
))]
#[inline]
fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
let needle = needle.as_bytes();
@ -1906,7 +1916,10 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
/// # Safety
///
/// Both slices must have the same length.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] // only called on x86
#[cfg(any(
all(target_arch = "x86_64", target_feature = "sse2"),
all(target_arch = "loongarch64", target_feature = "lsx")
))]
#[inline]
unsafe fn small_slice_eq(x: &[u8], y: &[u8]) -> bool {
debug_assert_eq!(x.len(), y.len());