LoongArch64 LSX fast-path for str.contains(&str)
Benchmark results with LLVM 21 on LA664:
```
OLD: test bench_is_contained_in ... bench: 43.63 ns/iter (+/- 0.04)
NEW: test bench_is_contained_in ... bench: 12.81 ns/iter (+/- 0.01)
```
This commit is contained in:
parent
ace6330903
commit
1ceacf55a0
1 changed files with 16 additions and 3 deletions
|
|
@ -996,7 +996,10 @@ impl<'b> Pattern for &'b str {
|
|||
return haystack.as_bytes().contains(&self.as_bytes()[0]);
|
||||
}
|
||||
|
||||
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
|
||||
#[cfg(any(
|
||||
all(target_arch = "x86_64", target_feature = "sse2"),
|
||||
all(target_arch = "loongarch64", target_feature = "lsx")
|
||||
))]
|
||||
if self.len() <= 32 {
|
||||
if let Some(result) = simd_contains(self, haystack) {
|
||||
return result;
|
||||
|
|
@ -1770,11 +1773,18 @@ impl TwoWayStrategy for RejectAndMatch {
|
|||
/// If we ever ship std for x86-64-v3 or adapt this for other platforms then wider vectors
|
||||
/// should be evaluated.
|
||||
///
|
||||
/// Similarly, on LoongArch the 128-bit LSX vector extension is the baseline,
|
||||
/// so we also use `u8x16` there. Wider vector widths may be considered
|
||||
/// for future LoongArch extensions (e.g., LASX).
|
||||
///
|
||||
/// For haystacks smaller than vector-size + needle length it falls back to
|
||||
/// a naive O(n*m) search so this implementation should not be called on larger needles.
|
||||
///
|
||||
/// [0]: http://0x80.pl/articles/simd-strfind.html#sse-avx2
|
||||
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
|
||||
#[cfg(any(
|
||||
all(target_arch = "x86_64", target_feature = "sse2"),
|
||||
all(target_arch = "loongarch64", target_feature = "lsx")
|
||||
))]
|
||||
#[inline]
|
||||
fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
|
||||
let needle = needle.as_bytes();
|
||||
|
|
@ -1906,7 +1916,10 @@ fn simd_contains(needle: &str, haystack: &str) -> Option<bool> {
|
|||
/// # Safety
|
||||
///
|
||||
/// Both slices must have the same length.
|
||||
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] // only called on x86
|
||||
#[cfg(any(
|
||||
all(target_arch = "x86_64", target_feature = "sse2"),
|
||||
all(target_arch = "loongarch64", target_feature = "lsx")
|
||||
))]
|
||||
#[inline]
|
||||
unsafe fn small_slice_eq(x: &[u8], y: &[u8]) -> bool {
|
||||
debug_assert_eq!(x.len(), y.len());
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue