From b031c516641c53577d021da1a89d0c953acdc23a Mon Sep 17 00:00:00 2001 From: okaneco <47607823+okaneco@users.noreply.github.com> Date: Tue, 7 Oct 2025 05:52:37 -0400 Subject: [PATCH] slice/ascii: Optimize `eq_ignore_ascii_case` with auto-vectorization Refactor the current functionality into a helper function Use `as_chunks` to encourage auto-vectorization in the optimized chunk processing function Add a codegen test Add benches for `eq_ignore_ascii_case` The optimized function is initially only enabled for x86_64 which has `sse2` as part of its baseline, but none of the code is platform specific. Other platforms with SIMD instructions may also benefit from this implementation. Performance improvements only manifest for slices of 16 bytes or longer, so the optimized path is gated behind a length check for greater than or equal to 16. --- library/core/src/slice/ascii.rs | 43 ++++++++++++++ library/coretests/benches/ascii.rs | 1 + .../benches/ascii/eq_ignore_ascii_case.rs | 56 +++++++++++++++++++ .../lib-optimizations/eq_ignore_ascii_case.rs | 14 +++++ 4 files changed, 114 insertions(+) create mode 100644 library/coretests/benches/ascii/eq_ignore_ascii_case.rs create mode 100644 tests/codegen-llvm/lib-optimizations/eq_ignore_ascii_case.rs diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs index e17a2e03d2dc..1f9ca4bc6698 100644 --- a/library/core/src/slice/ascii.rs +++ b/library/core/src/slice/ascii.rs @@ -60,6 +60,18 @@ impl [u8] { return false; } + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + if self.len() >= 16 { + return self.eq_ignore_ascii_case_chunks(other); + } + + self.eq_ignore_ascii_case_simple(other) + } + + /// ASCII case-insensitive equality check without chunk-at-a-time + /// optimization. + #[inline] + const fn eq_ignore_ascii_case_simple(&self, other: &[u8]) -> bool { // FIXME(const-hack): This implementation can be reverted when // `core::iter::zip` is allowed in const. The original implementation: // self.len() == other.len() && iter::zip(self, other).all(|(a, b)| a.eq_ignore_ascii_case(b)) @@ -78,6 +90,37 @@ impl [u8] { true } + /// Optimized version of `eq_ignore_ascii_case` which processes chunks at a + /// time. + /// + /// Platforms that have SIMD instructions may benefit from this + /// implementation over `eq_ignore_ascii_case_simple`. + #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] + #[inline] + const fn eq_ignore_ascii_case_chunks(&self, other: &[u8]) -> bool { + const N: usize = 16; + let (a, a_rem) = self.as_chunks::(); + let (b, b_rem) = other.as_chunks::(); + + let mut i = 0; + while i < a.len() && i < b.len() { + let mut equal_ascii = true; + let mut j = 0; + while j < N { + equal_ascii &= a[i][j].eq_ignore_ascii_case(&b[i][j]); + j += 1; + } + + if !equal_ascii { + return false; + } + + i += 1; + } + + a_rem.eq_ignore_ascii_case_simple(b_rem) + } + /// Converts this slice to its ASCII upper case equivalent in-place. /// /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', diff --git a/library/coretests/benches/ascii.rs b/library/coretests/benches/ascii.rs index 64bdc7fed118..17a520922bfa 100644 --- a/library/coretests/benches/ascii.rs +++ b/library/coretests/benches/ascii.rs @@ -1,3 +1,4 @@ +mod eq_ignore_ascii_case; mod is_ascii; // Lower-case ASCII 'a' is the first byte that has its highest bit set diff --git a/library/coretests/benches/ascii/eq_ignore_ascii_case.rs b/library/coretests/benches/ascii/eq_ignore_ascii_case.rs new file mode 100644 index 000000000000..a51acb1e8463 --- /dev/null +++ b/library/coretests/benches/ascii/eq_ignore_ascii_case.rs @@ -0,0 +1,56 @@ +use test::Bencher; + +#[bench] +fn bench_str_under_8_bytes_eq(b: &mut Bencher) { + let s = "foo"; + let other = "FOo"; + b.iter(|| { + assert!(s.eq_ignore_ascii_case(other)); + }) +} + +#[bench] +fn bench_str_of_8_bytes_eq(b: &mut Bencher) { + let s = "foobar78"; + let other = "FOObAr78"; + b.iter(|| { + assert!(s.eq_ignore_ascii_case(other)); + }) +} + +#[bench] +fn bench_str_17_bytes_eq(b: &mut Bencher) { + let s = "performance-criti"; + let other = "performANce-cRIti"; + b.iter(|| { + assert!(s.eq_ignore_ascii_case(other)); + }) +} + +#[bench] +fn bench_str_31_bytes_eq(b: &mut Bencher) { + let s = "foobarbazquux02foobarbazquux025"; + let other = "fooBARbazQuuX02fooBARbazQuuX025"; + b.iter(|| { + assert!(s.eq_ignore_ascii_case(other)); + }) +} + +#[bench] +fn bench_long_str_eq(b: &mut Bencher) { + let s = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor \ + incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud \ + exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute \ + irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla \ + pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui \ + officia deserunt mollit anim id est laborum."; + let other = "Lorem ipsum dolor sit amet, CONSECTETUR adipisicing elit, sed do eiusmod tempor \ + incididunt ut labore et dolore MAGNA aliqua. Ut enim ad MINIM veniam, quis nostrud \ + exercitation ullamco LABORIS nisi ut aliquip ex ea commodo consequat. Duis aute \ + irure dolor in reprehenderit in voluptate velit esse cillum DOLORE eu fugiat nulla \ + pariatur. Excepteur sint occaecat CUPIDATAT non proident, sunt in culpa qui \ + officia deserunt mollit anim id est laborum."; + b.iter(|| { + assert!(s.eq_ignore_ascii_case(other)); + }) +} diff --git a/tests/codegen-llvm/lib-optimizations/eq_ignore_ascii_case.rs b/tests/codegen-llvm/lib-optimizations/eq_ignore_ascii_case.rs new file mode 100644 index 000000000000..b733f1812c92 --- /dev/null +++ b/tests/codegen-llvm/lib-optimizations/eq_ignore_ascii_case.rs @@ -0,0 +1,14 @@ +//@ compile-flags: -Copt-level=3 +//@ only-x86_64 +#![crate_type = "lib"] + +// Ensure that the optimized variant of the function gets auto-vectorized. +// CHECK-LABEL: @eq_ignore_ascii_case_autovectorized +#[no_mangle] +pub fn eq_ignore_ascii_case_autovectorized(s: &str, other: &str) -> bool { + // CHECK: load <16 x i8> + // CHECK: load <16 x i8> + // CHECK: bitcast <16 x i1> + // CHECK-NOT: panic + s.eq_ignore_ascii_case(other) +}