diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs
index e17a2e03d2dc..1f9ca4bc6698 100644
--- a/library/core/src/slice/ascii.rs
+++ b/library/core/src/slice/ascii.rs
@@ -60,6 +60,18 @@ impl [u8] {
             return false;
         }
 
+        #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+        if self.len() >= 16 {
+            return self.eq_ignore_ascii_case_chunks(other);
+        }
+
+        self.eq_ignore_ascii_case_simple(other)
+    }
+
+    /// ASCII case-insensitive equality check without chunk-at-a-time
+    /// optimization.
+    #[inline]
+    const fn eq_ignore_ascii_case_simple(&self, other: &[u8]) -> bool {
         // FIXME(const-hack): This implementation can be reverted when
         // `core::iter::zip` is allowed in const. The original implementation:
         // self.len() == other.len() && iter::zip(self, other).all(|(a, b)| a.eq_ignore_ascii_case(b))
@@ -78,6 +90,40 @@ impl [u8] {
         true
     }
 
+    /// Optimized version of `eq_ignore_ascii_case` which processes chunks at a
+    /// time.
+    ///
+    /// Platforms that have SIMD instructions may benefit from this
+    /// implementation over `eq_ignore_ascii_case_simple`.
+    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
+    #[inline]
+    const fn eq_ignore_ascii_case_chunks(&self, other: &[u8]) -> bool {
+        const N: usize = 16;
+        let (a, a_rem) = self.as_chunks::<N>();
+        let (b, b_rem) = other.as_chunks::<N>();
+
+        let mut i = 0;
+        while i < a.len() && i < b.len() {
+            // Fold all `N` byte comparisons into one flag instead of returning
+            // at the first mismatching byte; the accompanying codegen test
+            // expects this fixed-length loop to become 16-byte vector loads.
+            let mut equal_ascii = true;
+            let mut j = 0;
+            while j < N {
+                equal_ascii &= a[i][j].eq_ignore_ascii_case(&b[i][j]);
+                j += 1;
+            }
+
+            if !equal_ascii {
+                return false;
+            }
+
+            i += 1;
+        }
+
+        a_rem.eq_ignore_ascii_case_simple(b_rem)
+    }
+
     /// Converts this slice to its ASCII upper case equivalent in-place.
     ///
     /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z',
diff --git a/library/coretests/benches/ascii.rs b/library/coretests/benches/ascii.rs
index 64bdc7fed118..17a520922bfa 100644
--- a/library/coretests/benches/ascii.rs
+++ b/library/coretests/benches/ascii.rs
@@ -1,3 +1,4 @@
+mod eq_ignore_ascii_case;
 mod is_ascii;
 
 // Lower-case ASCII 'a' is the first byte that has its highest bit set
diff --git a/library/coretests/benches/ascii/eq_ignore_ascii_case.rs b/library/coretests/benches/ascii/eq_ignore_ascii_case.rs
new file mode 100644
index 000000000000..a51acb1e8463
--- /dev/null
+++ b/library/coretests/benches/ascii/eq_ignore_ascii_case.rs
@@ -0,0 +1,59 @@
+use test::{Bencher, black_box};
+
+// Inputs are passed through `black_box` so the optimizer is discouraged from
+// constant-folding the comparison of these literal strings out of the loop.
+
+#[bench]
+fn bench_str_under_8_bytes_eq(b: &mut Bencher) {
+    let s = "foo";
+    let other = "FOo";
+    b.iter(|| {
+        assert!(black_box(s).eq_ignore_ascii_case(black_box(other)));
+    })
+}
+
+#[bench]
+fn bench_str_of_8_bytes_eq(b: &mut Bencher) {
+    let s = "foobar78";
+    let other = "FOObAr78";
+    b.iter(|| {
+        assert!(black_box(s).eq_ignore_ascii_case(black_box(other)));
+    })
+}
+
+#[bench]
+fn bench_str_17_bytes_eq(b: &mut Bencher) {
+    let s = "performance-criti";
+    let other = "performANce-cRIti";
+    b.iter(|| {
+        assert!(black_box(s).eq_ignore_ascii_case(black_box(other)));
+    })
+}
+
+#[bench]
+fn bench_str_31_bytes_eq(b: &mut Bencher) {
+    let s = "foobarbazquux02foobarbazquux025";
+    let other = "fooBARbazQuuX02fooBARbazQuuX025";
+    b.iter(|| {
+        assert!(black_box(s).eq_ignore_ascii_case(black_box(other)));
+    })
+}
+
+#[bench]
+fn bench_long_str_eq(b: &mut Bencher) {
+    let s = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor \
+             incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud \
+             exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute \
+             irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla \
+             pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui \
+             officia deserunt mollit anim id est laborum.";
+    let other = "Lorem ipsum dolor sit amet, CONSECTETUR adipisicing elit, sed do eiusmod tempor \
+                 incididunt ut labore et dolore MAGNA aliqua. Ut enim ad MINIM veniam, quis nostrud \
+                 exercitation ullamco LABORIS nisi ut aliquip ex ea commodo consequat. Duis aute \
+                 irure dolor in reprehenderit in voluptate velit esse cillum DOLORE eu fugiat nulla \
+                 pariatur. Excepteur sint occaecat CUPIDATAT non proident, sunt in culpa qui \
+                 officia deserunt mollit anim id est laborum.";
+    b.iter(|| {
+        assert!(black_box(s).eq_ignore_ascii_case(black_box(other)));
+    })
+}
diff --git a/tests/codegen-llvm/lib-optimizations/eq_ignore_ascii_case.rs b/tests/codegen-llvm/lib-optimizations/eq_ignore_ascii_case.rs
new file mode 100644
index 000000000000..b733f1812c92
--- /dev/null
+++ b/tests/codegen-llvm/lib-optimizations/eq_ignore_ascii_case.rs
@@ -0,0 +1,14 @@
+//@ compile-flags: -Copt-level=3
+//@ only-x86_64
+#![crate_type = "lib"]
+
+// Ensure that the optimized variant of the function gets auto-vectorized.
+// CHECK-LABEL: @eq_ignore_ascii_case_autovectorized
+#[no_mangle]
+pub fn eq_ignore_ascii_case_autovectorized(s: &str, other: &str) -> bool {
+    // CHECK: load <16 x i8>
+    // CHECK: load <16 x i8>
+    // CHECK: bitcast <16 x i1>
+    // CHECK-NOT: panic
+    s.eq_ignore_ascii_case(other)
+}