Optimize SliceIndex<str> for RangeInclusive

Replace `self.end() == usize::MAX` and `self.end() + 1 > slice.len()` with `self.end() >= slice.len()`. Same reasoning as previous commit. Also consolidate the str panicking functions into a single function.
2025-07-25 21:57:44 +01:00 · 2025-07-25 21:57:44 +01:00 · 262cd76333
commit 262cd76333
parent 625b18027d
6 changed files with 97 additions and 81 deletions
--- a/library/alloctests/tests/str.rs
+++ b/library/alloctests/tests/str.rs
@ -630,13 +630,13 @@ mod slice_index {
                // note: using 0 specifically ensures that the result of overflowing is 0..0,
                //       so that `get` doesn't simply return None for the wrong reason.
                bad: data[0..=usize::MAX];
-                message: "maximum usize";
+                message: "out of bounds";
            }

            in mod rangetoinclusive {
                data: "hello";
                bad: data[..=usize::MAX];
-                message: "maximum usize";
+                message: "out of bounds";
            }
        }
    }
@ -659,49 +659,49 @@ mod slice_index {
                data: super::DATA;
                bad: data[super::BAD_START..super::GOOD_END];
                message:
-                    "byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
+                    "start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
            }

            in mod range_2 {
                data: super::DATA;
                bad: data[super::GOOD_START..super::BAD_END];
                message:
-                    "byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
+                    "end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
            }

            in mod rangefrom {
                data: super::DATA;
                bad: data[super::BAD_START..];
                message:
-                    "byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
+                    "start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
            }

            in mod rangeto {
                data: super::DATA;
                bad: data[..super::BAD_END];
                message:
-                    "byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
+                    "end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
            }

            in mod rangeinclusive_1 {
                data: super::DATA;
                bad: data[super::BAD_START..=super::GOOD_END_INCL];
                message:
-                    "byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
+                    "start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
            }

            in mod rangeinclusive_2 {
                data: super::DATA;
                bad: data[super::GOOD_START..=super::BAD_END_INCL];
                message:
-                    "byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
+                    "end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
            }

            in mod rangetoinclusive {
                data: super::DATA;
                bad: data[..=super::BAD_END_INCL];
                message:
-                    "byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
+                    "end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
            }
        }
    }
@ -716,7 +716,9 @@ mod slice_index {

    // check the panic includes the prefix of the sliced string
    #[test]
-    #[should_panic(expected = "byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet")]
+    #[should_panic(
+        expected = "end byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet"
+    )]
    fn test_slice_fail_truncated_1() {
        let _ = &LOREM_PARAGRAPH[..1024];
    }
--- a/library/core/src/range.rs
+++ b/library/core/src/range.rs
@ -352,15 +352,6 @@ impl<Idx: Step> RangeInclusive<Idx> {
    }
 }

-impl RangeInclusive<usize> {
-    /// Converts to an exclusive `Range` for `SliceIndex` implementations.
-    /// The caller is responsible for dealing with `last == usize::MAX`.
-    #[inline]
-    pub(crate) const fn into_slice_range(self) -> Range<usize> {
-        Range { start: self.start, end: self.last + 1 }
-    }
-}
-
 #[stable(feature = "new_range_inclusive_api", since = "CURRENT_RUSTC_VERSION")]
 #[rustc_const_unstable(feature = "const_range", issue = "none")]
 impl<T> const RangeBounds<T> for RangeInclusive<T> {
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@ -85,34 +85,50 @@ fn slice_error_fail_rt(s: &str, begin: usize, end: usize) -> ! {
    let trunc_len = s.floor_char_boundary(MAX_DISPLAY_LENGTH);
    let s_trunc = &s[..trunc_len];
    let ellipsis = if trunc_len < s.len() { "[...]" } else { "" };
+    let len = s.len();

-    // 1. out of bounds
-    if begin > s.len() || end > s.len() {
-        let oob_index = if begin > s.len() { begin } else { end };
-        panic!("byte index {oob_index} is out of bounds of `{s_trunc}`{ellipsis}");
+    // 1. begin is OOB.
+    if begin > len {
+        panic!("start byte index {begin} is out of bounds of `{s_trunc}`{ellipsis}");
    }

-    // 2. begin <= end
-    assert!(
-        begin <= end,
-        "begin <= end ({} <= {}) when slicing `{}`{}",
-        begin,
-        end,
-        s_trunc,
-        ellipsis
-    );
+    // 2. end is OOB.
+    if end > len {
+        panic!("end byte index {end} is out of bounds of `{s_trunc}`{ellipsis}");
+    }

-    // 3. character boundary
-    let index = if !s.is_char_boundary(begin) { begin } else { end };
-    // find the character
-    let char_start = s.floor_char_boundary(index);
-    // `char_start` must be less than len and a char boundary
-    let ch = s[char_start..].chars().next().unwrap();
-    let char_range = char_start..char_start + ch.len_utf8();
-    panic!(
-        "byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
-        index, ch, char_range, s_trunc, ellipsis
-    );
+    // 3. range is backwards.
+    if begin > end {
+        panic!("begin <= end ({begin} <= {end}) when slicing `{s_trunc}`{ellipsis}")
+    }
+
+    // 4. begin is inside a character.
+    if !s.is_char_boundary(begin) {
+        let floor = s.floor_char_boundary(begin);
+        let ceil = s.ceil_char_boundary(begin);
+        let range = floor..ceil;
+        let ch = s[floor..ceil].chars().next().unwrap();
+        panic!(
+            "start byte index {begin} is not a char boundary; it is inside {ch:?} (bytes {range:?}) of `{s_trunc}`{ellipsis}"
+        )
+    }
+
+    // 5. end is inside a character.
+    if !s.is_char_boundary(end) {
+        let floor = s.floor_char_boundary(end);
+        let ceil = s.ceil_char_boundary(end);
+        let range = floor..ceil;
+        let ch = s[floor..ceil].chars().next().unwrap();
+        panic!(
+            "end byte index {end} is not a char boundary; it is inside {ch:?} (bytes {range:?}) of `{s_trunc}`{ellipsis}"
+        )
+    }
+
+    // 6. end is OOB and range is inclusive (end == len).
+    // This test cannot be combined with 2. above because for cases like
+    // `"abcαβγ"[4..=9]` the error is that 4 is inside 'α', not that 9 is OOB.
+    debug_assert_eq!(end, len);
+    panic!("end byte index {end} is out of bounds of `{s_trunc}`{ellipsis}");
 }

 impl str {
--- a/library/core/src/str/traits.rs
+++ b/library/core/src/str/traits.rs
@ -76,13 +76,6 @@ where
    }
 }

-#[inline(never)]
-#[cold]
-#[track_caller]
-const fn str_index_overflow_fail() -> ! {
-    panic!("attempted to index str up to maximum usize");
-}
-
 /// Implements substring slicing with syntax `&self[..]` or `&mut self[..]`.
 ///
 /// Returns a slice of the whole string, i.e., returns `&self` or `&mut
@ -640,11 +633,11 @@ unsafe impl const SliceIndex<str> for ops::RangeInclusive<usize> {
    type Output = str;
    #[inline]
    fn get(self, slice: &str) -> Option<&Self::Output> {
-        if *self.end() == usize::MAX { None } else { self.into_slice_range().get(slice) }
+        if *self.end() >= slice.len() { None } else { self.into_slice_range().get(slice) }
    }
    #[inline]
    fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
-        if *self.end() == usize::MAX { None } else { self.into_slice_range().get_mut(slice) }
+        if *self.end() >= slice.len() { None } else { self.into_slice_range().get_mut(slice) }
    }
    #[inline]
    unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
@ -658,17 +651,37 @@ unsafe impl const SliceIndex<str> for ops::RangeInclusive<usize> {
    }
    #[inline]
    fn index(self, slice: &str) -> &Self::Output {
-        if *self.end() == usize::MAX {
-            str_index_overflow_fail();
+        let Self { mut start, mut end, exhausted } = self;
+        let len = slice.len();
+        if end < len {
+            end = end + 1;
+            start = if exhausted { end } else { start };
+            if start <= end && slice.is_char_boundary(start) && slice.is_char_boundary(end) {
+                // SAFETY: just checked that `start` and `end` are on a char boundary,
+                // and we are passing in a safe reference, so the return value will also be one.
+                // We also checked char boundaries, so this is valid UTF-8.
+                unsafe { return &*(start..end).get_unchecked(slice) }
+            }
        }
-        self.into_slice_range().index(slice)
+
+        super::slice_error_fail(slice, start, end)
    }
    #[inline]
    fn index_mut(self, slice: &mut str) -> &mut Self::Output {
-        if *self.end() == usize::MAX {
-            str_index_overflow_fail();
+        let Self { mut start, mut end, exhausted } = self;
+        let len = slice.len();
+        if end < len {
+            end = end + 1;
+            start = if exhausted { end } else { start };
+            if start <= end && slice.is_char_boundary(start) && slice.is_char_boundary(end) {
+                // SAFETY: just checked that `start` and `end` are on a char boundary,
+                // and we are passing in a safe reference, so the return value will also be one.
+                // We also checked char boundaries, so this is valid UTF-8.
+                unsafe { return &mut *(start..end).get_unchecked_mut(slice) }
+            }
        }
-        self.into_slice_range().index_mut(slice)
+
+        super::slice_error_fail(slice, start, end)
    }
 }

@ -678,35 +691,29 @@ unsafe impl const SliceIndex<str> for range::RangeInclusive<usize> {
    type Output = str;
    #[inline]
    fn get(self, slice: &str) -> Option<&Self::Output> {
-        if self.last == usize::MAX { None } else { self.into_slice_range().get(slice) }
+        ops::RangeInclusive::from(self).get(slice)
    }
    #[inline]
    fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
-        if self.last == usize::MAX { None } else { self.into_slice_range().get_mut(slice) }
+        ops::RangeInclusive::from(self).get_mut(slice)
    }
    #[inline]
    unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
        // SAFETY: the caller must uphold the safety contract for `get_unchecked`.
-        unsafe { self.into_slice_range().get_unchecked(slice) }
+        unsafe { ops::RangeInclusive::from(self).get_unchecked(slice) }
    }
    #[inline]
    unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
        // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`.
-        unsafe { self.into_slice_range().get_unchecked_mut(slice) }
+        unsafe { ops::RangeInclusive::from(self).get_unchecked_mut(slice) }
    }
    #[inline]
    fn index(self, slice: &str) -> &Self::Output {
-        if self.last == usize::MAX {
-            str_index_overflow_fail();
-        }
-        self.into_slice_range().index(slice)
+        ops::RangeInclusive::from(self).index(slice)
    }
    #[inline]
    fn index_mut(self, slice: &mut str) -> &mut Self::Output {
-        if self.last == usize::MAX {
-            str_index_overflow_fail();
-        }
-        self.into_slice_range().index_mut(slice)
+        ops::RangeInclusive::from(self).index_mut(slice)
    }
 }

--- a/tests/codegen-llvm/str-range-indexing.rs
+++ b/tests/codegen-llvm/str-range-indexing.rs
@ -35,18 +35,18 @@ macro_rules! tests {
 // CHECK: ret
 tests!(Range<usize>, get_range, index_range);

-// 9 comparisons required:
-// end != usize::MAX && start <= end + 1
-// && (start == 0 || (start   >= len && start   == len) || bytes[start]   >= -0x40)
-// && (              (end + 1 >= len && end + 1 == len) || bytes[end + 1] >= -0x40)
+// 7 comparisons required:
+// end < len && start <= end + 1
+// && (start == 0 || start   >= len || bytes[start]   >= -0x40)
+// && (              end + 1 >= len || bytes[end + 1] >= -0x40)

 // CHECK-LABEL: @get_range_inclusive
-// CHECK-COUNT-9: %{{.+}} = icmp
+// CHECK-COUNT-7: %{{.+}} = icmp
 // CHECK-NOT: %{{.+}} = icmp
 // CHECK: ret

 // CHECK-LABEL: @index_range_inclusive
-// CHECK-COUNT-9: %{{.+}} = icmp
+// CHECK-COUNT-7: %{{.+}} = icmp
 // CHECK-NOT: %{{.+}} = icmp
 // CHECK: ret
 tests!(RangeInclusive<usize>, get_range_inclusive, index_range_inclusive);
@ -65,16 +65,16 @@ tests!(RangeInclusive<usize>, get_range_inclusive, index_range_inclusive);
 // CHECK: ret
 tests!(RangeTo<usize>, get_range_to, index_range_to);

-// 4 comparisons required:
-// end != usize::MAX && (end + 1 >= len && end + 1 == len) || bytes[end + 1] >= -0x40)
+// 3 comparisons required:
+// end < len && (end + 1 >= len || bytes[end + 1] >= -0x40)

 // CHECK-LABEL: @get_range_to_inclusive
-// CHECK-COUNT-4: %{{.+}} = icmp
+// CHECK-COUNT-3: %{{.+}} = icmp
 // CHECK-NOT: %{{.+}} = icmp
 // CHECK: ret

 // CHECK-LABEL: @index_range_to_inclusive
-// CHECK-COUNT-4: %{{.+}} = icmp
+// CHECK-COUNT-3: %{{.+}} = icmp
 // CHECK-NOT: %{{.+}} = icmp
 // CHECK: ret
 tests!(RangeToInclusive<usize>, get_range_to_inclusive, index_range_to_inclusive);
--- a/tests/ui/intrinsics/const-eval-select-backtrace-std.run.stderr
+++ b/tests/ui/intrinsics/const-eval-select-backtrace-std.run.stderr
@ -1,4 +1,4 @@

 thread 'main' ($TID) panicked at $DIR/const-eval-select-backtrace-std.rs:6:8:
-byte index 1 is out of bounds of ``
+start byte index 1 is out of bounds of ``
 note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace