Optimize SliceIndex<str> for RangeInclusive

Replace the separate `self.end() == usize::MAX` and `self.end() + 1 > slice.len()`
checks with a single `self.end() >= slice.len()` check. Same reasoning as previous commit.

Also consolidate the str panicking functions into a single function.
This commit is contained in:
Karl Meakin 2025-07-25 21:57:44 +01:00
parent 625b18027d
commit 262cd76333
6 changed files with 97 additions and 81 deletions

View file

@ -630,13 +630,13 @@ mod slice_index {
// note: using 0 specifically ensures that the result of overflowing is 0..0,
// so that `get` doesn't simply return None for the wrong reason.
bad: data[0..=usize::MAX];
message: "maximum usize";
message: "out of bounds";
}
in mod rangetoinclusive {
data: "hello";
bad: data[..=usize::MAX];
message: "maximum usize";
message: "out of bounds";
}
}
}
@ -659,49 +659,49 @@ mod slice_index {
data: super::DATA;
bad: data[super::BAD_START..super::GOOD_END];
message:
"byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
"start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
}
in mod range_2 {
data: super::DATA;
bad: data[super::GOOD_START..super::BAD_END];
message:
"byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
"end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
}
in mod rangefrom {
data: super::DATA;
bad: data[super::BAD_START..];
message:
"byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
"start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
}
in mod rangeto {
data: super::DATA;
bad: data[..super::BAD_END];
message:
"byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
"end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
}
in mod rangeinclusive_1 {
data: super::DATA;
bad: data[super::BAD_START..=super::GOOD_END_INCL];
message:
"byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
"start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
}
in mod rangeinclusive_2 {
data: super::DATA;
bad: data[super::GOOD_START..=super::BAD_END_INCL];
message:
"byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
"end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
}
in mod rangetoinclusive {
data: super::DATA;
bad: data[..=super::BAD_END_INCL];
message:
"byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
"end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
}
}
}
@ -716,7 +716,9 @@ mod slice_index {
// check the panic includes the prefix of the sliced string
#[test]
#[should_panic(expected = "byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet")]
#[should_panic(
expected = "end byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet"
)]
fn test_slice_fail_truncated_1() {
let _ = &LOREM_PARAGRAPH[..1024];
}

View file

@ -352,15 +352,6 @@ impl<Idx: Step> RangeInclusive<Idx> {
}
}
impl RangeInclusive<usize> {
/// Converts to an exclusive `Range` for `SliceIndex` implementations.
/// The caller is responsible for dealing with `last == usize::MAX`.
#[inline]
pub(crate) const fn into_slice_range(self) -> Range<usize> {
Range { start: self.start, end: self.last + 1 }
}
}
#[stable(feature = "new_range_inclusive_api", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "const_range", issue = "none")]
impl<T> const RangeBounds<T> for RangeInclusive<T> {

View file

@ -85,34 +85,50 @@ fn slice_error_fail_rt(s: &str, begin: usize, end: usize) -> ! {
let trunc_len = s.floor_char_boundary(MAX_DISPLAY_LENGTH);
let s_trunc = &s[..trunc_len];
let ellipsis = if trunc_len < s.len() { "[...]" } else { "" };
let len = s.len();
// 1. out of bounds
if begin > s.len() || end > s.len() {
let oob_index = if begin > s.len() { begin } else { end };
panic!("byte index {oob_index} is out of bounds of `{s_trunc}`{ellipsis}");
// 1. begin is OOB.
if begin > len {
panic!("start byte index {begin} is out of bounds of `{s_trunc}`{ellipsis}");
}
// 2. begin <= end
assert!(
begin <= end,
"begin <= end ({} <= {}) when slicing `{}`{}",
begin,
end,
s_trunc,
ellipsis
);
// 2. end is OOB.
if end > len {
panic!("end byte index {end} is out of bounds of `{s_trunc}`{ellipsis}");
}
// 3. character boundary
let index = if !s.is_char_boundary(begin) { begin } else { end };
// find the character
let char_start = s.floor_char_boundary(index);
// `char_start` must be less than len and a char boundary
let ch = s[char_start..].chars().next().unwrap();
let char_range = char_start..char_start + ch.len_utf8();
panic!(
"byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
index, ch, char_range, s_trunc, ellipsis
);
// 3. range is backwards.
if begin > end {
panic!("begin <= end ({begin} <= {end}) when slicing `{s_trunc}`{ellipsis}")
}
// 4. begin is inside a character.
if !s.is_char_boundary(begin) {
let floor = s.floor_char_boundary(begin);
let ceil = s.ceil_char_boundary(begin);
let range = floor..ceil;
let ch = s[floor..ceil].chars().next().unwrap();
panic!(
"start byte index {begin} is not a char boundary; it is inside {ch:?} (bytes {range:?}) of `{s_trunc}`{ellipsis}"
)
}
// 5. end is inside a character.
if !s.is_char_boundary(end) {
let floor = s.floor_char_boundary(end);
let ceil = s.ceil_char_boundary(end);
let range = floor..ceil;
let ch = s[floor..ceil].chars().next().unwrap();
panic!(
"end byte index {end} is not a char boundary; it is inside {ch:?} (bytes {range:?}) of `{s_trunc}`{ellipsis}"
)
}
// 6. end is OOB and range is inclusive (end == len).
// This test cannot be combined with 2. above because for cases like
// `"abcαβγ"[4..9]` the error is that 4 is inside 'α', not that 9 is OOB.
debug_assert_eq!(end, len);
panic!("end byte index {end} is out of bounds of `{s_trunc}`{ellipsis}");
}
impl str {

View file

@ -76,13 +76,6 @@ where
}
}
#[inline(never)]
#[cold]
#[track_caller]
const fn str_index_overflow_fail() -> ! {
panic!("attempted to index str up to maximum usize");
}
/// Implements substring slicing with syntax `&self[..]` or `&mut self[..]`.
///
/// Returns a slice of the whole string, i.e., returns `&self` or `&mut
@ -640,11 +633,11 @@ unsafe impl const SliceIndex<str> for ops::RangeInclusive<usize> {
type Output = str;
#[inline]
fn get(self, slice: &str) -> Option<&Self::Output> {
if *self.end() == usize::MAX { None } else { self.into_slice_range().get(slice) }
if *self.end() >= slice.len() { None } else { self.into_slice_range().get(slice) }
}
#[inline]
fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
if *self.end() == usize::MAX { None } else { self.into_slice_range().get_mut(slice) }
if *self.end() >= slice.len() { None } else { self.into_slice_range().get_mut(slice) }
}
#[inline]
unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
@ -658,17 +651,37 @@ unsafe impl const SliceIndex<str> for ops::RangeInclusive<usize> {
}
#[inline]
fn index(self, slice: &str) -> &Self::Output {
if *self.end() == usize::MAX {
str_index_overflow_fail();
let Self { mut start, mut end, exhausted } = self;
let len = slice.len();
if end < len {
end = end + 1;
start = if exhausted { end } else { start };
if start <= end && slice.is_char_boundary(start) && slice.is_char_boundary(end) {
// SAFETY: just checked that `start` and `end` are on a char boundary,
// and we are passing in a safe reference, so the return value will also be one.
// We also checked char boundaries, so this is valid UTF-8.
unsafe { return &*(start..end).get_unchecked(slice) }
}
}
self.into_slice_range().index(slice)
super::slice_error_fail(slice, start, end)
}
#[inline]
fn index_mut(self, slice: &mut str) -> &mut Self::Output {
if *self.end() == usize::MAX {
str_index_overflow_fail();
let Self { mut start, mut end, exhausted } = self;
let len = slice.len();
if end < len {
end = end + 1;
start = if exhausted { end } else { start };
if start <= end && slice.is_char_boundary(start) && slice.is_char_boundary(end) {
// SAFETY: just checked that `start` and `end` are on a char boundary,
// and we are passing in a safe reference, so the return value will also be one.
// We also checked char boundaries, so this is valid UTF-8.
unsafe { return &mut *(start..end).get_unchecked_mut(slice) }
}
}
self.into_slice_range().index_mut(slice)
super::slice_error_fail(slice, start, end)
}
}
@ -678,35 +691,29 @@ unsafe impl const SliceIndex<str> for range::RangeInclusive<usize> {
type Output = str;
#[inline]
fn get(self, slice: &str) -> Option<&Self::Output> {
if self.last == usize::MAX { None } else { self.into_slice_range().get(slice) }
ops::RangeInclusive::from(self).get(slice)
}
#[inline]
fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
if self.last == usize::MAX { None } else { self.into_slice_range().get_mut(slice) }
ops::RangeInclusive::from(self).get_mut(slice)
}
#[inline]
unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
// SAFETY: the caller must uphold the safety contract for `get_unchecked`.
unsafe { self.into_slice_range().get_unchecked(slice) }
unsafe { ops::RangeInclusive::from(self).get_unchecked(slice) }
}
#[inline]
unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
// SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`.
unsafe { self.into_slice_range().get_unchecked_mut(slice) }
unsafe { ops::RangeInclusive::from(self).get_unchecked_mut(slice) }
}
#[inline]
fn index(self, slice: &str) -> &Self::Output {
if self.last == usize::MAX {
str_index_overflow_fail();
}
self.into_slice_range().index(slice)
ops::RangeInclusive::from(self).index(slice)
}
#[inline]
fn index_mut(self, slice: &mut str) -> &mut Self::Output {
if self.last == usize::MAX {
str_index_overflow_fail();
}
self.into_slice_range().index_mut(slice)
ops::RangeInclusive::from(self).index_mut(slice)
}
}

View file

@ -35,18 +35,18 @@ macro_rules! tests {
// CHECK: ret
tests!(Range<usize>, get_range, index_range);
// 9 comparisons required:
// end != usize::MAX && start <= end + 1
// && (start == 0 || (start >= len && start == len) || bytes[start] >= -0x40)
// && ( (end + 1 >= len && end + 1 == len) || bytes[end + 1] >= -0x40)
// 7 comparisons required:
// end < len && start <= end + 1
// && (start == 0 || start >= len || bytes[start] >= -0x40)
// && ( end + 1 >= len || bytes[end + 1] >= -0x40)
// CHECK-LABEL: @get_range_inclusive
// CHECK-COUNT-9: %{{.+}} = icmp
// CHECK-COUNT-7: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
// CHECK-LABEL: @index_range_inclusive
// CHECK-COUNT-9: %{{.+}} = icmp
// CHECK-COUNT-7: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
tests!(RangeInclusive<usize>, get_range_inclusive, index_range_inclusive);
@ -65,16 +65,16 @@ tests!(RangeInclusive<usize>, get_range_inclusive, index_range_inclusive);
// CHECK: ret
tests!(RangeTo<usize>, get_range_to, index_range_to);
// 4 comparisons required:
// end != usize::MAX && (end + 1 >= len && end + 1 == len) || bytes[end + 1] >= -0x40)
// 3 comparisons required:
// end < len && (end + 1 >= len || bytes[end + 1] >= -0x40)
// CHECK-LABEL: @get_range_to_inclusive
// CHECK-COUNT-4: %{{.+}} = icmp
// CHECK-COUNT-3: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
// CHECK-LABEL: @index_range_to_inclusive
// CHECK-COUNT-4: %{{.+}} = icmp
// CHECK-COUNT-3: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
tests!(RangeToInclusive<usize>, get_range_to_inclusive, index_range_to_inclusive);

View file

@ -1,4 +1,4 @@
thread 'main' ($TID) panicked at $DIR/const-eval-select-backtrace-std.rs:6:8:
byte index 1 is out of bounds of ``
start byte index 1 is out of bounds of ``
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace