Rollup merge of #145024 - Kmeakin:km/optimize-slice-index/v3, r=Mark-Simulacrum

Optimize indexing slices and strs with inclusive ranges

Instead of separately checking for `end == usize::MAX` and `end + 1 > slice.len()`, we can check for `end >= slice.len()`. Also consolidate all the str indexing related panic functions into a single function which reports the correct error depending on the arguments, as the slice indexing code already does.

The downside of all this is that the panic message is slightly less specific when trying to index with `[..=usize::MAX]`: instead of saying "attempted to index str up to maximum usize" it just says "end byte index {end} out of bounds". But this is a rare enough case that I think it is acceptable
This commit is contained in:
Jonathan Brouwer 2026-02-14 22:11:52 +01:00 committed by GitHub
commit 6d625cc074
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 299 additions and 95 deletions

View file

@ -612,14 +612,14 @@ mod slice_index {
data: "abcdef";
good: data[4..4] == "";
bad: data[4..3];
message: "begin <= end (4 <= 3)";
message: "begin > end (4 > 3)";
}
in mod rangeinclusive_neg_width {
data: "abcdef";
good: data[4..=3] == "";
bad: data[4..=2];
message: "begin <= end (4 <= 3)";
message: "begin > end (4 > 3)";
}
}
@ -630,13 +630,13 @@ mod slice_index {
// note: using 0 specifically ensures that the result of overflowing is 0..0,
// so that `get` doesn't simply return None for the wrong reason.
bad: data[0..=usize::MAX];
message: "maximum usize";
message: "out of bounds";
}
in mod rangetoinclusive {
data: "hello";
bad: data[..=usize::MAX];
message: "maximum usize";
message: "out of bounds";
}
}
}
@ -659,49 +659,49 @@ mod slice_index {
data: super::DATA;
bad: data[super::BAD_START..super::GOOD_END];
message:
"byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
"start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
}
in mod range_2 {
data: super::DATA;
bad: data[super::GOOD_START..super::BAD_END];
message:
"byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
"end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
}
in mod rangefrom {
data: super::DATA;
bad: data[super::BAD_START..];
message:
"byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
"start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
}
in mod rangeto {
data: super::DATA;
bad: data[..super::BAD_END];
message:
"byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
"end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
}
in mod rangeinclusive_1 {
data: super::DATA;
bad: data[super::BAD_START..=super::GOOD_END_INCL];
message:
"byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
"start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of";
}
in mod rangeinclusive_2 {
data: super::DATA;
bad: data[super::GOOD_START..=super::BAD_END_INCL];
message:
"byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
"end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
}
in mod rangetoinclusive {
data: super::DATA;
bad: data[..=super::BAD_END_INCL];
message:
"byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
"end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of";
}
}
}
@ -716,7 +716,9 @@ mod slice_index {
// check the panic includes the prefix of the sliced string
#[test]
#[should_panic(expected = "byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet")]
#[should_panic(
expected = "end byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet"
)]
fn test_slice_fail_truncated_1() {
let _ = &LOREM_PARAGRAPH[..1024];
}

View file

@ -352,15 +352,6 @@ impl<Idx: Step> RangeInclusive<Idx> {
}
}
impl RangeInclusive<usize> {
/// Converts to an exclusive `Range` for `SliceIndex` implementations.
/// The caller is responsible for dealing with `last == usize::MAX`.
#[inline]
pub(crate) const fn into_slice_range(self) -> Range<usize> {
Range { start: self.start, end: self.last + 1 }
}
}
#[stable(feature = "new_range_inclusive_api", since = "CURRENT_RUSTC_VERSION")]
#[rustc_const_unstable(feature = "const_range", issue = "none")]
impl<T> const RangeBounds<T> for RangeInclusive<T> {

View file

@ -663,7 +663,6 @@ unsafe impl<T> const SliceIndex<[T]> for ops::RangeFull {
}
/// The methods `index` and `index_mut` panic if:
/// - the end of the range is `usize::MAX` or
/// - the start of the range is greater than the end of the range or
/// - the end of the range is out of bounds.
#[stable(feature = "inclusive_range", since = "1.26.0")]
@ -673,12 +672,12 @@ unsafe impl<T> const SliceIndex<[T]> for ops::RangeInclusive<usize> {
#[inline]
fn get(self, slice: &[T]) -> Option<&[T]> {
if *self.end() == usize::MAX { None } else { self.into_slice_range().get(slice) }
if *self.end() >= slice.len() { None } else { self.into_slice_range().get(slice) }
}
#[inline]
fn get_mut(self, slice: &mut [T]) -> Option<&mut [T]> {
if *self.end() == usize::MAX { None } else { self.into_slice_range().get_mut(slice) }
if *self.end() >= slice.len() { None } else { self.into_slice_range().get_mut(slice) }
}
#[inline]
@ -950,8 +949,7 @@ where
R: ops::RangeBounds<usize>,
{
let len = bounds.end;
let r = into_range(len, (range.start_bound().copied(), range.end_bound().copied()))?;
if r.start > r.end || r.end > len { None } else { Some(r) }
try_into_slice_range(len, (range.start_bound().copied(), range.end_bound().copied()))
}
/// Converts a pair of `ops::Bound`s into `ops::Range` without performing any
@ -978,25 +976,31 @@ pub(crate) const fn into_range_unchecked(
/// Returns `None` on overflowing indices.
#[rustc_const_unstable(feature = "const_range", issue = "none")]
#[inline]
pub(crate) const fn into_range(
pub(crate) const fn try_into_slice_range(
len: usize,
(start, end): (ops::Bound<usize>, ops::Bound<usize>),
) -> Option<ops::Range<usize>> {
use ops::Bound;
let start = match start {
Bound::Included(start) => start,
Bound::Excluded(start) => start.checked_add(1)?,
Bound::Unbounded => 0,
};
let end = match end {
Bound::Included(end) => end.checked_add(1)?,
Bound::Excluded(end) => end,
Bound::Unbounded => len,
ops::Bound::Included(end) if end >= len => return None,
// Cannot overflow because `end < len` implies `end < usize::MAX`.
ops::Bound::Included(end) => end + 1,
ops::Bound::Excluded(end) if end > len => return None,
ops::Bound::Excluded(end) => end,
ops::Bound::Unbounded => len,
};
// Don't bother with checking `start < end` and `end <= len`
// since these checks are handled by `Range` impls
let start = match start {
ops::Bound::Excluded(start) if start >= end => return None,
// Cannot overflow because `start < end` implies `start < usize::MAX`.
ops::Bound::Excluded(start) => start + 1,
ops::Bound::Included(start) if start > end => return None,
ops::Bound::Included(start) => start,
ops::Bound::Unbounded => 0,
};
Some(start..end)
}
@ -1039,12 +1043,12 @@ unsafe impl<T> SliceIndex<[T]> for (ops::Bound<usize>, ops::Bound<usize>) {
#[inline]
fn get(self, slice: &[T]) -> Option<&Self::Output> {
into_range(slice.len(), self)?.get(slice)
try_into_slice_range(slice.len(), self)?.get(slice)
}
#[inline]
fn get_mut(self, slice: &mut [T]) -> Option<&mut Self::Output> {
into_range(slice.len(), self)?.get_mut(slice)
try_into_slice_range(slice.len(), self)?.get_mut(slice)
}
#[inline]

View file

@ -85,34 +85,50 @@ fn slice_error_fail_rt(s: &str, begin: usize, end: usize) -> ! {
let trunc_len = s.floor_char_boundary(MAX_DISPLAY_LENGTH);
let s_trunc = &s[..trunc_len];
let ellipsis = if trunc_len < s.len() { "[...]" } else { "" };
let len = s.len();
// 1. out of bounds
if begin > s.len() || end > s.len() {
let oob_index = if begin > s.len() { begin } else { end };
panic!("byte index {oob_index} is out of bounds of `{s_trunc}`{ellipsis}");
// 1. begin is OOB.
if begin > len {
panic!("start byte index {begin} is out of bounds of `{s_trunc}`{ellipsis}");
}
// 2. begin <= end
assert!(
begin <= end,
"begin <= end ({} <= {}) when slicing `{}`{}",
begin,
end,
s_trunc,
ellipsis
);
// 2. end is OOB.
if end > len {
panic!("end byte index {end} is out of bounds of `{s_trunc}`{ellipsis}");
}
// 3. character boundary
let index = if !s.is_char_boundary(begin) { begin } else { end };
// find the character
let char_start = s.floor_char_boundary(index);
// `char_start` must be less than len and a char boundary
let ch = s[char_start..].chars().next().unwrap();
let char_range = char_start..char_start + ch.len_utf8();
// 3. range is backwards.
if begin > end {
panic!("begin > end ({begin} > {end}) when slicing `{s_trunc}`{ellipsis}")
}
// 4. begin is inside a character.
if !s.is_char_boundary(begin) {
let floor = s.floor_char_boundary(begin);
let ceil = s.ceil_char_boundary(begin);
let range = floor..ceil;
let ch = s[floor..ceil].chars().next().unwrap();
panic!(
"byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
index, ch, char_range, s_trunc, ellipsis
);
"start byte index {begin} is not a char boundary; it is inside {ch:?} (bytes {range:?}) of `{s_trunc}`{ellipsis}"
)
}
// 5. end is inside a character.
if !s.is_char_boundary(end) {
let floor = s.floor_char_boundary(end);
let ceil = s.ceil_char_boundary(end);
let range = floor..ceil;
let ch = s[floor..ceil].chars().next().unwrap();
panic!(
"end byte index {end} is not a char boundary; it is inside {ch:?} (bytes {range:?}) of `{s_trunc}`{ellipsis}"
)
}
// 6. end is OOB and range is inclusive (end == len).
// This test cannot be combined with 2. above because for cases like
// `"abcαβγ"[4..9]` the error is that 4 is inside 'α', not that 9 is OOB.
debug_assert_eq!(end, len);
panic!("end byte index {end} is out of bounds of `{s_trunc}`{ellipsis}");
}
impl str {

View file

@ -76,13 +76,6 @@ where
}
}
#[inline(never)]
#[cold]
#[track_caller]
const fn str_index_overflow_fail() -> ! {
panic!("attempted to index str up to maximum usize");
}
/// Implements substring slicing with syntax `&self[..]` or `&mut self[..]`.
///
/// Returns a slice of the whole string, i.e., returns `&self` or `&mut
@ -389,12 +382,12 @@ unsafe impl SliceIndex<str> for (ops::Bound<usize>, ops::Bound<usize>) {
#[inline]
fn get(self, slice: &str) -> Option<&str> {
crate::slice::index::into_range(slice.len(), self)?.get(slice)
crate::slice::index::try_into_slice_range(slice.len(), self)?.get(slice)
}
#[inline]
fn get_mut(self, slice: &mut str) -> Option<&mut str> {
crate::slice::index::into_range(slice.len(), self)?.get_mut(slice)
crate::slice::index::try_into_slice_range(slice.len(), self)?.get_mut(slice)
}
#[inline]
@ -640,11 +633,11 @@ unsafe impl const SliceIndex<str> for ops::RangeInclusive<usize> {
type Output = str;
#[inline]
fn get(self, slice: &str) -> Option<&Self::Output> {
if *self.end() == usize::MAX { None } else { self.into_slice_range().get(slice) }
if *self.end() >= slice.len() { None } else { self.into_slice_range().get(slice) }
}
#[inline]
fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
if *self.end() == usize::MAX { None } else { self.into_slice_range().get_mut(slice) }
if *self.end() >= slice.len() { None } else { self.into_slice_range().get_mut(slice) }
}
#[inline]
unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
@ -658,17 +651,37 @@ unsafe impl const SliceIndex<str> for ops::RangeInclusive<usize> {
}
#[inline]
fn index(self, slice: &str) -> &Self::Output {
if *self.end() == usize::MAX {
str_index_overflow_fail();
let Self { mut start, mut end, exhausted } = self;
let len = slice.len();
if end < len {
end = end + 1;
start = if exhausted { end } else { start };
if start <= end && slice.is_char_boundary(start) && slice.is_char_boundary(end) {
// SAFETY: just checked that `start` and `end` are on a char boundary,
// and we are passing in a safe reference, so the return value will also be one.
// We also checked char boundaries, so this is valid UTF-8.
unsafe { return &*(start..end).get_unchecked(slice) }
}
self.into_slice_range().index(slice)
}
super::slice_error_fail(slice, start, end)
}
#[inline]
fn index_mut(self, slice: &mut str) -> &mut Self::Output {
if *self.end() == usize::MAX {
str_index_overflow_fail();
let Self { mut start, mut end, exhausted } = self;
let len = slice.len();
if end < len {
end = end + 1;
start = if exhausted { end } else { start };
if start <= end && slice.is_char_boundary(start) && slice.is_char_boundary(end) {
// SAFETY: just checked that `start` and `end` are on a char boundary,
// and we are passing in a safe reference, so the return value will also be one.
// We also checked char boundaries, so this is valid UTF-8.
unsafe { return &mut *(start..end).get_unchecked_mut(slice) }
}
self.into_slice_range().index_mut(slice)
}
super::slice_error_fail(slice, start, end)
}
}
@ -678,35 +691,29 @@ unsafe impl const SliceIndex<str> for range::RangeInclusive<usize> {
type Output = str;
#[inline]
fn get(self, slice: &str) -> Option<&Self::Output> {
if self.last == usize::MAX { None } else { self.into_slice_range().get(slice) }
ops::RangeInclusive::from(self).get(slice)
}
#[inline]
fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
if self.last == usize::MAX { None } else { self.into_slice_range().get_mut(slice) }
ops::RangeInclusive::from(self).get_mut(slice)
}
#[inline]
unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output {
// SAFETY: the caller must uphold the safety contract for `get_unchecked`.
unsafe { self.into_slice_range().get_unchecked(slice) }
unsafe { ops::RangeInclusive::from(self).get_unchecked(slice) }
}
#[inline]
unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output {
// SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`.
unsafe { self.into_slice_range().get_unchecked_mut(slice) }
unsafe { ops::RangeInclusive::from(self).get_unchecked_mut(slice) }
}
#[inline]
fn index(self, slice: &str) -> &Self::Output {
if self.last == usize::MAX {
str_index_overflow_fail();
}
self.into_slice_range().index(slice)
ops::RangeInclusive::from(self).index(slice)
}
#[inline]
fn index_mut(self, slice: &mut str) -> &mut Self::Output {
if self.last == usize::MAX {
str_index_overflow_fail();
}
self.into_slice_range().index_mut(slice)
ops::RangeInclusive::from(self).index_mut(slice)
}
}

View file

@ -0,0 +1,90 @@
//@ compile-flags: -Copt-level=3
//@ min-llvm-version: 21
#![crate_type = "lib"]
use std::ops::{Range, RangeFrom, RangeInclusive, RangeTo, RangeToInclusive};
macro_rules! tests {
($range_ty:ty, $get_func_name:ident, $index_func_name:ident) => {
#[no_mangle]
pub fn $get_func_name(slice: &[u32], range: $range_ty) -> Option<&[u32]> {
slice.get(range)
}
#[no_mangle]
pub fn $index_func_name(slice: &[u32], range: $range_ty) -> &[u32] {
&slice[range]
}
};
}
// 2 comparisons required:
// end <= len && start <= end
// CHECK-LABEL: @get_range
// CHECK-COUNT-2: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
// CHECK-LABEL: @index_range
// CHECK-COUNT-2: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
tests!(Range<usize>, get_range, index_range);
// 2 comparisons required:
// end < len && start <= end + 1
// CHECK-LABEL: @get_range_inclusive
// CHECK-COUNT-2: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
// CHECK-LABEL: @index_range_inclusive
// CHECK-COUNT-2: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
tests!(RangeInclusive<usize>, get_range_inclusive, index_range_inclusive);
// 1 comparison required:
// end <= len
// CHECK-LABEL: @get_range_to
// CHECK-COUNT-1: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
// CHECK-LABEL: @index_range_to
// CHECK-COUNT-1: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
tests!(RangeTo<usize>, get_range_to, index_range_to);
// 1 comparison required:
// end < len
// CHECK-LABEL: @get_range_to_inclusive
// CHECK-COUNT-1: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
// CHECK-LABEL: @index_range_to_inclusive
// CHECK-COUNT-1: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
tests!(RangeToInclusive<usize>, get_range_to_inclusive, index_range_to_inclusive);
// 1 comparison required:
// start <= len
// CHECK-LABEL: @get_range_from
// CHECK-COUNT-1: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
// CHECK-LABEL: @index_range_from
// CHECK-COUNT-1: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
tests!(RangeFrom<usize>, get_range_from, index_range_from);

View file

@ -0,0 +1,94 @@
//@ compile-flags: -Copt-level=3
//@ min-llvm-version: 21
#![crate_type = "lib"]
use std::ops::{Range, RangeFrom, RangeInclusive, RangeTo, RangeToInclusive};
macro_rules! tests {
($range_ty:ty, $get_func_name:ident, $index_func_name:ident) => {
#[no_mangle]
pub fn $get_func_name(slice: &str, range: $range_ty) -> Option<&str> {
slice.get(range)
}
#[no_mangle]
pub fn $index_func_name(slice: &str, range: $range_ty) -> &str {
&slice[range]
}
};
}
// 9 comparisons required:
// start <= end
// && (start == 0 || (start >= len && start == len) || bytes[start] >= -0x40)
// && (end == 0 || (end >= len && end == len) || bytes[end] >= -0x40)
// CHECK-LABEL: @get_range
// CHECK-COUNT-9: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
// CHECK-LABEL: @index_range
// CHECK-COUNT-9: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
tests!(Range<usize>, get_range, index_range);
// 7 comparisons required:
// end < len && start <= end + 1
// && (start == 0 || start >= len || bytes[start] >= -0x40)
// && ( end + 1 >= len || bytes[end + 1] >= -0x40)
// CHECK-LABEL: @get_range_inclusive
// CHECK-COUNT-7: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
// CHECK-LABEL: @index_range_inclusive
// CHECK-COUNT-7: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
tests!(RangeInclusive<usize>, get_range_inclusive, index_range_inclusive);
// 4 comparisons required:
// end == 0 || (end >= len && end == len) || bytes[end] >= -0x40
// CHECK-LABEL: @get_range_to
// CHECK-COUNT-4: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
// CHECK-LABEL: @index_range_to
// CHECK-COUNT-4: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
tests!(RangeTo<usize>, get_range_to, index_range_to);
// 3 comparisons required:
// end < len && (end + 1 >= len || bytes[end + 1] >= -0x40)
// CHECK-LABEL: @get_range_to_inclusive
// CHECK-COUNT-3: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
// CHECK-LABEL: @index_range_to_inclusive
// CHECK-COUNT-3: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
tests!(RangeToInclusive<usize>, get_range_to_inclusive, index_range_to_inclusive);
// 4 comparisons required:
// start == 0 || (start >= len && start == len) || bytes[start] >= -0x40)
// CHECK-LABEL: @get_range_from
// CHECK-COUNT-4: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
// CHECK-LABEL: @index_range_from
// CHECK-COUNT-4: %{{.+}} = icmp
// CHECK-NOT: %{{.+}} = icmp
// CHECK: ret
tests!(RangeFrom<usize>, get_range_from, index_range_from);

View file

@ -1,4 +1,4 @@
thread 'main' ($TID) panicked at $DIR/const-eval-select-backtrace-std.rs:6:8:
byte index 1 is out of bounds of ``
start byte index 1 is out of bounds of ``
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace