diff --git a/library/alloctests/tests/str.rs b/library/alloctests/tests/str.rs index fcc4aaaa1dcd..096df0007b65 100644 --- a/library/alloctests/tests/str.rs +++ b/library/alloctests/tests/str.rs @@ -630,13 +630,13 @@ mod slice_index { // note: using 0 specifically ensures that the result of overflowing is 0..0, // so that `get` doesn't simply return None for the wrong reason. bad: data[0..=usize::MAX]; - message: "maximum usize"; + message: "out of bounds"; } in mod rangetoinclusive { data: "hello"; bad: data[..=usize::MAX]; - message: "maximum usize"; + message: "out of bounds"; } } } @@ -659,49 +659,49 @@ mod slice_index { data: super::DATA; bad: data[super::BAD_START..super::GOOD_END]; message: - "byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of"; + "start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of"; } in mod range_2 { data: super::DATA; bad: data[super::GOOD_START..super::BAD_END]; message: - "byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of"; + "end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of"; } in mod rangefrom { data: super::DATA; bad: data[super::BAD_START..]; message: - "byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of"; + "start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of"; } in mod rangeto { data: super::DATA; bad: data[..super::BAD_END]; message: - "byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of"; + "end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of"; } in mod rangeinclusive_1 { data: super::DATA; bad: data[super::BAD_START..=super::GOOD_END_INCL]; message: - "byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of"; + "start byte index 4 is not a char boundary; it is inside 'α' (bytes 3..5) of"; } in mod rangeinclusive_2 { data: super::DATA; bad: data[super::GOOD_START..=super::BAD_END_INCL]; message: - "byte index 6 is not a char boundary; it is 
inside 'β' (bytes 5..7) of"; + "end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of"; } in mod rangetoinclusive { data: super::DATA; bad: data[..=super::BAD_END_INCL]; message: - "byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of"; + "end byte index 6 is not a char boundary; it is inside 'β' (bytes 5..7) of"; } } } @@ -716,7 +716,9 @@ mod slice_index { // check the panic includes the prefix of the sliced string #[test] - #[should_panic(expected = "byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet")] + #[should_panic( + expected = "end byte index 1024 is out of bounds of `Lorem ipsum dolor sit amet" + )] fn test_slice_fail_truncated_1() { let _ = &LOREM_PARAGRAPH[..1024]; } diff --git a/library/core/src/range.rs b/library/core/src/range.rs index fe488355ad15..0ef0d192a868 100644 --- a/library/core/src/range.rs +++ b/library/core/src/range.rs @@ -352,15 +352,6 @@ impl RangeInclusive { } } -impl RangeInclusive { - /// Converts to an exclusive `Range` for `SliceIndex` implementations. - /// The caller is responsible for dealing with `last == usize::MAX`. - #[inline] - pub(crate) const fn into_slice_range(self) -> Range { - Range { start: self.start, end: self.last + 1 } - } -} - #[stable(feature = "new_range_inclusive_api", since = "CURRENT_RUSTC_VERSION")] #[rustc_const_unstable(feature = "const_range", issue = "none")] impl const RangeBounds for RangeInclusive { diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index ab7389a1300c..5483b8e17bc3 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -85,34 +85,50 @@ fn slice_error_fail_rt(s: &str, begin: usize, end: usize) -> ! { let trunc_len = s.floor_char_boundary(MAX_DISPLAY_LENGTH); let s_trunc = &s[..trunc_len]; let ellipsis = if trunc_len < s.len() { "[...]" } else { "" }; + let len = s.len(); - // 1. 
out of bounds - if begin > s.len() || end > s.len() { - let oob_index = if begin > s.len() { begin } else { end }; - panic!("byte index {oob_index} is out of bounds of `{s_trunc}`{ellipsis}"); + // 1. begin is OOB. + if begin > len { + panic!("start byte index {begin} is out of bounds of `{s_trunc}`{ellipsis}"); } - // 2. begin <= end - assert!( - begin <= end, - "begin <= end ({} <= {}) when slicing `{}`{}", - begin, - end, - s_trunc, - ellipsis - ); + // 2. end is OOB. + if end > len { + panic!("end byte index {end} is out of bounds of `{s_trunc}`{ellipsis}"); + } - // 3. character boundary - let index = if !s.is_char_boundary(begin) { begin } else { end }; - // find the character - let char_start = s.floor_char_boundary(index); - // `char_start` must be less than len and a char boundary - let ch = s[char_start..].chars().next().unwrap(); - let char_range = char_start..char_start + ch.len_utf8(); - panic!( - "byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}", - index, ch, char_range, s_trunc, ellipsis - ); + // 3. range is backwards. + if begin > end { + panic!("begin <= end ({begin} <= {end}) when slicing `{s_trunc}`{ellipsis}") + } + + // 4. begin is inside a character. + if !s.is_char_boundary(begin) { + let floor = s.floor_char_boundary(begin); + let ceil = s.ceil_char_boundary(begin); + let range = floor..ceil; + let ch = s[floor..ceil].chars().next().unwrap(); + panic!( + "start byte index {begin} is not a char boundary; it is inside {ch:?} (bytes {range:?}) of `{s_trunc}`{ellipsis}" + ) + } + + // 5. end is inside a character. + if !s.is_char_boundary(end) { + let floor = s.floor_char_boundary(end); + let ceil = s.ceil_char_boundary(end); + let range = floor..ceil; + let ch = s[floor..ceil].chars().next().unwrap(); + panic!( + "end byte index {end} is not a char boundary; it is inside {ch:?} (bytes {range:?}) of `{s_trunc}`{ellipsis}" + ) + } + + // 6. end is OOB and range is inclusive (end == len). 
+ // This test cannot be combined with 2. above because for cases like + // `"abcαβγ"[4..=9]` the error is that 4 is inside 'α', not that 9 is OOB. + debug_assert_eq!(end, len); + panic!("end byte index {end} is out of bounds of `{s_trunc}`{ellipsis}"); } impl str { diff --git a/library/core/src/str/traits.rs b/library/core/src/str/traits.rs index b63fe96ea99d..6cac9418f9d7 100644 --- a/library/core/src/str/traits.rs +++ b/library/core/src/str/traits.rs @@ -76,13 +76,6 @@ where } } -#[inline(never)] -#[cold] -#[track_caller] -const fn str_index_overflow_fail() -> ! { - panic!("attempted to index str up to maximum usize"); -} - /// Implements substring slicing with syntax `&self[..]` or `&mut self[..]`. /// /// Returns a slice of the whole string, i.e., returns `&self` or `&mut @@ -640,11 +633,11 @@ unsafe impl const SliceIndex for ops::RangeInclusive { type Output = str; #[inline] fn get(self, slice: &str) -> Option<&Self::Output> { - if *self.end() == usize::MAX { None } else { self.into_slice_range().get(slice) } + if *self.end() >= slice.len() { None } else { self.into_slice_range().get(slice) } } #[inline] fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> { - if *self.end() == usize::MAX { None } else { self.into_slice_range().get_mut(slice) } + if *self.end() >= slice.len() { None } else { self.into_slice_range().get_mut(slice) } } #[inline] unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output { @@ -658,17 +651,37 @@ unsafe impl const SliceIndex for ops::RangeInclusive { } #[inline] fn index(self, slice: &str) -> &Self::Output { - if *self.end() == usize::MAX { - str_index_overflow_fail(); + let Self { mut start, mut end, exhausted } = self; + let len = slice.len(); + if end < len { + end = end + 1; + start = if exhausted { end } else { start }; + if start <= end && slice.is_char_boundary(start) && slice.is_char_boundary(end) { + // SAFETY: just checked that `start` and `end` are on a char boundary, + // and we are passing in a 
safe reference, so the return value will also be one. + // We also checked char boundaries, so this is valid UTF-8. + unsafe { return &*(start..end).get_unchecked(slice) } + } } - self.into_slice_range().index(slice) + + super::slice_error_fail(slice, start, end) } #[inline] fn index_mut(self, slice: &mut str) -> &mut Self::Output { - if *self.end() == usize::MAX { - str_index_overflow_fail(); + let Self { mut start, mut end, exhausted } = self; + let len = slice.len(); + if end < len { + end = end + 1; + start = if exhausted { end } else { start }; + if start <= end && slice.is_char_boundary(start) && slice.is_char_boundary(end) { + // SAFETY: just checked that `start` and `end` are on a char boundary, + // and we are passing in a safe reference, so the return value will also be one. + // We also checked char boundaries, so this is valid UTF-8. + unsafe { return &mut *(start..end).get_unchecked_mut(slice) } + } } - self.into_slice_range().index_mut(slice) + + super::slice_error_fail(slice, start, end) } } @@ -678,35 +691,29 @@ unsafe impl const SliceIndex for range::RangeInclusive { type Output = str; #[inline] fn get(self, slice: &str) -> Option<&Self::Output> { - if self.last == usize::MAX { None } else { self.into_slice_range().get(slice) } + ops::RangeInclusive::from(self).get(slice) } #[inline] fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> { - if self.last == usize::MAX { None } else { self.into_slice_range().get_mut(slice) } + ops::RangeInclusive::from(self).get_mut(slice) } #[inline] unsafe fn get_unchecked(self, slice: *const str) -> *const Self::Output { // SAFETY: the caller must uphold the safety contract for `get_unchecked`. - unsafe { self.into_slice_range().get_unchecked(slice) } + unsafe { ops::RangeInclusive::from(self).get_unchecked(slice) } } #[inline] unsafe fn get_unchecked_mut(self, slice: *mut str) -> *mut Self::Output { // SAFETY: the caller must uphold the safety contract for `get_unchecked_mut`. 
- unsafe { self.into_slice_range().get_unchecked_mut(slice) } + unsafe { ops::RangeInclusive::from(self).get_unchecked_mut(slice) } } #[inline] fn index(self, slice: &str) -> &Self::Output { - if self.last == usize::MAX { - str_index_overflow_fail(); - } - self.into_slice_range().index(slice) + ops::RangeInclusive::from(self).index(slice) } #[inline] fn index_mut(self, slice: &mut str) -> &mut Self::Output { - if self.last == usize::MAX { - str_index_overflow_fail(); - } - self.into_slice_range().index_mut(slice) + ops::RangeInclusive::from(self).index_mut(slice) } } diff --git a/tests/codegen-llvm/str-range-indexing.rs b/tests/codegen-llvm/str-range-indexing.rs index 3c35b26773cb..dee1a3a41c46 100644 --- a/tests/codegen-llvm/str-range-indexing.rs +++ b/tests/codegen-llvm/str-range-indexing.rs @@ -35,18 +35,18 @@ macro_rules! tests { // CHECK: ret tests!(Range, get_range, index_range); -// 9 comparisons required: -// end != usize::MAX && start <= end + 1 -// && (start == 0 || (start >= len && start == len) || bytes[start] >= -0x40) -// && ( (end + 1 >= len && end + 1 == len) || bytes[end + 1] >= -0x40) +// 7 comparisons required: +// end < len && start <= end + 1 +// && (start == 0 || start >= len || bytes[start] >= -0x40) +// && ( end + 1 >= len || bytes[end + 1] >= -0x40) // CHECK-LABEL: @get_range_inclusive -// CHECK-COUNT-9: %{{.+}} = icmp +// CHECK-COUNT-7: %{{.+}} = icmp // CHECK-NOT: %{{.+}} = icmp // CHECK: ret // CHECK-LABEL: @index_range_inclusive -// CHECK-COUNT-9: %{{.+}} = icmp +// CHECK-COUNT-7: %{{.+}} = icmp // CHECK-NOT: %{{.+}} = icmp // CHECK: ret tests!(RangeInclusive, get_range_inclusive, index_range_inclusive); @@ -65,16 +65,16 @@ tests!(RangeInclusive, get_range_inclusive, index_range_inclusive); // CHECK: ret tests!(RangeTo, get_range_to, index_range_to); -// 4 comparisons required: -// end != usize::MAX && (end + 1 >= len && end + 1 == len) || bytes[end + 1] >= -0x40) +// 3 comparisons required: +// end < len && (end + 1 >= len || bytes[end 
+ 1] >= -0x40) // CHECK-LABEL: @get_range_to_inclusive -// CHECK-COUNT-4: %{{.+}} = icmp +// CHECK-COUNT-3: %{{.+}} = icmp // CHECK-NOT: %{{.+}} = icmp // CHECK: ret // CHECK-LABEL: @index_range_to_inclusive -// CHECK-COUNT-4: %{{.+}} = icmp +// CHECK-COUNT-3: %{{.+}} = icmp // CHECK-NOT: %{{.+}} = icmp // CHECK: ret tests!(RangeToInclusive, get_range_to_inclusive, index_range_to_inclusive); diff --git a/tests/ui/intrinsics/const-eval-select-backtrace-std.run.stderr b/tests/ui/intrinsics/const-eval-select-backtrace-std.run.stderr index 397eeaf600ad..aee60c94f106 100644 --- a/tests/ui/intrinsics/const-eval-select-backtrace-std.run.stderr +++ b/tests/ui/intrinsics/const-eval-select-backtrace-std.run.stderr @@ -1,4 +1,4 @@ thread 'main' ($TID) panicked at $DIR/const-eval-select-backtrace-std.rs:6:8: -byte index 1 is out of bounds of `` +start byte index 1 is out of bounds of `` note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace