Rollup merge of #144472 - okaneco:char_bound, r=Mark-Simulacrum
str: Mark unstable `round_char_boundary` feature functions as const. Mark `floor_char_boundary` and `ceil_char_boundary` as const. Simplify the implementations, reducing the number of arithmetic operations. It seems unnecessary to do the lower/upper bounds calculations and extra slicing when we can jump straight to inspecting the bytes, assuming the underlying data is valid UTF-8. Tracking issue: https://github.com/rust-lang/rust/issues/93743
This commit is contained in:
commit
e36b844b4e
1 changed file with 25 additions and 13 deletions
|
|
@ -407,17 +407,22 @@ impl str {
|
|||
/// ```
|
||||
#[unstable(feature = "round_char_boundary", issue = "93743")]
|
||||
#[inline]
|
||||
pub fn floor_char_boundary(&self, index: usize) -> usize {
|
||||
pub const fn floor_char_boundary(&self, index: usize) -> usize {
|
||||
if index >= self.len() {
|
||||
self.len()
|
||||
} else {
|
||||
let lower_bound = index.saturating_sub(3);
|
||||
let new_index = self.as_bytes()[lower_bound..=index]
|
||||
.iter()
|
||||
.rposition(|b| b.is_utf8_char_boundary());
|
||||
let mut i = index;
|
||||
while i > 0 {
|
||||
if self.as_bytes()[i].is_utf8_char_boundary() {
|
||||
break;
|
||||
}
|
||||
i -= 1;
|
||||
}
|
||||
|
||||
// SAFETY: we know that the character boundary will be within four bytes
|
||||
unsafe { lower_bound + new_index.unwrap_unchecked() }
|
||||
// The character boundary will be within four bytes of the index
|
||||
debug_assert!(i >= index.saturating_sub(3));
|
||||
|
||||
i
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -445,15 +450,22 @@ impl str {
|
|||
/// ```
|
||||
#[unstable(feature = "round_char_boundary", issue = "93743")]
|
||||
#[inline]
|
||||
pub fn ceil_char_boundary(&self, index: usize) -> usize {
|
||||
pub const fn ceil_char_boundary(&self, index: usize) -> usize {
|
||||
if index >= self.len() {
|
||||
self.len()
|
||||
} else {
|
||||
let upper_bound = Ord::min(index + 4, self.len());
|
||||
self.as_bytes()[index..upper_bound]
|
||||
.iter()
|
||||
.position(|b| b.is_utf8_char_boundary())
|
||||
.map_or(upper_bound, |pos| pos + index)
|
||||
let mut i = index;
|
||||
while i < self.len() {
|
||||
if self.as_bytes()[i].is_utf8_char_boundary() {
|
||||
break;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
// The character boundary will be within four bytes of the index
|
||||
debug_assert!(i <= index + 3);
|
||||
|
||||
i
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue