Rollup merge of #144472 - okaneco:char_bound, r=Mark-Simulacrum

str: Mark unstable `round_char_boundary` feature functions as const Mark `floor_char_boundary`, `ceil_char_boundary` const Simplify the implementations, reducing the number of arithmetic operations It seems unnecessary to do the lower/upper bounds calculations and extra slicing when we can jump straight to inspecting the bytes, assuming the underlying data is valid UTF-8. Tracking issue https://github.com/rust-lang/rust/issues/93743
2025-07-28 08:36:53 +02:00 · 2025-07-28 08:36:53 +02:00 · e36b844b4e
commit e36b844b4e
parent 21120e297c 7f7d343400
1 changed files with 25 additions and 13 deletions
--- a/library/core/src/str/mod.rs
+++ b/library/core/src/str/mod.rs
@ -407,17 +407,22 @@ impl str {
    /// ```
    #[unstable(feature = "round_char_boundary", issue = "93743")]
    #[inline]
-    pub fn floor_char_boundary(&self, index: usize) -> usize {
+    pub const fn floor_char_boundary(&self, index: usize) -> usize {
        if index >= self.len() {
            self.len()
        } else {
-            let lower_bound = index.saturating_sub(3);
-            let new_index = self.as_bytes()[lower_bound..=index]
-                .iter()
-                .rposition(|b| b.is_utf8_char_boundary());
+            let mut i = index;
+            while i > 0 {
+                if self.as_bytes()[i].is_utf8_char_boundary() {
+                    break;
+                }
+                i -= 1;
+            }

-            // SAFETY: we know that the character boundary will be within four bytes
-            unsafe { lower_bound + new_index.unwrap_unchecked() }
+            //  The character boundary will be within four bytes of the index
+            debug_assert!(i >= index.saturating_sub(3));
+
+            i
        }
    }

@ -445,15 +450,22 @@ impl str {
    /// ```
    #[unstable(feature = "round_char_boundary", issue = "93743")]
    #[inline]
-    pub fn ceil_char_boundary(&self, index: usize) -> usize {
+    pub const fn ceil_char_boundary(&self, index: usize) -> usize {
        if index >= self.len() {
            self.len()
        } else {
-            let upper_bound = Ord::min(index + 4, self.len());
-            self.as_bytes()[index..upper_bound]
-                .iter()
-                .position(|b| b.is_utf8_char_boundary())
-                .map_or(upper_bound, |pos| pos + index)
+            let mut i = index;
+            while i < self.len() {
+                if self.as_bytes()[i].is_utf8_char_boundary() {
+                    break;
+                }
+                i += 1;
+            }
+
+            //  The character boundary will be within four bytes of the index
+            debug_assert!(i <= index + 3);
+
+            i
        }
    }