From d31ca4fc8ede3b4a28abea7df57e01591ee8bb7d Mon Sep 17 00:00:00 2001 From: Lzu Tao Date: Fri, 4 Sep 2020 07:34:23 +0000 Subject: [PATCH] Move Utf8Error to new mod --- library/core/src/str/error.rs | 129 +++++++++++++++++++++++++++++++++ library/core/src/str/mod.rs | 131 ++-------------------------------- 2 files changed, 134 insertions(+), 126 deletions(-) create mode 100644 library/core/src/str/error.rs diff --git a/library/core/src/str/error.rs b/library/core/src/str/error.rs new file mode 100644 index 000000000000..43b790a4aca9 --- /dev/null +++ b/library/core/src/str/error.rs @@ -0,0 +1,129 @@ +//! Defines utf8 error type. + +use crate::fmt; + +/// Errors which can occur when attempting to interpret a sequence of [`u8`] +/// as a string. +/// +/// As such, the `from_utf8` family of functions and methods for both [`String`]s +/// and [`&str`]s make use of this error, for example. +/// +/// [`String`]: ../../std/string/struct.String.html#method.from_utf8 +/// [`&str`]: from_utf8 +/// +/// # Examples +/// +/// This error type’s methods can be used to create functionality +/// similar to `String::from_utf8_lossy` without allocating heap memory: +/// +/// ``` +/// fn from_utf8_lossy(mut input: &[u8], mut push: F) where F: FnMut(&str) { +/// loop { +/// match std::str::from_utf8(input) { +/// Ok(valid) => { +/// push(valid); +/// break +/// } +/// Err(error) => { +/// let (valid, after_valid) = input.split_at(error.valid_up_to()); +/// unsafe { +/// push(std::str::from_utf8_unchecked(valid)) +/// } +/// push("\u{FFFD}"); +/// +/// if let Some(invalid_sequence_length) = error.error_len() { +/// input = &after_valid[invalid_sequence_length..] +/// } else { +/// break +/// } +/// } +/// } +/// } +/// } +/// ``` +#[derive(Copy, Eq, PartialEq, Clone, Debug)] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct Utf8Error { + pub(super) valid_up_to: usize, + pub(super) error_len: Option, +} + +impl Utf8Error { + /// Returns the index in the given string up to which valid UTF-8 was + /// verified. + /// + /// It is the maximum index such that `from_utf8(&input[..index])` + /// would return `Ok(_)`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::str; + /// + /// // some invalid bytes, in a vector + /// let sparkle_heart = vec![0, 159, 146, 150]; + /// + /// // std::str::from_utf8 returns a Utf8Error + /// let error = str::from_utf8(&sparkle_heart).unwrap_err(); + /// + /// // the second byte is invalid here + /// assert_eq!(1, error.valid_up_to()); + /// ``` + #[stable(feature = "utf8_error", since = "1.5.0")] + pub fn valid_up_to(&self) -> usize { + self.valid_up_to + } + + /// Provides more information about the failure: + /// + /// * `None`: the end of the input was reached unexpectedly. + /// `self.valid_up_to()` is 1 to 3 bytes from the end of the input. + /// If a byte stream (such as a file or a network socket) is being decoded incrementally, + /// this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks. + /// + /// * `Some(len)`: an unexpected byte was encountered. + /// The length provided is that of the invalid byte sequence + /// that starts at the index given by `valid_up_to()`. + /// Decoding should resume after that sequence + /// (after inserting a [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]) in case of + /// lossy decoding. + /// + /// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html + #[stable(feature = "utf8_error_error_len", since = "1.20.0")] + pub fn error_len(&self) -> Option { + self.error_len.map(|len| len as usize) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for Utf8Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(error_len) = self.error_len { + write!( + f, + "invalid utf-8 sequence of {} bytes from index {}", + error_len, self.valid_up_to + ) + } else { + write!(f, "incomplete utf-8 byte sequence from index {}", self.valid_up_to) + } + } +} + +/// An error returned when parsing a `bool` using [`from_str`] fails +/// +/// [`from_str`]: FromStr::from_str +#[derive(Debug, Clone, PartialEq, Eq)] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct ParseBoolError { + pub(super) _priv: (), +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for ParseBoolError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + "provided string was not `true` or `false`".fmt(f) + } +} diff --git a/library/core/src/str/mod.rs b/library/core/src/str/mod.rs index e4a6b7e142a5..b00d94f4858d 100644 --- a/library/core/src/str/mod.rs +++ b/library/core/src/str/mod.rs @@ -8,6 +8,8 @@ #![stable(feature = "rust1", since = "1.0.0")] +mod error; + use self::pattern::Pattern; use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher}; @@ -27,6 +29,9 @@ pub mod pattern; #[allow(missing_docs)] pub mod lossy; +#[stable(feature = "rust1", since = "1.0.0")] +pub use error::{ParseBoolError, Utf8Error}; + /// Parse a value from a string /// /// `FromStr`'s [`from_str`] method is often used implicitly, through @@ -138,121 +143,10 @@ impl FromStr for bool { } } -/// An error returned when parsing a `bool` using [`from_str`] fails -/// -/// [`from_str`]: FromStr::from_str -#[derive(Debug, Clone, PartialEq, Eq)] -#[stable(feature = "rust1", since = "1.0.0")] -pub struct ParseBoolError { - _priv: (), -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl fmt::Display for ParseBoolError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - "provided string was not `true` or `false`".fmt(f) - } -} - /* Section: Creating a string */ -/// Errors which can occur when attempting to interpret a sequence of [`u8`] -/// as a string. -/// -/// As such, the `from_utf8` family of functions and methods for both [`String`]s -/// and [`&str`]s make use of this error, for example. -/// -/// [`String`]: ../../std/string/struct.String.html#method.from_utf8 -/// [`&str`]: from_utf8 -/// -/// # Examples -/// -/// This error type’s methods can be used to create functionality -/// similar to `String::from_utf8_lossy` without allocating heap memory: -/// -/// ``` -/// fn from_utf8_lossy(mut input: &[u8], mut push: F) where F: FnMut(&str) { -/// loop { -/// match std::str::from_utf8(input) { -/// Ok(valid) => { -/// push(valid); -/// break -/// } -/// Err(error) => { -/// let (valid, after_valid) = input.split_at(error.valid_up_to()); -/// unsafe { -/// push(std::str::from_utf8_unchecked(valid)) -/// } -/// push("\u{FFFD}"); -/// -/// if let Some(invalid_sequence_length) = error.error_len() { -/// input = &after_valid[invalid_sequence_length..] -/// } else { -/// break -/// } -/// } -/// } -/// } -/// } -/// ``` -#[derive(Copy, Eq, PartialEq, Clone, Debug)] -#[stable(feature = "rust1", since = "1.0.0")] -pub struct Utf8Error { - valid_up_to: usize, - error_len: Option, -} - -impl Utf8Error { - /// Returns the index in the given string up to which valid UTF-8 was - /// verified. - /// - /// It is the maximum index such that `from_utf8(&input[..index])` - /// would return `Ok(_)`. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// use std::str; - /// - /// // some invalid bytes, in a vector - /// let sparkle_heart = vec![0, 159, 146, 150]; - /// - /// // std::str::from_utf8 returns a Utf8Error - /// let error = str::from_utf8(&sparkle_heart).unwrap_err(); - /// - /// // the second byte is invalid here - /// assert_eq!(1, error.valid_up_to()); - /// ``` - #[stable(feature = "utf8_error", since = "1.5.0")] - pub fn valid_up_to(&self) -> usize { - self.valid_up_to - } - - /// Provides more information about the failure: - /// - /// * `None`: the end of the input was reached unexpectedly. - /// `self.valid_up_to()` is 1 to 3 bytes from the end of the input. - /// If a byte stream (such as a file or a network socket) is being decoded incrementally, - /// this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks. - /// - /// * `Some(len)`: an unexpected byte was encountered. - /// The length provided is that of the invalid byte sequence - /// that starts at the index given by `valid_up_to()`. - /// Decoding should resume after that sequence - /// (after inserting a [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]) in case of - /// lossy decoding. - /// - /// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html - #[stable(feature = "utf8_error_error_len", since = "1.20.0")] - pub fn error_len(&self) -> Option { - self.error_len.map(|len| len as usize) - } -} - /// Converts a slice of bytes to a string slice. /// /// A string slice ([`&str`]) is made of bytes ([`u8`]), and a byte slice @@ -440,21 +334,6 @@ pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str { unsafe { &mut *(v as *mut [u8] as *mut str) } } -#[stable(feature = "rust1", since = "1.0.0")] -impl fmt::Display for Utf8Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if let Some(error_len) = self.error_len { - write!( - f, - "invalid utf-8 sequence of {} bytes from index {}", - error_len, self.valid_up_to - ) - } else { - write!(f, "incomplete utf-8 byte sequence from index {}", self.valid_up_to) - } - } -} - /* Section: Iterators */