Auto merge of #34485 - tbu-:pr_unicode_debug_str, r=alexcrichton

Escape fewer Unicode codepoints in `Debug` impl of `str`

Use the same procedure as Python to determine whether a character is
printable, described in [PEP 3138]. In particular, this means that the
following character classes are escaped:

- Cc (Other, Control)
- Cf (Other, Format)
- Cs (Other, Surrogate), even though they can't appear in Rust strings
- Co (Other, Private Use)
- Cn (Other, Not Assigned)
- Zl (Separator, Line)
- Zp (Separator, Paragraph)
- Zs (Separator, Space), except for the ASCII space `' '` `0x20`

This allows for user-friendly inspection of strings containing
non-English text (e.g. compare `"\u{e9}\u{e8}\u{ea}"` to `"éèê"`).

Fixes #34318.
CC #34422.

[PEP 3138]: https://www.python.org/dev/peps/pep-3138/
This commit is contained in:
bors 2016-07-28 11:20:33 -07:00 committed by GitHub
commit d1df3fecdf
16 changed files with 1032 additions and 29 deletions

View file

@ -36,7 +36,7 @@ use tables::{conversions, derived_property, general_category, property};
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::char::{MAX, from_digit, from_u32, from_u32_unchecked};
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::char::{EncodeUtf16, EncodeUtf8, EscapeDefault, EscapeUnicode};
pub use core::char::{EncodeUtf16, EncodeUtf8, EscapeDebug, EscapeDefault, EscapeUnicode};
// unstable reexports
#[unstable(feature = "decode_utf8", issue = "33906")]
@ -267,6 +267,41 @@ impl char {
C::escape_unicode(self)
}
/// Returns an iterator that yields the literal escape code of a `char`.
///
/// This will escape the characters similar to the `Debug` implementations
/// of `str` or `char`.
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// #![feature(char_escape_debug)]
///
/// for i in '\n'.escape_debug() {
///     println!("{}", i);
/// }
/// ```
///
/// This prints:
///
/// ```text
/// \
/// n
/// ```
///
/// Collecting into a `String`:
///
/// ```
/// #![feature(char_escape_debug)]
///
/// let quote: String = '\n'.escape_debug().collect();
///
/// assert_eq!(quote, "\\n");
/// ```
#[unstable(feature = "char_escape_debug", issue = "35068")]
#[inline]
pub fn escape_debug(self) -> EscapeDebug {
C::escape_debug(self)
}
/// Returns an iterator that yields the literal escape code of a `char`.
///
/// The default is chosen with a bias toward producing literals that are

View file

@ -32,6 +32,7 @@
#![cfg_attr(not(stage0), deny(warnings))]
#![no_std]
#![feature(char_escape_debug)]
#![feature(core_char_ext)]
#![feature(decode_utf8)]
#![feature(lang_items)]