Auto merge of #34485 - tbu-:pr_unicode_debug_str, r=alexcrichton

Escape fewer Unicode codepoints in `Debug` impl of `str`

Use the same procedure as Python to determine whether a character is
printable, described in [PEP 3138]. In particular, this means that the
following character classes are escaped:

- Cc (Other, Control)
- Cf (Other, Format)
- Cs (Other, Surrogate), even though they can't appear in Rust strings
- Co (Other, Private Use)
- Cn (Other, Not Assigned)
- Zl (Separator, Line)
- Zp (Separator, Paragraph)
- Zs (Separator, Space), except for the ASCII space `' '` (`0x20`)

This allows for user-friendly inspection of strings that are not in
English (e.g. compare `"\u{e9}\u{e8}\u{ea}"` to `"éèê"`).

Fixes #34318.
CC #34422.

[PEP 3138]: https://www.python.org/dev/peps/pep-3138/
This commit is contained in:
bors 2016-07-28 11:20:33 -07:00 committed by GitHub
commit d1df3fecdf
16 changed files with 1032 additions and 29 deletions

View file

@ -218,8 +218,9 @@
#![feature(associated_consts)]
#![feature(borrow_state)]
#![feature(box_syntax)]
#![feature(cfg_target_vendor)]
#![feature(cfg_target_thread_local)]
#![feature(cfg_target_vendor)]
#![feature(char_escape_debug)]
#![feature(char_internals)]
#![feature(collections)]
#![feature(collections_bound)]
@ -229,10 +230,10 @@
#![feature(dropck_parametricity)]
#![feature(float_extras)]
#![feature(float_from_str_radix)]
#![feature(fnbox)]
#![feature(fn_traits)]
#![feature(heap_api)]
#![feature(fnbox)]
#![feature(hashmap_hasher)]
#![feature(heap_api)]
#![feature(inclusive_range)]
#![feature(int_error_internals)]
#![feature(into_cow)]
@ -242,6 +243,7 @@
#![feature(linkage)]
#![feature(macro_reexport)]
#![cfg_attr(test, feature(map_values_mut))]
#![feature(needs_panic_runtime)]
#![feature(num_bits_bytes)]
#![feature(old_wrapping)]
#![feature(on_unimplemented)]
@ -249,10 +251,11 @@
#![feature(optin_builtin_traits)]
#![feature(panic_unwind)]
#![feature(placement_in_syntax)]
#![feature(question_mark)]
#![feature(rand)]
#![feature(raw)]
#![feature(repr_simd)]
#![feature(reflect_marker)]
#![feature(repr_simd)]
#![feature(rustc_attrs)]
#![feature(shared)]
#![feature(sip_hash_13)]
@ -266,6 +269,7 @@
#![feature(str_utf16)]
#![feature(test, rustc_private)]
#![feature(thread_local)]
#![feature(try_from)]
#![feature(unboxed_closures)]
#![feature(unicode)]
#![feature(unique)]
@ -273,9 +277,6 @@
#![feature(unwind_attributes)]
#![feature(vec_push_all)]
#![feature(zero_one)]
#![feature(question_mark)]
#![feature(try_from)]
#![feature(needs_panic_runtime)]
// Issue# 30592: Systematically use alloc_system during stage0 since jemalloc
// might be unavailable or disabled

View file

@ -390,7 +390,7 @@ impl fmt::Debug for Wtf8 {
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
fn write_str_escaped(f: &mut fmt::Formatter, s: &str) -> fmt::Result {
use fmt::Write;
for c in s.chars().flat_map(|c| c.escape_default()) {
for c in s.chars().flat_map(|c| c.escape_debug()) {
f.write_char(c)?
}
Ok(())
@ -1064,9 +1064,9 @@ mod tests {
#[test]
fn wtf8buf_show() {
let mut string = Wtf8Buf::from_str("a\té 💩\r");
let mut string = Wtf8Buf::from_str("a\té \u{7f}💩\r");
string.push(CodePoint::from_u32(0xD800).unwrap());
assert_eq!(format!("{:?}", string), r#""a\t\u{e9} \u{1f4a9}\r\u{D800}""#);
assert_eq!(format!("{:?}", string), "\"a\\té \\u{7f}\u{1f4a9}\\r\\u{D800}\"");
}
#[test]