Auto merge of #26039 - SimonSapin:case-mapping, r=alexcrichton
* Add “complex” mappings to `char::to_lowercase` and `char::to_uppercase`, making them sometimes yield more than one `char`: #25800. `str::to_lowercase` and `str::to_uppercase` are affected as well. * Add `char::to_titlecase`, since it’s the same algorithm (just different data). However this does **not** add `str::to_titlecase`, as that would require UAX#29 Unicode Text Segmentation which we decided not to include in `std`: https://github.com/rust-lang/rfcs/pull/1054 I made `char::to_titlecase` immediately `#[stable]`, since it’s so similar to `char::to_uppercase` that’s already stable. Let me know if it should be `#[unstable]` for a while. * Add a special case for upper-case Sigma in word-final position in `str::to_lowercase`: #26035. This is the only language-independent conditional mapping currently in `SpecialCasing.txt`. * Stabilize `str::to_lowercase` and `str::to_uppercase`. The `&self -> String` signature on `str` seems straightforward enough, and the only relevant issue I’ve found is #24536 about naming. But `char` already has stable methods with the same name, and deprecating them for a rename doesn’t seem worth it. r? @alexcrichton
This commit is contained in:
commit
f06e026578
8 changed files with 1974 additions and 672 deletions
|
|
@ -72,8 +72,9 @@ def is_surrogate(n):
|
|||
def load_unicode_data(f):
|
||||
fetch(f)
|
||||
gencats = {}
|
||||
upperlower = {}
|
||||
lowerupper = {}
|
||||
to_lower = {}
|
||||
to_upper = {}
|
||||
to_title = {}
|
||||
combines = {}
|
||||
canon_decomp = {}
|
||||
compat_decomp = {}
|
||||
|
|
@ -103,12 +104,16 @@ def load_unicode_data(f):
|
|||
|
||||
# generate char to char direct common and simple conversions
|
||||
# uppercase to lowercase
|
||||
if gencat == "Lu" and lowcase != "" and code_org != lowcase:
|
||||
upperlower[code] = int(lowcase, 16)
|
||||
if lowcase != "" and code_org != lowcase:
|
||||
to_lower[code] = (int(lowcase, 16), 0, 0)
|
||||
|
||||
# lowercase to uppercase
|
||||
if gencat == "Ll" and upcase != "" and code_org != upcase:
|
||||
lowerupper[code] = int(upcase, 16)
|
||||
if upcase != "" and code_org != upcase:
|
||||
to_upper[code] = (int(upcase, 16), 0, 0)
|
||||
|
||||
# title case
|
||||
if titlecase.strip() != "" and code_org != titlecase:
|
||||
to_title[code] = (int(titlecase, 16), 0, 0)
|
||||
|
||||
# store decomposition, if given
|
||||
if decomp != "":
|
||||
|
|
@ -144,7 +149,32 @@ def load_unicode_data(f):
|
|||
gencats = group_cats(gencats)
|
||||
combines = to_combines(group_cats(combines))
|
||||
|
||||
return (canon_decomp, compat_decomp, gencats, combines, lowerupper, upperlower)
|
||||
return (canon_decomp, compat_decomp, gencats, combines, to_upper, to_lower, to_title)
|
||||
|
||||
def load_special_casing(f, to_upper, to_lower, to_title):
    """Merge the unconditional mappings found in file `f` into the three dicts.

    `to_upper`, `to_lower` and `to_title` are updated in place, keyed by the
    integer code point. Each stored value is a list of exactly three integers:
    the mapped code points, padded on the right with 0.

    Lines whose condition field is non-empty are skipped, as are lines that
    do not split into 5 or 6 `;`-separated fields once the `#` comment has
    been cut off. A mapping identical to the code point itself is not stored.
    """
    # NOTE(review): presumably makes sure the data file is available locally;
    # `fetch` is defined elsewhere in this script -- confirm.
    fetch(f)
    for line in fileinput.input(f):
        fields = line.split('#')[0].split(';')
        if len(fields) == 6:
            # Sixth layout: the fifth field is a condition list.
            # Only unconditional mappings are kept.
            if fields[4].strip():
                continue
        elif len(fields) != 5:
            # Blank lines, comment-only lines and anything else unexpected.
            continue

        code, lower, title, upper = [field.strip() for field in fields[:4]]
        key = int(code, 16)
        for (map_, mapped) in ((to_lower, lower), (to_upper, upper), (to_title, title)):
            if mapped == code:
                # Maps to itself: nothing to record.
                continue
            points = [int(point, 16) for point in mapped.split()]
            # Pad to the fixed width of three used by the generated tables.
            points.extend([0] * (3 - len(points)))
            assert len(points) == 3
            map_[key] = points
|
||||
|
||||
def group_cats(cats):
|
||||
cats_out = {}
|
||||
|
|
@ -279,7 +309,7 @@ def load_east_asian_width(want_widths, except_cats):
|
|||
return widths
|
||||
|
||||
def escape_char(c):
|
||||
return "'\\u{%x}'" % c
|
||||
return "'\\u{%x}'" % c if c != 0 else "'\\0'"
|
||||
|
||||
def emit_bsearch_range_table(f):
|
||||
f.write("""
|
||||
|
|
@ -319,7 +349,7 @@ def emit_property_module(f, mod, tbl, emit):
|
|||
f.write(" }\n\n")
|
||||
f.write("}\n\n")
|
||||
|
||||
def emit_conversions_module(f, lowerupper, upperlower):
|
||||
def emit_conversions_module(f, to_upper, to_lower, to_title):
|
||||
f.write("pub mod conversions {")
|
||||
f.write("""
|
||||
use core::cmp::Ordering::{Equal, Less, Greater};
|
||||
|
|
@ -328,21 +358,28 @@ def emit_conversions_module(f, lowerupper, upperlower):
|
|||
use core::option::Option::{Some, None};
|
||||
use core::result::Result::{Ok, Err};
|
||||
|
||||
pub fn to_lower(c: char) -> char {
|
||||
match bsearch_case_table(c, LuLl_table) {
|
||||
None => c,
|
||||
Some(index) => LuLl_table[index].1
|
||||
pub fn to_lower(c: char) -> [char; 3] {
|
||||
match bsearch_case_table(c, to_lowercase_table) {
|
||||
None => [c, '\\0', '\\0'],
|
||||
Some(index) => to_lowercase_table[index].1
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_upper(c: char) -> char {
|
||||
match bsearch_case_table(c, LlLu_table) {
|
||||
None => c,
|
||||
Some(index) => LlLu_table[index].1
|
||||
pub fn to_upper(c: char) -> [char; 3] {
|
||||
match bsearch_case_table(c, to_uppercase_table) {
|
||||
None => [c, '\\0', '\\0'],
|
||||
Some(index) => to_uppercase_table[index].1
|
||||
}
|
||||
}
|
||||
|
||||
fn bsearch_case_table(c: char, table: &'static [(char, char)]) -> Option<usize> {
|
||||
pub fn to_title(c: char) -> [char; 3] {
|
||||
match bsearch_case_table(c, to_titlecase_table) {
|
||||
None => [c, '\\0', '\\0'],
|
||||
Some(index) => to_titlecase_table[index].1
|
||||
}
|
||||
}
|
||||
|
||||
fn bsearch_case_table(c: char, table: &'static [(char, [char; 3])]) -> Option<usize> {
|
||||
match table.binary_search_by(|&(key, _)| {
|
||||
if c == key { Equal }
|
||||
else if key < c { Less }
|
||||
|
|
@ -354,10 +391,18 @@ def emit_conversions_module(f, lowerupper, upperlower):
|
|||
}
|
||||
|
||||
""")
|
||||
emit_table(f, "LuLl_table",
|
||||
sorted(upperlower.iteritems(), key=operator.itemgetter(0)), is_pub=False)
|
||||
emit_table(f, "LlLu_table",
|
||||
sorted(lowerupper.iteritems(), key=operator.itemgetter(0)), is_pub=False)
|
||||
t_type = "&'static [(char, [char; 3])]"
|
||||
pfun = lambda x: "(%s,[%s,%s,%s])" % (
|
||||
escape_char(x[0]), escape_char(x[1][0]), escape_char(x[1][1]), escape_char(x[1][2]))
|
||||
emit_table(f, "to_lowercase_table",
|
||||
sorted(to_lower.iteritems(), key=operator.itemgetter(0)),
|
||||
is_pub=False, t_type = t_type, pfun=pfun)
|
||||
emit_table(f, "to_uppercase_table",
|
||||
sorted(to_upper.iteritems(), key=operator.itemgetter(0)),
|
||||
is_pub=False, t_type = t_type, pfun=pfun)
|
||||
emit_table(f, "to_titlecase_table",
|
||||
sorted(to_title.iteritems(), key=operator.itemgetter(0)),
|
||||
is_pub=False, t_type = t_type, pfun=pfun)
|
||||
f.write("}\n\n")
|
||||
|
||||
def emit_grapheme_module(f, grapheme_table, grapheme_cats):
|
||||
|
|
@ -591,8 +636,10 @@ if __name__ == "__main__":
|
|||
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
|
||||
""" % unicode_version)
|
||||
(canon_decomp, compat_decomp, gencats, combines,
|
||||
lowerupper, upperlower) = load_unicode_data("UnicodeData.txt")
|
||||
want_derived = ["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase"]
|
||||
to_upper, to_lower, to_title) = load_unicode_data("UnicodeData.txt")
|
||||
load_special_casing("SpecialCasing.txt", to_upper, to_lower, to_title)
|
||||
want_derived = ["XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase",
|
||||
"Cased", "Case_Ignorable"]
|
||||
derived = load_properties("DerivedCoreProperties.txt", want_derived)
|
||||
scripts = load_properties("Scripts.txt", [])
|
||||
props = load_properties("PropList.txt",
|
||||
|
|
@ -611,7 +658,7 @@ pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
|
|||
|
||||
# normalizations and conversions module
|
||||
emit_norm_module(rf, canon_decomp, compat_decomp, combines, norm_props)
|
||||
emit_conversions_module(rf, lowerupper, upperlower)
|
||||
emit_conversions_module(rf, to_upper, to_lower, to_title)
|
||||
|
||||
### character width module
|
||||
width_table = []
|
||||
|
|
|
|||
|
|
@ -1816,11 +1816,40 @@ impl str {
|
|||
/// let s = "HELLO";
|
||||
/// assert_eq!(s.to_lowercase(), "hello");
|
||||
/// ```
|
||||
#[unstable(feature = "collections")]
|
||||
#[stable(feature = "unicode_case_mapping", since = "1.2.0")]
|
||||
pub fn to_lowercase(&self) -> String {
|
||||
let mut s = String::with_capacity(self.len());
|
||||
s.extend(self[..].chars().flat_map(|c| c.to_lowercase()));
|
||||
for (i, c) in self[..].char_indices() {
|
||||
if c == 'Σ' {
|
||||
// Σ maps to σ, except at the end of a word where it maps to ς.
|
||||
// This is the only conditional (contextual) but language-independent mapping
|
||||
// in `SpecialCasing.txt`,
|
||||
// so hard-code it rather than have a generic "condition" mechanism.
|
||||
// See https://github.com/rust-lang/rust/issues/26035
|
||||
map_uppercase_sigma(self, i, &mut s)
|
||||
} else {
|
||||
s.extend(c.to_lowercase());
|
||||
}
|
||||
}
|
||||
return s;
|
||||
|
||||
fn map_uppercase_sigma(from: &str, i: usize, to: &mut String) {
|
||||
// See http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
|
||||
// for the definition of `Final_Sigma`.
|
||||
debug_assert!('Σ'.len_utf8() == 2);
|
||||
let is_word_final =
|
||||
case_ignoreable_then_cased(from[..i].chars().rev()) &&
|
||||
!case_ignoreable_then_cased(from[i + 2..].chars());
|
||||
to.push_str(if is_word_final { "ς" } else { "σ" });
|
||||
}
|
||||
|
||||
fn case_ignoreable_then_cased<I: Iterator<Item=char>>(iter: I) -> bool {
|
||||
use rustc_unicode::derived_property::{Cased, Case_Ignorable};
|
||||
match iter.skip_while(|&c| Case_Ignorable(c)).next() {
|
||||
Some(c) => Cased(c),
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the uppercase equivalent of this string.
|
||||
|
|
@ -1833,7 +1862,7 @@ impl str {
|
|||
/// let s = "hello";
|
||||
/// assert_eq!(s.to_uppercase(), "HELLO");
|
||||
/// ```
|
||||
#[unstable(feature = "collections")]
|
||||
#[stable(feature = "unicode_case_mapping", since = "1.2.0")]
|
||||
pub fn to_uppercase(&self) -> String {
|
||||
let mut s = String::with_capacity(self.len());
|
||||
s.extend(self[..].chars().flat_map(|c| c.to_uppercase()));
|
||||
|
|
|
|||
|
|
@ -1687,6 +1687,45 @@ fn trim_ws() {
|
|||
"");
|
||||
}
|
||||
|
||||
#[test]
fn to_lowercase() {
    // Each entry is (input, expected result of `str::to_lowercase`).
    let cases = [
        ("", ""),
        ("AÉǅaé ", "aéǆaé "),

        // Word-final sigma handling,
        // see https://github.com/rust-lang/rust/issues/26035
        ("ΑΣ", "ας"),
        ("Α'Σ", "α'ς"),
        ("Α''Σ", "α''ς"),

        ("ΑΣ Α", "ας α"),
        ("Α'Σ Α", "α'ς α"),
        ("Α''Σ Α", "α''ς α"),

        ("ΑΣ' Α", "ας' α"),
        ("ΑΣ'' Α", "ας'' α"),

        ("Α'Σ' Α", "α'ς' α"),
        ("Α''Σ'' Α", "α''ς'' α"),

        // Sigma not preceded by a cased letter stays a regular sigma.
        ("Α Σ", "α σ"),
        ("Α 'Σ", "α 'σ"),
        ("Α ''Σ", "α ''σ"),

        ("Σ", "σ"),
        ("'Σ", "'σ"),
        ("''Σ", "''σ"),

        // Sigma followed by a cased letter is not word-final.
        ("ΑΣΑ", "ασα"),
        ("ΑΣ'Α", "ασ'α"),
        ("ΑΣ''Α", "ασ''α"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(input.to_lowercase(), expected);
    }
}
|
||||
|
||||
#[test]
fn to_uppercase() {
    // Each entry is (input, expected result of `str::to_uppercase`).
    // The second case covers characters that expand to several characters.
    let cases = [
        ("", ""),
        ("aéǅßﬁᾀ", "AÉǄSSFIἈΙ"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(input.to_uppercase(), expected);
    }
}
|
||||
|
||||
mod pattern {
|
||||
use std::str::pattern::Pattern;
|
||||
use std::str::pattern::{Searcher, ReverseSearcher};
|
||||
|
|
|
|||
|
|
@ -58,6 +58,8 @@ fn test_to_lowercase() {
|
|||
fn lower(c: char) -> char {
|
||||
let mut it = c.to_lowercase();
|
||||
let c = it.next().unwrap();
|
||||
// As of Unicode version 7.0.0, `SpecialCasing.txt` has no lower-case mapping
|
||||
// to multiple code points.
|
||||
assert!(it.next().is_none());
|
||||
c
|
||||
}
|
||||
|
|
@ -73,29 +75,54 @@ fn test_to_lowercase() {
|
|||
assert_eq!(lower('Μ'), 'μ');
|
||||
assert_eq!(lower('Α'), 'α');
|
||||
assert_eq!(lower('Σ'), 'σ');
|
||||
assert_eq!(lower('Dž'), 'dž');
|
||||
assert_eq!(lower('fi'), 'fi');
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_to_uppercase() {
|
||||
fn upper(c: char) -> char {
|
||||
let mut it = c.to_uppercase();
|
||||
let c = it.next().unwrap();
|
||||
assert!(it.next().is_none());
|
||||
c
|
||||
fn upper(c: char) -> Vec<char> {
|
||||
c.to_uppercase().collect()
|
||||
}
|
||||
assert_eq!(upper('a'), 'A');
|
||||
assert_eq!(upper('ö'), 'Ö');
|
||||
assert_eq!(upper('ß'), 'ß'); // not ẞ: Latin capital letter sharp s
|
||||
assert_eq!(upper('ü'), 'Ü');
|
||||
assert_eq!(upper('💩'), '💩');
|
||||
assert_eq!(upper('a'), ['A']);
|
||||
assert_eq!(upper('ö'), ['Ö']);
|
||||
assert_eq!(upper('ß'), ['S', 'S']); // not ẞ: Latin capital letter sharp s
|
||||
assert_eq!(upper('ü'), ['Ü']);
|
||||
assert_eq!(upper('💩'), ['💩']);
|
||||
|
||||
assert_eq!(upper('σ'), 'Σ');
|
||||
assert_eq!(upper('τ'), 'Τ');
|
||||
assert_eq!(upper('ι'), 'Ι');
|
||||
assert_eq!(upper('γ'), 'Γ');
|
||||
assert_eq!(upper('μ'), 'Μ');
|
||||
assert_eq!(upper('α'), 'Α');
|
||||
assert_eq!(upper('ς'), 'Σ');
|
||||
assert_eq!(upper('σ'), ['Σ']);
|
||||
assert_eq!(upper('τ'), ['Τ']);
|
||||
assert_eq!(upper('ι'), ['Ι']);
|
||||
assert_eq!(upper('γ'), ['Γ']);
|
||||
assert_eq!(upper('μ'), ['Μ']);
|
||||
assert_eq!(upper('α'), ['Α']);
|
||||
assert_eq!(upper('ς'), ['Σ']);
|
||||
assert_eq!(upper('Dž'), ['DŽ']);
|
||||
assert_eq!(upper('fi'), ['F', 'I']);
|
||||
assert_eq!(upper('ᾀ'), ['Ἀ', 'Ι']);
|
||||
}
|
||||
|
||||
#[test]
fn test_to_titlecase() {
    // Collects the titlecase mapping of one character into a string so that
    // single- and multi-character expectations share one table.
    fn title(c: char) -> String {
        c.to_titlecase().collect()
    }

    // Each entry is (input character, expected titlecase mapping).
    let cases = [
        ('a', "A"),
        ('ö', "Ö"),
        ('ß', "Ss"), // not ẞ: Latin capital letter sharp s
        ('ü', "Ü"),
        ('💩', "💩"),

        ('σ', "Σ"),
        ('τ', "Τ"),
        ('ι', "Ι"),
        ('γ', "Γ"),
        ('μ', "Μ"),
        ('α', "Α"),
        ('ς', "Σ"),
        ('Ǆ', "ǅ"),
        ('ﬁ', "Fi"),
        ('ᾀ', "ᾈ"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(title(input), expected);
    }
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@
|
|||
|
||||
#![feature(box_patterns)]
|
||||
#![feature(box_syntax)]
|
||||
#![feature(collections)]
|
||||
#![cfg_attr(stage0, feature(collections))]
|
||||
#![feature(core)]
|
||||
#![feature(quote)]
|
||||
#![feature(rustc_diagnostic_macros)]
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@
|
|||
#![doc(primitive = "char")]
|
||||
|
||||
use core::char::CharExt as C;
|
||||
use core::option::Option::{self, Some};
|
||||
use core::option::Option::{self, Some, None};
|
||||
use core::iter::Iterator;
|
||||
use tables::{derived_property, property, general_category, conversions, charwidth};
|
||||
|
||||
|
|
@ -47,24 +47,79 @@ pub use tables::UNICODE_VERSION;
|
|||
/// the [`to_lowercase` method](../primitive.char.html#method.to_lowercase) on
|
||||
/// characters.
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
pub struct ToLowercase(Option<char>);
|
||||
pub struct ToLowercase(CaseMappingIter);
|
||||
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
impl Iterator for ToLowercase {
|
||||
type Item = char;
|
||||
fn next(&mut self) -> Option<char> { self.0.take() }
|
||||
fn next(&mut self) -> Option<char> { self.0.next() }
|
||||
}
|
||||
|
||||
/// An iterator over the uppercase mapping of a given character, returned from
|
||||
/// the [`to_uppercase` method](../primitive.char.html#method.to_uppercase) on
|
||||
/// characters.
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
pub struct ToUppercase(Option<char>);
|
||||
pub struct ToUppercase(CaseMappingIter);
|
||||
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
impl Iterator for ToUppercase {
|
||||
type Item = char;
|
||||
fn next(&mut self) -> Option<char> { self.0.take() }
|
||||
fn next(&mut self) -> Option<char> { self.0.next() }
|
||||
}
|
||||
|
||||
/// An iterator over the titlecase mapping of a given character, returned from
|
||||
/// the [`to_titlecase` method](../primitive.char.html#method.to_titlecase) on
|
||||
/// characters.
|
||||
#[unstable(feature = "unicode", reason = "recently added")]
|
||||
pub struct ToTitlecase(CaseMappingIter);
|
||||
|
||||
#[stable(feature = "unicode_case_mapping", since = "1.2.0")]
|
||||
impl Iterator for ToTitlecase {
|
||||
type Item = char;
|
||||
fn next(&mut self) -> Option<char> { self.0.next() }
|
||||
}
|
||||
|
||||
|
||||
/// Iterator over the one to three characters of a case mapping.
///
/// The variant encodes how many characters are still to be yielded;
/// each call to `next` steps down to the next smaller variant.
enum CaseMappingIter {
    Three(char, char, char),
    Two(char, char),
    One(char),
    Zero
}

impl CaseMappingIter {
    /// Builds the iterator from a three-element array in which unused
    /// trailing slots hold `'\0'`.
    ///
    /// The first element is always yielded, so an array of three `'\0'`
    /// produces a single `'\0'`.
    fn new(chars: [char; 3]) -> CaseMappingIter {
        if chars[2] == '\0' {
            if chars[1] == '\0' {
                CaseMappingIter::One(chars[0])  // Including if chars[0] == '\0'
            } else {
                CaseMappingIter::Two(chars[0], chars[1])
            }
        } else {
            CaseMappingIter::Three(chars[0], chars[1], chars[2])
        }
    }
}

impl Iterator for CaseMappingIter {
    type Item = char;

    /// Yields the next character of the mapping, or `None` once exhausted.
    fn next(&mut self) -> Option<char> {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                *self = CaseMappingIter::Two(b, c);
                Some(a)
            }
            CaseMappingIter::Two(b, c) => {
                *self = CaseMappingIter::One(c);
                Some(b)
            }
            CaseMappingIter::One(c) => {
                *self = CaseMappingIter::Zero;
                Some(c)
            }
            CaseMappingIter::Zero => None,
        }
    }

    /// Reports the exact number of characters left.
    ///
    /// The variant already records the remaining count, so the bounds are
    /// exact rather than the default `(0, None)`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = match *self {
            CaseMappingIter::Three(..) => 3,
            CaseMappingIter::Two(..) => 2,
            CaseMappingIter::One(..) => 1,
            CaseMappingIter::Zero => 0,
        };
        (remaining, Some(remaining))
    }
}
|
||||
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
|
|
@ -397,27 +452,48 @@ impl char {
|
|||
|
||||
/// Converts a character to its lowercase equivalent.
|
||||
///
|
||||
/// The case-folding performed is the common or simple mapping. See
|
||||
/// `to_uppercase()` for references and more information.
|
||||
/// This performs complex unconditional mappings with no tailoring.
|
||||
/// See `to_uppercase()` for references and more information.
|
||||
///
|
||||
/// # Return value
|
||||
///
|
||||
/// Returns an iterator which yields the characters corresponding to the
|
||||
/// lowercase equivalent of the character. If no conversion is possible then
|
||||
/// the input character is returned.
|
||||
/// an iterator with just the input character is returned.
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
#[inline]
|
||||
pub fn to_lowercase(self) -> ToLowercase {
|
||||
ToLowercase(Some(conversions::to_lower(self)))
|
||||
ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
|
||||
}
|
||||
|
||||
/// Converts a character to its titlecase equivalent.
|
||||
///
|
||||
/// This performs complex unconditional mappings with no tailoring.
|
||||
/// See `to_uppercase()` for references and more information.
|
||||
///
|
||||
/// This differs from `to_uppercase()` since Unicode contains
|
||||
/// digraphs and ligature characters.
|
||||
/// For example, U+01F3 “dz” and U+FB01 “fi”
|
||||
/// map to U+01F2 “ǲ” and U+0046 U+0069 “Fi”, respectively.
|
||||
///
|
||||
/// # Return value
|
||||
///
|
||||
/// Returns an iterator which yields the characters corresponding to the
|
||||
/// titlecase equivalent of the character. If no conversion is possible then
|
||||
/// an iterator with just the input character is returned.
|
||||
#[unstable(feature = "unicode", reason = "recently added")]
|
||||
#[inline]
|
||||
pub fn to_titlecase(self) -> ToTitlecase {
|
||||
ToTitlecase(CaseMappingIter::new(conversions::to_title(self)))
|
||||
}
|
||||
|
||||
/// Converts a character to its uppercase equivalent.
|
||||
///
|
||||
/// The case-folding performed is the common or simple mapping: it maps
|
||||
/// one Unicode codepoint to its uppercase equivalent according to the
|
||||
/// Unicode database [1]. The additional [`SpecialCasing.txt`] is not yet
|
||||
/// considered here, but the iterator returned will soon support this form
|
||||
/// of case folding.
|
||||
/// This performs complex unconditional mappings with no tailoring:
|
||||
/// it maps one Unicode character to its uppercase equivalent
|
||||
/// according to the Unicode database [1]
|
||||
/// and the additional complex mappings [`SpecialCasing.txt`].
|
||||
/// Conditional mappings (based on context or language) are not considered here.
|
||||
///
|
||||
/// A full reference can be found here [2].
|
||||
///
|
||||
|
|
@ -425,17 +501,17 @@ impl char {
|
|||
///
|
||||
/// Returns an iterator which yields the characters corresponding to the
|
||||
/// uppercase equivalent of the character. If no conversion is possible then
|
||||
/// the input character is returned.
|
||||
/// an iterator with just the input character is returned.
|
||||
///
|
||||
/// [1]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
|
||||
///
|
||||
/// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
|
||||
///
|
||||
/// [2]: http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf#G33992
|
||||
/// [2]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
|
||||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
#[inline]
|
||||
pub fn to_uppercase(self) -> ToUppercase {
|
||||
ToUppercase(Some(conversions::to_upper(self)))
|
||||
ToUppercase(CaseMappingIter::new(conversions::to_upper(self)))
|
||||
}
|
||||
|
||||
/// Returns this character's displayed width in columns, or `None` if it is a
|
||||
|
|
|
|||
|
|
@ -49,3 +49,8 @@ pub mod str {
|
|||
pub use u_str::{utf8_char_width, is_utf16, Utf16Items, Utf16Item};
|
||||
pub use u_str::{utf16_items, Utf16Encoder};
|
||||
}
|
||||
|
||||
// For use in libcollections, not re-exported in libstd.
|
||||
pub mod derived_property {
|
||||
pub use tables::derived_property::{Cased, Case_Ignorable};
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue