From 276293af7c0fb901c8344e88562ce635f26f47a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marvin=20L=C3=B6bel?= Date: Thu, 18 Apr 2013 10:01:23 +0200 Subject: [PATCH] Cleaned up case related functions a bit --- src/libcore/char.rs | 18 ++++++++++++++++-- src/libcore/str.rs | 41 +++++++++++++++++++---------------------- 2 files changed, 35 insertions(+), 24 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 6ca33540ceef..c07a31490c34 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -1,4 +1,4 @@ -// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. // @@ -234,6 +234,21 @@ pub fn escape_default(c: char) -> ~str { } } +/// Returns the number of bytes this character would need if encoded in UTF-8 +pub fn len_utf8_bytes(c: char) -> uint { + static max_one_b: uint = 128u; + static max_two_b: uint = 2048u; + static max_three_b: uint = 65536u; + static max_four_b: uint = 2097152u; + + let code = c as uint; + if code < max_one_b { 1u } + else if code < max_two_b { 2u } + else if code < max_three_b { 3u } + else if code < max_four_b { 4u } + else { fail!(~"invalid character!") } +} + /** * Compare two chars * @@ -334,7 +349,6 @@ fn test_escape_default() { assert_eq!(escape_default('\U0001d4b6'), ~"\\U0001d4b6"); } - #[test] fn test_escape_unicode() { assert_eq!(escape_unicode('\x00'), ~"\\x00"); diff --git a/src/libcore/str.rs b/src/libcore/str.rs index d72b4a71e2a6..cbdd14519530 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -1,4 +1,4 @@ -// Copyright 2012 The Rust Project Developers. See the COPYRIGHT +// Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT // file at the top-level directory of this distribution and at // http://rust-lang.org/COPYRIGHT. 
// @@ -789,16 +789,18 @@ pub fn each_split_within<'a>(ss: &'a str, /// Convert a string to lowercase. ASCII only pub fn to_lower(s: &str) -> ~str { - map(s, - |c| unsafe{(libc::tolower(c as libc::c_char)) as char} - ) + do map(s) |c| { + assert!(char::is_ascii(c)); + (unsafe{libc::tolower(c as libc::c_char)}) as char + } } /// Convert a string to uppercase. ASCII only pub fn to_upper(s: &str) -> ~str { - map(s, - |c| unsafe{(libc::toupper(c as libc::c_char)) as char} - ) + do map(s) |c| { + assert!(char::is_ascii(c)); + (unsafe{libc::toupper(c as libc::c_char)}) as char + } } /** @@ -3096,12 +3098,11 @@ mod tests { #[test] fn test_to_lower() { - unsafe { - assert!(~"" == map(~"", - |c| libc::tolower(c as c_char) as char)); - assert!(~"ymca" == map(~"YMCA", - |c| libc::tolower(c as c_char) as char)); - } + // libc::tolower, and hence str::to_lower + // are not locale-aware: they only work for ASCII + // (see Issue #1347) + assert!(~"" == to_lower("")); + assert!(~"ymca" == to_lower("YMCA")); } #[test] @@ -3666,12 +3667,8 @@ mod tests { #[test] fn test_map() { - unsafe { - assert!(~"" == map(~"", |c| - libc::toupper(c as c_char) as char)); - assert!(~"YMCA" == map(~"ymca", - |c| libc::toupper(c as c_char) as char)); - } + assert!(~"" == map(~"", |c| unsafe {libc::toupper(c as c_char)} as char)); + assert!(~"YMCA" == map(~"ymca", |c| unsafe {libc::toupper(c as c_char)} as char)); } #[test] @@ -3685,11 +3682,11 @@ mod tests { #[test] fn test_any() { - assert!(false == any(~"", char::is_uppercase)); + assert!(false == any(~"", char::is_uppercase)); assert!(false == any(~"ymca", char::is_uppercase)); assert!(true == any(~"YMCA", char::is_uppercase)); - assert!(true == any(~"yMCA", char::is_uppercase)); - assert!(true == any(~"Ymcy", char::is_uppercase)); + assert!(true == any(~"yMCA", char::is_uppercase)); + assert!(true == any(~"Ymcy", char::is_uppercase)); } #[test]