From 372673b58c09355c6ca963a05e2cbc142b832eac Mon Sep 17 00:00:00 2001 From: Tim Chevalier Date: Fri, 23 Mar 2012 14:41:02 -0700 Subject: [PATCH] Remove char::to_lower, char::to_upper, use libc versions instead As per Graydon's comments on #1985: remove char::to_lower and char::to_upper. The str versions of these functions now call libc::tolower and libc::toupper (using wrappers that cast between char and c_char). Not sure how much better that is, but it at least makes it clearer that these functions are Unicode-unsafe. --- src/libcore/char.rs | 44 +------------------------------------------- src/libcore/libc.rs | 4 ++-- src/libcore/str.rs | 25 ++++++++++++++----------- 3 files changed, 17 insertions(+), 56 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index ad9b7388d6ec..910c28409a1e 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -38,7 +38,7 @@ export is_alphabetic, is_lowercase, is_uppercase, is_whitespace, is_alphanumeric, is_ascii, is_digit, - to_digit, to_lower, to_upper, cmp; + to_digit, cmp; import is_alphabetic = unicode::derived_property::Alphabetic; import is_XID_start = unicode::derived_property::XID_Start; @@ -122,30 +122,6 @@ pure fn to_digit(c: char, radix: uint) -> option { else { none } } -/* - FIXME: works only on ASCII (Issue #1985) -*/ -#[doc = "Convert a char to the corresponding lower case."] -pure fn to_lower(c: char) -> char { - assert is_ascii(c); - alt c { - 'A' to 'Z' { ((c as u8) + 32u8) as char } - _ { c } - } -} - -/* - FIXME: works only on ASCII (Issue 1985) -*/ -#[doc = "Convert a char to the corresponding upper case."] -pure fn to_upper(c: char) -> char { - assert is_ascii(c); - alt c { - 'a' to 'z' { ((c as u8) - 32u8) as char } - _ { c } - } -} - #[doc = " Compare two chars @@ -206,24 +182,6 @@ fn test_to_digit() { assert to_digit('$', 36u) == none; } -#[test] -fn test_to_lower() { - assert (to_lower('H') == 'h'); - assert (to_lower('e') == 'e'); - // non-ASCII, shouldn't work (see earlier FIXME) - //assert (to_lower('Ö') == 'ö'); - //assert (to_lower('ß') == 'ß'); -} - -#[test] -fn test_to_upper() { - assert (to_upper('l') == 'L'); - assert (to_upper('Q') == 'Q'); - // non-ASCII, shouldn't work (see earlier FIXME) - //assert (to_upper('ü') == 'Ü'); - //assert (to_upper('ß') == 'ß'); -} - #[test] fn test_is_ascii() unsafe { assert str::all("banana", char::is_ascii); diff --git a/src/libcore/libc.rs b/src/libcore/libc.rs index b46b9cb7d5d0..912ab3a7429d 100644 --- a/src/libcore/libc.rs +++ b/src/libcore/libc.rs @@ -736,8 +736,8 @@ mod funcs { fn isspace(c: c_int) -> c_int; fn isupper(c: c_int) -> c_int; fn isxdigit(c: c_int) -> c_int; - fn tolower(c: c_int) -> c_int; - fn toupper(c: c_int) -> c_int; + fn tolower(c: c_char) -> c_char; + fn toupper(c: c_char) -> c_char; } #[nolink] diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 95dc2e130339..057c0b0d187f 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -95,7 +95,6 @@ export unsafe; - #[abi = "cdecl"] native mod rustrt { fn rust_str_push(&s: str, ch: u8); @@ -492,14 +491,14 @@ fn words(s: str) -> [str] { split_nonempty(s, {|c| char::is_whitespace(c)}) } -#[doc = "Convert a string to lowercase"] +#[doc = "Convert a string to lowercase. ASCII only"] fn to_lower(s: str) -> str { - map(s, char::to_lower) + map(s, {|c| (libc::tolower(c as libc::c_char)) as char}) } -#[doc = "Convert a string to uppercase"] +#[doc = "Convert a string to uppercase. ASCII only"] fn to_upper(s: str) -> str { - map(s, char::to_upper) + map(s, {|c| (libc::toupper(c as libc::c_char)) as char}) } #[doc = " @@ -1629,6 +1628,8 @@ mod unsafe { #[cfg(test)] mod tests { + import libc::c_char; + #[test] fn test_eq() { assert (eq("", "")); @@ -1937,9 +1938,9 @@ mod tests { #[test] fn test_to_upper() { - // char::to_upper, and hence str::to_upper + // libc::toupper, and hence str::to_upper // are culturally insensitive: they only work for ASCII - // (see Issue #1985) + // (see Issue #1347) let unicode = ""; //"\u65e5\u672c"; // uncomment once non-ASCII works let input = "abcDEF" + unicode + "xyz:.;"; let expected = "ABCDEF" + unicode + "XYZ:.;"; @@ -1949,8 +1950,9 @@ mod tests { #[test] fn test_to_lower() { - assert "" == map("", char::to_lower); - assert "ymca" == map("YMCA", char::to_lower); + assert "" == map("", {|c| libc::tolower(c as c_char) as char}); + assert "ymca" == map("YMCA", + {|c| libc::tolower(c as c_char) as char}); } #[test] @@ -2401,8 +2403,9 @@ mod tests { #[test] fn test_map() { - assert "" == map("", char::to_upper); - assert "YMCA" == map("ymca", char::to_upper); + assert "" == map("", {|c| libc::toupper(c as c_char) as char}); + assert "YMCA" == map("ymca", {|c| libc::toupper(c as c_char) + as char}); } #[test]