From f1886680e00850843e2524fba609ddba6a13180b Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Fri, 14 Jun 2013 01:44:15 +1000 Subject: [PATCH] std: convert str::to_utf16 to a method. --- src/libstd/os.rs | 2 +- src/libstd/str.rs | 55 ++++++++++++++++++++++++----------------------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/src/libstd/os.rs b/src/libstd/os.rs index 044b305a0dd9..115729571986 100644 --- a/src/libstd/os.rs +++ b/src/libstd/os.rs @@ -144,7 +144,7 @@ pub mod win32 { } pub fn as_utf16_p(s: &str, f: &fn(*u16) -> T) -> T { - let mut t = str::to_utf16(s); + let mut t = s.to_utf16(); // Null terminate before passing on. t += [0u16]; vec::as_imm_buf(t, |buf, _len| f(buf)) diff --git a/src/libstd/str.rs b/src/libstd/str.rs index fbdbb1b3f74b..d5beb755a27d 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -759,30 +759,6 @@ pub fn is_utf16(v: &[u16]) -> bool { return true; } -/// Converts to a vector of `u16` encoded as UTF-16 -pub fn to_utf16(s: &str) -> ~[u16] { - let mut u = ~[]; - for s.iter().advance |ch| { - // Arithmetic with u32 literals is easier on the eyes than chars. - let mut ch = ch as u32; - - if (ch & 0xFFFF_u32) == ch { - // The BMP falls through (assuming non-surrogate, as it - // should) - assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32); - u.push(ch as u16) - } else { - // Supplementary planes break into surrogates. - assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32); - ch -= 0x1_0000_u32; - let w1 = 0xD800_u16 | ((ch >> 10) as u16); - let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16); - u.push_all([w1, w2]) - } - } - u -} - /// Iterates over the utf-16 characters in the specified slice, yielding each /// decoded unicode character to the function provided. /// @@ -1188,6 +1164,7 @@ pub trait StrSlice<'self> { fn replace(&self, from: &str, to: &str) -> ~str; fn to_owned(&self) -> ~str; fn to_managed(&self) -> @str; + fn to_utf16(&self) -> ~[u16]; fn is_char_boundary(&self, index: uint) -> bool; fn char_range_at(&self, start: uint) -> CharRange; fn char_at(&self, i: uint) -> char; @@ -1602,6 +1579,30 @@ impl<'self> StrSlice<'self> for &'self str { unsafe { ::cast::transmute(v) } } + /// Converts to a vector of `u16` encoded as UTF-16. + fn to_utf16(&self) -> ~[u16] { + let mut u = ~[]; + for self.iter().advance |ch| { + // Arithmetic with u32 literals is easier on the eyes than chars. + let mut ch = ch as u32; + + if (ch & 0xFFFF_u32) == ch { + // The BMP falls through (assuming non-surrogate, as it + // should) + assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32); + u.push(ch as u16) + } else { + // Supplementary planes break into surrogates. + assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32); + ch -= 0x1_0000_u32; + let w1 = 0xD800_u16 | ((ch >> 10) as u16); + let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16); + u.push_all([w1, w2]) + } + } + u + } + /** * Returns false if the index points into the middle of a multi-byte * character sequence. @@ -3116,10 +3117,10 @@ mod tests { for pairs.each |p| { let (s, u) = copy *p; - assert!(to_utf16(s) == u); + assert!(s.to_utf16() == u); assert!(from_utf16(u) == s); - assert!(from_utf16(to_utf16(s)) == s); - assert!(to_utf16(from_utf16(u)) == u); + assert!(from_utf16(s.to_utf16()) == s); + assert!(from_utf16(u).to_utf16() == u); } }