diff --git a/src/libnative/io/file.rs b/src/libnative/io/file.rs index e9c9f51966c6..80f5f74c53a5 100644 --- a/src/libnative/io/file.rs +++ b/src/libnative/io/file.rs @@ -571,7 +571,8 @@ pub fn readdir(p: &CString) -> IoResult<~[Path]> { else { let fp_vec = vec::from_buf( fp_buf, wcslen(fp_buf) as uint); - let fp_str = str::from_utf16(fp_vec); + let fp_str = str::from_utf16(fp_vec) + .expect("rust_list_dir_wfd_fp_buf returned invalid UTF-16"); paths.push(Path::new(fp_str)); } more_files = FindNextFileW(find_handle, wfd_ptr as HANDLE); diff --git a/src/libstd/os.rs b/src/libstd/os.rs index 719ed62d03d0..31e88905b30f 100644 --- a/src/libstd/os.rs +++ b/src/libstd/os.rs @@ -88,7 +88,7 @@ pub fn getcwd() -> Path { fail!(); } } - Path::new(str::from_utf16(buf)) + Path::new(str::from_utf16(buf).expect("GetCurrentDirectoryW returned invalid UTF-16")) } #[cfg(windows)] @@ -124,7 +124,12 @@ pub mod win32 { } if k != 0 && done { let sub = buf.slice(0, k as uint); - res = option::Some(str::from_utf16(sub)); + // We want to explicitly catch the case when the + // closure returned invalid UTF-16, rather than + // set `res` to None and continue. + let s = str::from_utf16(sub) + .expect("fill_utf16_buf_and_decode: closure created invalid UTF-16"); + res = option::Some(s) } } return res; @@ -739,7 +744,7 @@ pub fn last_os_error() -> ~str { fail!("[{}] FormatMessage failure", errno()); } - str::from_utf16(buf) + str::from_utf16(buf).expect("FormatMessageW returned invalid UTF-16") } } @@ -828,8 +833,8 @@ fn real_args() -> ~[~str] { while *ptr.offset(len as int) != 0 { len += 1; } // Push it onto the list. 
- args.push(vec::raw::buf_as_slice(ptr, len, - str::from_utf16)); + let opt_s = vec::raw::buf_as_slice(ptr, len, str::from_utf16); + args.push(opt_s.expect("CommandLineToArgvW returned invalid UTF-16")); } } diff --git a/src/libstd/str.rs b/src/libstd/str.rs index 34bcb083134c..20321dad6007 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -920,11 +920,8 @@ pub fn utf16_items<'a>(v: &'a [u16]) -> UTF16Items<'a> { UTF16Items { iter : v.iter() } } -/// Decode a UTF-16 encoded vector `v` into a string. -/// -/// # Failure -/// -/// Fails on invalid UTF-16 data. +/// Decode a UTF-16 encoded vector `v` into a string, returning `None` +/// if `v` contains any invalid data. /// /// # Example /// @@ -932,17 +929,23 @@ pub fn utf16_items<'a>(v: &'a [u16]) -> UTF16Items<'a> { /// use std::str; /// /// // 𝄞music -/// let v = [0xD834, 0xDD1E, 0x006d, 0x0075, -/// 0x0073, 0x0069, 0x0063]; -/// assert_eq!(str::from_utf16(v), ~"𝄞music"); +/// let mut v = [0xD834, 0xDD1E, 0x006d, 0x0075, +/// 0x0073, 0x0069, 0x0063]; +/// assert_eq!(str::from_utf16(v), Some(~"𝄞music")); +/// +/// // 𝄞mu<invalid>ic +/// v[4] = 0xD800; +/// assert_eq!(str::from_utf16(v), None); /// ``` -pub fn from_utf16(v: &[u16]) -> ~str { - utf16_items(v).map(|c| { - match c { - ScalarValue(c) => c, - LoneSurrogate(u) => fail!("from_utf16: found lone surrogate {}", u) - } - }).collect() +pub fn from_utf16(v: &[u16]) -> Option<~str> { + let mut s = with_capacity(v.len() / 2); + for c in utf16_items(v) { + match c { + ScalarValue(c) => s.push_char(c), + LoneSurrogate(_) => return None + } + } + Some(s) } /// Decode a UTF-16 encoded vector `v` into a string, replacing @@ -3834,14 +3837,29 @@ mod tests { assert!(is_utf16(u)); assert_eq!(s.to_utf16(), u); - assert_eq!(from_utf16(u), s); + assert_eq!(from_utf16(u).unwrap(), s); assert_eq!(from_utf16_lossy(u), s); - assert_eq!(from_utf16(s.to_utf16()), s); - assert_eq!(from_utf16(u).to_utf16(), u); + assert_eq!(from_utf16(s.to_utf16()).unwrap(), s); + 
assert_eq!(from_utf16(u).unwrap().to_utf16(), u); } } + #[test] + fn test_utf16_invalid() { + // completely positive cases tested above. + // lead + eof + assert_eq!(from_utf16([0xD800]), None); + // lead + lead + assert_eq!(from_utf16([0xD800, 0xD800]), None); + + // isolated trail + assert_eq!(from_utf16([0x0061, 0xDC00]), None); + + // general + assert_eq!(from_utf16([0xD800, 0xd801, 0xdc8b, 0xD800]), None); + } + #[test] fn test_utf16_lossy() { // completely positive cases tested above.