std: unify the str -> [u8] functions as 3 methods: .as_bytes() and .as_bytes_with_null[_consume]().

The first acts on any &str and its result is not null-terminated; the last two act on strings
that are always null-terminated (.as_bytes_with_null() on &'static str, ~str and @str;
.as_bytes_with_null_consume() on ~str only).
This commit is contained in:
Huon Wilson 2013-06-11 13:10:37 +10:00
parent ba4a4778cc
commit efc71a8bdb
44 changed files with 255 additions and 218 deletions

View file

@ -1091,7 +1091,7 @@ pub fn with_bytes_reader<T>(bytes: &[u8], f: &fn(@Reader) -> T) -> T {
}
pub fn with_str_reader<T>(s: &str, f: &fn(@Reader) -> T) -> T {
str::byte_slice(s, |bytes| with_bytes_reader(bytes, f))
with_bytes_reader(s.as_bytes(), f)
}
// Writing
@ -1462,7 +1462,7 @@ impl<T:Writer> WriterUtil for T {
self.write_str(str::from_char(ch));
}
}
fn write_str(&self, s: &str) { str::byte_slice(s, |v| self.write(v)) }
fn write_str(&self, s: &str) { self.write(s.as_bytes()) }
fn write_line(&self, s: &str) {
self.write_str(s);
self.write_str(&"\n");

View file

@ -793,27 +793,27 @@ mod tests {
#[test]
fn test_parse_bytes() {
use str::to_bytes;
assert_eq!(parse_bytes(to_bytes("123"), 10u), Some(123 as $T));
assert_eq!(parse_bytes(to_bytes("1001"), 2u), Some(9 as $T));
assert_eq!(parse_bytes(to_bytes("123"), 8u), Some(83 as $T));
assert_eq!(i32::parse_bytes(to_bytes("123"), 16u), Some(291 as i32));
assert_eq!(i32::parse_bytes(to_bytes("ffff"), 16u), Some(65535 as i32));
assert_eq!(i32::parse_bytes(to_bytes("FFFF"), 16u), Some(65535 as i32));
assert_eq!(parse_bytes(to_bytes("z"), 36u), Some(35 as $T));
assert_eq!(parse_bytes(to_bytes("Z"), 36u), Some(35 as $T));
use str::StrSlice;
assert_eq!(parse_bytes("123".as_bytes(), 10u), Some(123 as $T));
assert_eq!(parse_bytes("1001".as_bytes(), 2u), Some(9 as $T));
assert_eq!(parse_bytes("123".as_bytes(), 8u), Some(83 as $T));
assert_eq!(i32::parse_bytes("123".as_bytes(), 16u), Some(291 as i32));
assert_eq!(i32::parse_bytes("ffff".as_bytes(), 16u), Some(65535 as i32));
assert_eq!(i32::parse_bytes("FFFF".as_bytes(), 16u), Some(65535 as i32));
assert_eq!(parse_bytes("z".as_bytes(), 36u), Some(35 as $T));
assert_eq!(parse_bytes("Z".as_bytes(), 36u), Some(35 as $T));
assert_eq!(parse_bytes(to_bytes("-123"), 10u), Some(-123 as $T));
assert_eq!(parse_bytes(to_bytes("-1001"), 2u), Some(-9 as $T));
assert_eq!(parse_bytes(to_bytes("-123"), 8u), Some(-83 as $T));
assert_eq!(i32::parse_bytes(to_bytes("-123"), 16u), Some(-291 as i32));
assert_eq!(i32::parse_bytes(to_bytes("-ffff"), 16u), Some(-65535 as i32));
assert_eq!(i32::parse_bytes(to_bytes("-FFFF"), 16u), Some(-65535 as i32));
assert_eq!(parse_bytes(to_bytes("-z"), 36u), Some(-35 as $T));
assert_eq!(parse_bytes(to_bytes("-Z"), 36u), Some(-35 as $T));
assert_eq!(parse_bytes("-123".as_bytes(), 10u), Some(-123 as $T));
assert_eq!(parse_bytes("-1001".as_bytes(), 2u), Some(-9 as $T));
assert_eq!(parse_bytes("-123".as_bytes(), 8u), Some(-83 as $T));
assert_eq!(i32::parse_bytes("-123".as_bytes(), 16u), Some(-291 as i32));
assert_eq!(i32::parse_bytes("-ffff".as_bytes(), 16u), Some(-65535 as i32));
assert_eq!(i32::parse_bytes("-FFFF".as_bytes(), 16u), Some(-65535 as i32));
assert_eq!(parse_bytes("-z".as_bytes(), 36u), Some(-35 as $T));
assert_eq!(parse_bytes("-Z".as_bytes(), 36u), Some(-35 as $T));
assert!(parse_bytes(to_bytes("Z"), 35u).is_none());
assert!(parse_bytes(to_bytes("-9"), 2u).is_none());
assert!(parse_bytes("Z".as_bytes(), 35u).is_none());
assert!(parse_bytes("-9".as_bytes(), 2u).is_none());
}
#[test]

View file

@ -16,6 +16,7 @@ use ops::{Add, Sub, Mul, Div, Rem, Neg};
use option::{None, Option, Some};
use char;
use str;
use str::{StrSlice};
use kinds::Copy;
use vec;
use vec::{CopyableVector, ImmutableVector};
@ -189,18 +190,18 @@ pub fn to_str_bytes_common<T:NumCast+Zero+One+Eq+Ord+NumStrConv+Copy+
let _1: T = One::one();
if is_NaN(num) {
return (str::to_bytes("NaN"), true);
return ("NaN".as_bytes().to_owned(), true);
}
else if is_inf(num){
return match sign {
SignAll => (str::to_bytes("+inf"), true),
_ => (str::to_bytes("inf"), true)
SignAll => ("+inf".as_bytes().to_owned(), true),
_ => ("inf".as_bytes().to_owned(), true)
}
}
else if is_neg_inf(num) {
return match sign {
SignNone => (str::to_bytes("inf"), true),
_ => (str::to_bytes("-inf"), true),
SignNone => ("inf".as_bytes().to_owned(), true),
_ => ("-inf".as_bytes().to_owned(), true),
}
}
@ -638,7 +639,7 @@ pub fn from_str_common<T:NumCast+Zero+One+Eq+Ord+Copy+Div<T,T>+Mul<T,T>+
special: bool, exponent: ExponentFormat, empty_zero: bool,
ignore_underscores: bool
) -> Option<T> {
from_str_bytes_common(str::to_bytes(buf), radix, negative,
from_str_bytes_common(buf.as_bytes(), radix, negative,
fractional, special, exponent, empty_zero,
ignore_underscores)
}

View file

@ -538,16 +538,16 @@ mod tests {
#[test]
pub fn test_parse_bytes() {
use str::to_bytes;
assert_eq!(parse_bytes(to_bytes("123"), 10u), Some(123u as $T));
assert_eq!(parse_bytes(to_bytes("1001"), 2u), Some(9u as $T));
assert_eq!(parse_bytes(to_bytes("123"), 8u), Some(83u as $T));
assert_eq!(u16::parse_bytes(to_bytes("123"), 16u), Some(291u as u16));
assert_eq!(u16::parse_bytes(to_bytes("ffff"), 16u), Some(65535u as u16));
assert_eq!(parse_bytes(to_bytes("z"), 36u), Some(35u as $T));
use str::StrSlice;
assert_eq!(parse_bytes("123".as_bytes(), 10u), Some(123u as $T));
assert_eq!(parse_bytes("1001".as_bytes(), 2u), Some(9u as $T));
assert_eq!(parse_bytes("123".as_bytes(), 8u), Some(83u as $T));
assert_eq!(u16::parse_bytes("123".as_bytes(), 16u), Some(291u as u16));
assert_eq!(u16::parse_bytes("ffff".as_bytes(), 16u), Some(65535u as u16));
assert_eq!(parse_bytes("z".as_bytes(), 36u), Some(35u as $T));
assert!(parse_bytes(to_bytes("Z"), 10u).is_none());
assert!(parse_bytes(to_bytes("_"), 2u).is_none());
assert!(parse_bytes("Z".as_bytes(), 10u).is_none());
assert!(parse_bytes("_".as_bytes(), 2u).is_none());
}
#[test]

View file

@ -1448,9 +1448,9 @@ mod tests {
use rand::RngUtil;
use rand;
use run;
use str;
use str::StrSlice;
use vec;
use vec::CopyableVector;
use libc::consts::os::posix88::{S_IRUSR, S_IWUSR, S_IXUSR};
@ -1684,7 +1684,7 @@ mod tests {
};
assert!((ostream as uint != 0u));
let s = ~"hello";
let mut buf = str::to_bytes(s) + [0 as u8];
let mut buf = s.as_bytes_with_null().to_owned();
do vec::as_mut_buf(buf) |b, _len| {
assert!((libc::fwrite(b as *c_void, 1u as size_t,
(s.len() + 1u) as size_t, ostream)

View file

@ -947,7 +947,6 @@ pub mod windows {
mod tests {
use option::{None, Some};
use path::{PosixPath, WindowsPath, windows};
use str;
#[test]
fn test_double_slash_collapsing() {

View file

@ -64,7 +64,7 @@ pub use path::PosixPath;
pub use path::WindowsPath;
pub use ptr::RawPtr;
pub use ascii::{Ascii, AsciiCast, OwnedAsciiCast, AsciiStr};
pub use str::{StrVector, StrSlice, OwnedStr, StrUtil};
pub use str::{StrVector, StrSlice, OwnedStr, StrUtil, NullTerminatedStr};
pub use from_str::{FromStr};
pub use to_bytes::IterBytes;
pub use to_str::{ToStr, ToStrConsume};

View file

@ -75,5 +75,5 @@ fn super_simple_smoke_test_lets_go_read_some_files_and_have_a_good_time() {
let message = "it's alright. have a good time";
let filename = &Path("test.txt");
let mut outstream = FileStream::open(filename, Create, Read).unwrap();
outstream.write(message.to_bytes());
outstream.write(message.as_bytes());
}

View file

@ -108,7 +108,7 @@ mod test {
let mem_writer = MemWriter::new();
let mut deflate_writer = DeflateWriter::new(mem_writer);
let in_msg = "test";
let in_bytes = in_msg.to_bytes();
let in_bytes = in_msg.as_bytes();
deflate_writer.write(in_bytes);
deflate_writer.flush();
let buf = deflate_writer.inner().inner();

View file

@ -741,8 +741,7 @@ fn with_envp<T>(env: Option<&[(~str, ~str)]>, cb: &fn(*mut c_void) -> T) -> T {
let mut blk = ~[];
for es.each |&(k, v)| {
let kv = fmt!("%s=%s", k, v);
blk.push_all(str::as_bytes_slice(kv));
blk.push(0);
// Consume the formatted "key=value" string as its byte vector, keeping the
// trailing null so that each environment entry is null-terminated.
blk.push_all(kv.as_bytes_with_null_consume());
}
blk.push(0);
vec::as_imm_buf(blk, |p, _len|

View file

@ -304,40 +304,6 @@ impl<'self> StrVector for &'self [&'self str] {
}
}
/*
Section: Transforming strings
*/
/**
* Converts a string to a unique vector of bytes
*
* The result vector is not null-terminated.
*/
pub fn to_bytes(s: &str) -> ~[u8] {
unsafe {
let mut v: ~[u8] = ::cast::transmute(s.to_owned());
vec::raw::set_len(&mut v, s.len());
v
}
}
/// Work with the string as a byte slice, not including trailing null.
#[inline(always)]
pub fn byte_slice<T>(s: &str, f: &fn(v: &[u8]) -> T) -> T {
do as_buf(s) |p,n| {
unsafe { vec::raw::buf_as_slice(p, n-1u, f) }
}
}
/// Work with the string as a byte slice, not including trailing null, without
/// a callback.
#[inline(always)]
pub fn byte_slice_no_callback<'a>(s: &'a str) -> &'a [u8] {
unsafe {
cast::transmute(s)
}
}
/// Something that can be used to compare against a character
pub trait CharEq {
/// Determine if the splitter should split at the given character
@ -1081,39 +1047,6 @@ static tag_five_b: uint = 248u;
static max_five_b: uint = 67108864u;
static tag_six_b: uint = 252u;
/**
* Work with the byte buffer of a string.
*
* Allows for unsafe manipulation of strings, which is useful for foreign
* interop.
*
* # Example
*
* ~~~ {.rust}
* let i = str::as_bytes("Hello World") { |bytes| bytes.len() };
* ~~~
*/
#[inline]
pub fn as_bytes<T>(s: &const ~str, f: &fn(&~[u8]) -> T) -> T {
unsafe {
let v: *~[u8] = cast::transmute(copy s);
f(&*v)
}
}
/**
* Work with the byte buffer of a string as a byte slice.
*
* The byte slice does not include the null terminator.
*/
pub fn as_bytes_slice<'a>(s: &'a str) -> &'a [u8] {
unsafe {
let (ptr, len): (*u8, uint) = ::cast::transmute(s);
let outgoing_tuple: (*u8, uint) = (ptr, len - 1);
return ::cast::transmute(outgoing_tuple);
}
}
/**
* A dummy trait to hold all the utility methods that we implement on strings.
*/
@ -1216,11 +1149,10 @@ pub fn subslice_offset(outer: &str, inner: &str) -> uint {
* reallocating
*/
pub fn capacity(s: &const ~str) -> uint {
do as_bytes(s) |buf| {
let vcap = vec::capacity(buf);
assert!(vcap > 0u);
vcap - 1u
}
let buf: &const ~[u8] = unsafe { cast::transmute(s) };
let vcap = vec::capacity(buf);
assert!(vcap > 0u);
vcap - 1u
}
/// Escape each char in `s` with char::escape_default.
@ -1482,7 +1414,7 @@ pub trait StrSlice<'self> {
fn char_at(&self, i: uint) -> char;
fn char_range_at_reverse(&self, start: uint) -> CharRange;
fn char_at_reverse(&self, i: uint) -> char;
fn to_bytes(&self) -> ~[u8];
fn as_bytes(&self) -> &'self [u8];
fn find<C: CharEq>(&self, search: C) -> Option<uint>;
fn rfind<C: CharEq>(&self, search: C) -> Option<uint>;
@ -1545,12 +1477,12 @@ impl<'self> StrSlice<'self> for &'self str {
/// An iterator over the bytes of `self`
#[inline]
fn bytes_iter(&self) -> StrBytesIterator<'self> {
StrBytesIterator { it: as_bytes_slice(*self).iter() }
StrBytesIterator { it: self.as_bytes().iter() }
}
/// An iterator over the bytes of `self`, in reverse order
#[inline]
fn bytes_rev_iter(&self) -> StrBytesRevIterator<'self> {
StrBytesRevIterator { it: as_bytes_slice(*self).rev_iter() }
StrBytesRevIterator { it: self.as_bytes().rev_iter() }
}
/// An iterator over substrings of `self`, separated by characters
@ -1936,7 +1868,18 @@ impl<'self> StrSlice<'self> for &'self str {
self.char_range_at_reverse(i).ch
}
fn to_bytes(&self) -> ~[u8] { to_bytes(*self) }
/**
* Work with the byte buffer of a string as a byte slice.
*
* Nothing is copied: the returned slice reuses the string's own pointer
* and carries the same lifetime as the string slice.
*
* The byte slice does not include the null terminator.
*/
fn as_bytes(&self) -> &'self [u8] {
unsafe {
// Reinterpret the string slice as its raw (pointer, length) pair.
let (ptr, len): (*u8, uint) = ::cast::transmute(*self);
// Shorten the length by one; presumably the raw length counts the
// trailing null byte (TODO confirm against the string representation).
let outgoing_tuple: (*u8, uint) = (ptr, len - 1);
// Reinterpret the adjusted pair as a byte slice.
::cast::transmute(outgoing_tuple)
}
}
/**
* Returns the byte index of the first character of `self` that matches `search`
@ -2051,6 +1994,50 @@ impl<'self> StrSlice<'self> for &'self str {
}
// Implemented for the string types whose buffer can be viewed as bytes
// together with a trailing null byte.
#[allow(missing_doc)]
pub trait NullTerminatedStr {
// Borrow the string's bytes as a slice that includes the null terminator.
fn as_bytes_with_null<'a>(&'a self) -> &'a [u8];
}
impl NullTerminatedStr for ~str {
/**
* Work with the byte buffer of a string as a byte slice.
*
* The byte slice does include the null terminator.
*/
#[inline]
fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
// Reinterpret the borrowed owned string as a borrowed owned byte vector;
// no bytes are copied.
let ptr: &'a ~[u8] = unsafe { ::cast::transmute(self) };
// Borrow the whole vector as a slice tied to the lifetime of `self`.
let slice: &'a [u8] = *ptr;
slice
}
}
impl NullTerminatedStr for @str {
/**
* Work with the byte buffer of a string as a byte slice.
*
* The byte slice does include the null terminator.
*/
#[inline]
fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
// NOTE(review): the managed string is reinterpreted through a `~[u8]`
// reference rather than `@[u8]`; presumably this relies on both vector
// kinds sharing one in-memory layout -- confirm.
let ptr: &'a ~[u8] = unsafe { ::cast::transmute(self) };
// Borrow the whole vector as a slice tied to the lifetime of `self`.
let slice: &'a [u8] = *ptr;
slice
}
}
// static strings are the only slices guaranteed to have a null terminator
impl NullTerminatedStr for &'static str {
/**
* Work with the byte buffer of a string as a byte slice.
*
* The byte slice does include the null terminator.
*/
#[inline]
fn as_bytes_with_null(&self) -> &'static [u8] {
// The string slice is reinterpreted as a byte slice as-is, with no
// adjustment to its length.
unsafe { ::cast::transmute(*self) }
}
}
#[allow(missing_doc)]
pub trait OwnedStr {
fn push_str_no_overallocate(&mut self, rhs: &str);
@ -2062,6 +2049,8 @@ pub trait OwnedStr {
fn append(&self, rhs: &str) -> ~str; // FIXME #4850: this should consume self.
fn reserve(&mut self, n: uint);
fn reserve_at_least(&mut self, n: uint);
fn as_bytes_with_null_consume(self) -> ~[u8];
}
impl OwnedStr for ~str {
@ -2251,6 +2240,13 @@ impl OwnedStr for ~str {
fn reserve_at_least(&mut self, n: uint) {
self.reserve(uint::next_power_of_two(n + 1u) - 1u)
}
/// Convert to a vector of bytes. This does not allocate a new
/// string, and includes the null terminator.
///
/// Takes `self` by value, so the string is given up by the caller and
/// handed back as the returned vector.
#[inline]
fn as_bytes_with_null_consume(self) -> ~[u8] {
// Reinterpret the owned string directly as an owned byte vector.
unsafe { ::cast::transmute(self) }
}
}
impl Clone for ~str {
@ -2336,7 +2332,7 @@ mod tests {
use ptr;
use str::*;
use vec;
use vec::ImmutableVector;
use vec::{ImmutableVector, CopyableVector};
use cmp::{TotalOrd, Less, Equal, Greater};
#[test]
@ -2952,12 +2948,70 @@ mod tests {
}
}
// Checks that `as_bytes` on string literals yields exactly the string's
// bytes, with no trailing null.
#[test]
fn test_as_bytes() {
// no null
// Expected bytes of the multilingual sample string compared below.
let v = [
224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
109
];
// The empty string must produce an empty slice.
assert_eq!("".as_bytes(), &[]);
assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
}
// Checks that `as_bytes_with_null` yields the string's bytes followed by a
// single 0 byte, for static, managed (@) and owned (~) strings in turn.
#[test]
fn test_as_bytes_with_null() {
// has null
// Expected bytes of the multilingual sample string plus the trailing 0.
let v = [
224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
109, 0
];
// Static string literals.
assert_eq!("".as_bytes_with_null(), &[0]);
assert_eq!("abc".as_bytes_with_null(), &['a' as u8, 'b' as u8, 'c' as u8, 0]);
assert_eq!("ศไทย中华Việt Nam".as_bytes_with_null(), v);
// Managed strings.
let s1 = @"";
let s2 = @"abc";
let s3 = @"ศไทย中华Việt Nam";
assert_eq!(s1.as_bytes_with_null(), &[0]);
assert_eq!(s2.as_bytes_with_null(), &['a' as u8, 'b' as u8, 'c' as u8, 0]);
assert_eq!(s3.as_bytes_with_null(), v);
// Owned strings.
let s1 = ~"";
let s2 = ~"abc";
let s3 = ~"ศไทย中华Việt Nam";
assert_eq!(s1.as_bytes_with_null(), &[0]);
assert_eq!(s2.as_bytes_with_null(), &['a' as u8, 'b' as u8, 'c' as u8, 0]);
assert_eq!(s3.as_bytes_with_null(), v);
}
// Checks that consuming an owned string yields an owned byte vector made of
// the string's bytes followed by a single 0 byte.
#[test]
fn test_as_bytes_with_null_consume() {
let s = ~"ศไทย中华Việt Nam";
// Expected bytes of `s` plus the trailing 0.
let v = ~[
224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
109, 0
];
// The empty string still yields the terminator byte.
assert_eq!((~"").as_bytes_with_null_consume(), ~[0]);
assert_eq!((~"abc").as_bytes_with_null_consume(),
~['a' as u8, 'b' as u8, 'c' as u8, 0]);
// `s` is passed by value here and is given up to the call.
assert_eq!(s.as_bytes_with_null_consume(), v);
}
#[test]
#[ignore(cfg(windows))]
#[should_fail]
fn test_as_bytes_fail() {
// Don't double free
as_bytes::<()>(&~"", |_bytes| fail!() );
// Don't double free. (I'm not sure if this exercises the
// original problem code path anymore.)
let s = ~"";
let _bytes = s.as_bytes_with_null();
fail!();
}
#[test]
@ -3032,7 +3086,7 @@ mod tests {
fn vec_str_conversions() {
let s1: ~str = ~"All mimsy were the borogoves";
let v: ~[u8] = to_bytes(s1);
let v: ~[u8] = s1.as_bytes().to_owned();
let s2: ~str = from_bytes(v);
let mut i: uint = 0u;
let n1: uint = s1.len();

View file

@ -18,7 +18,7 @@ use io;
use io::Writer;
use option::{None, Option, Some};
use old_iter::BaseIter;
use str;
use str::StrSlice;
pub type Cb<'self> = &'self fn(buf: &[u8]) -> bool;
@ -239,27 +239,25 @@ impl<A:IterBytes> IterBytes for @[A] {
impl<'self> IterBytes for &'self str {
#[inline(always)]
fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool {
do str::byte_slice(*self) |bytes| {
f(bytes)
}
f(self.as_bytes())
}
}
impl IterBytes for ~str {
#[inline(always)]
fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool {
do str::byte_slice(*self) |bytes| {
f(bytes)
}
// this should possibly include the null terminator, but that
// breaks .find_equiv on hashmaps.
f(self.as_bytes())
}
}
impl IterBytes for @str {
#[inline(always)]
fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool {
do str::byte_slice(*self) |bytes| {
f(bytes)
}
// this should possibly include the null terminator, but that
// breaks .find_equiv on hashmaps.
f(self.as_bytes())
}
}