std: integrating erickt's url encoding/decoding from github.com/erickt/rust-uri into std::net::url

This commit is contained in:
Daniel Patterson 2012-07-24 23:21:32 -04:00 committed by Brian Anderson
parent dfe1f6260e
commit ef46314d1e

View file

@ -1,6 +1,17 @@
//! Types/fns concerning URLs (see RFC 3986)
export url, userinfo, query, from_str, to_str, get_scheme;
import map;
import map::{hashmap, str_hash};
import io::{reader, reader_util};
import dvec::{dvec, extensions};
export url, userinfo, query;
export from_str, to_str;
export get_scheme;
export encode, decode;
export encode_component, decode_component;
export encode_form_urlencoded, decode_form_urlencoded;
type url = {
scheme: ~str,
@ -28,6 +39,223 @@ fn userinfo(-user: ~str, -pass: option<~str>) -> userinfo {
{user: user, pass: pass}
}
/**
 * Percent-encodes `s` one byte at a time.
 *
 * Unreserved characters (ASCII letters, digits, `-`, `.`, `_`, `~`) are
 * always copied through unchanged. When `full_url` is true the delimiter
 * characters matched in the inner `alt` are copied through as well. Every
 * other byte is written as `%` followed by exactly two uppercase hex digits.
 *
 * NOTE(review): RFC 3986 lists the single quote, not the double quote,
 * among the sub-delims. The list is kept as it was because the existing
 * tests pin the current treatment of both characters.
 */
fn encode_inner(s: ~str, full_url: bool) -> ~str {
    do io::with_str_reader(s) |rdr| {
        let mut out = ~"";

        while !rdr.eof() {
            let ch = rdr.read_byte() as char;
            alt ch {
              // unreserved:
              'A' to 'Z' |
              'a' to 'z' |
              '0' to '9' |
              '-' | '.' | '_' | '~' {
                str::push_char(out, ch);
              }
              _ {
                if full_url {
                    alt ch {
                      // gen-delims:
                      ':' | '/' | '?' | '#' | '[' | ']' | '@' |
                      // sub-delims:
                      '!' | '$' | '&' | '"' | '(' | ')' | '*' |
                      '+' | ',' | ';' | '=' {
                        str::push_char(out, ch);
                      }
                      // Zero-pad to two digits: bytes below 0x10 must come
                      // out as e.g. `%0A`, because decode_inner always
                      // consumes two bytes after a `%`.
                      _ { out += #fmt("%%%02X", ch as uint); }
                    }
                } else {
                    // Same two-digit padding as above.
                    out += #fmt("%%%02X", ch as uint);
                }
              }
            }
        }

        out
    }
}
/**
 * Percent-encodes a complete URI.
 *
 * Unreserved characters and the URI delimiter characters are kept as they
 * are; everything else is replaced by a percent-encoded sequence, following
 * RFC 3986.
 */
fn encode(s: ~str) -> ~str {
    // A whole URI keeps its delimiters, so ask for the "full URL" mode.
    let full_url = true;
    encode_inner(s, full_url)
}
/**
 * Percent-encodes a single URI component.
 *
 * Only unreserved characters are kept as they are; the URI delimiter
 * characters are percent-encoded along with everything else, following
 * RFC 3986.
 */
fn encode_component(s: ~str) -> ~str {
    // Inside a component the delimiters are data, so they get escaped too.
    let full_url = false;
    encode_inner(s, full_url)
}
/**
 * Percent-decodes `s`.
 *
 * The two bytes following each `%` are parsed as a hexadecimal character
 * code. When `full_url` is false every escape is replaced by that
 * character. When `full_url` is true, an escape whose value is one of the
 * delimiter characters matched below is copied through still encoded, and
 * only the remaining escapes are replaced. Characters outside an escape are
 * copied unchanged.
 *
 * NOTE(review): the result of `parse_buf` is unwrapped with `.get()`, so an
 * escape that is not valid hex presumably fails the task instead of being
 * reported -- confirm. Also confirm what happens when fewer than two bytes
 * remain after a `%`; the `bytes[1u]` access below assumes both are present.
 */
fn decode_inner(s: ~str, full_url: bool) -> ~str {
do io::with_str_reader(s) |rdr| {
let mut out = ~"";
while !rdr.eof() {
alt rdr.read_char() {
'%' {
// Take the two raw bytes after the `%` and read them as one hex number.
let bytes = rdr.read_bytes(2u);
// The parsed number is cast straight to a char, so each escape is
// treated as a character code on its own.
let ch = uint::parse_buf(bytes, 16u).get() as char;
if full_url {
// Delimiters stay encoded; only other characters are decoded:
alt ch {
// gen-delims:
':' | '/' | '?' | '#' | '[' | ']' | '@' |
// sub-delims:
'!' | '$' | '&' | '"' | '(' | ')' | '*' |
'+' | ',' | ';' | '=' {
// Re-emit the escape using the same two bytes that were read.
str::push_char(out, '%');
str::push_char(out, bytes[0u] as char);
str::push_char(out, bytes[1u] as char);
}
ch { str::push_char(out, ch); }
}
} else {
str::push_char(out, ch);
}
}
// Anything that is not the start of an escape is copied as-is.
ch { str::push_char(out, ch); }
}
}
out
}
}
/**
 * Decodes a percent-encoded URI, the counterpart of `encode`.
 *
 * Escapes that stand for URI delimiter characters are left encoded; every
 * other escape is replaced by the character it represents.
 */
fn decode(s: ~str) -> ~str {
    // A whole URI must keep its encoded delimiters intact.
    let full_url = true;
    decode_inner(s, full_url)
}
/**
 * Decodes a percent-encoded URI component.
 *
 * Every escape, including those for URI delimiter characters, is replaced
 * by the character it represents.
 */
fn decode_component(s: ~str) -> ~str {
    // Inside a component nothing needs to stay encoded.
    let full_url = false;
    decode_inner(s, full_url)
}
/**
 * Escapes `s` for use inside 'application/x-www-form-urlencoded' data.
 *
 * ASCII letters, digits, `_`, `.` and `-` are copied unchanged, a space
 * becomes `+`, and every other byte is written as `%` followed by exactly
 * two uppercase hex digits.
 */
fn encode_plus(s: ~str) -> ~str {
    do io::with_str_reader(s) |rdr| {
        let mut out = ~"";

        while !rdr.eof() {
            let ch = rdr.read_byte() as char;
            alt ch {
              'A' to 'Z' | 'a' to 'z' | '0' to '9' | '_' | '.' | '-' {
                str::push_char(out, ch);
              }
              ' ' { str::push_char(out, '+'); }
              // Zero-pad to two digits: bytes below 0x10 must come out as
              // e.g. `%0A`, because decode_form_urlencoded always reads two
              // bytes after a `%`.
              _ { out += #fmt("%%%02X", ch as uint); }
            }
        }

        out
    }
}
/**
 * Encode a hashmap to the 'application/x-www-form-urlencoded' media type.
 *
 * Each value of each key is written as `key=value`, and the pairs are
 * joined with `&`. Keys and values are escaped with `encode_plus`. A key
 * that has no values contributes nothing to the output.
 */
fn encode_form_urlencoded(m: hashmap<~str, @dvec<@~str>>) -> ~str {
    let mut out = ~"";
    let mut first = true;

    for m.each |key, values| {
        // Escape the key once; it is reused for every one of its values.
        let name = encode_plus(key);

        for (*values).each |value| {
            // Every pair except the very first is preceded by a separator.
            if !first {
                str::push_char(out, '&');
            }
            first = false;

            out += #fmt("%s=%s", name, encode_plus(*value));
        }
    }

    out
}
/** Decode a string encoded with the 'application/x-www-form-urlencoded' media
 * type into a hashmap.
 *
 * Pairs are separated by `&` or `;`, and `=` switches from reading the key
 * to reading the value. `+` decodes to a space and `%` followed by two hex
 * digits decodes to the character with that code. Each value is appended to
 * the dvec stored under its key, so a key that occurs several times keeps
 * all of its values.
 *
 * A pair whose key or whose value is empty is dropped.
 */
fn decode_form_urlencoded(s: ~[u8]) -> hashmap<~str, @dvec<@~str>> {
do io::with_bytes_reader(s) |rdr| {
let m = str_hash();
let mut key = ~"";
let mut value = ~"";
// True while characters belong to the key, false once `=` has been seen.
let mut parsing_key = true;
while !rdr.eof() {
alt rdr.read_char() {
// End of a pair: record it if complete, then reset for the next pair.
'&' | ';' {
if key != ~"" && value != ~"" {
// Reuse the list already stored for this key, or start a new one.
let values = alt m.find(key) {
some(values) { values }
none {
let values = @dvec();
m.insert(key, values);
values
}
};
(*values).push(@value)
}
parsing_key = true;
key = ~"";
value = ~"";
}
// The `=` itself is never stored. This arm matches every literal `=`, so
// a second `=` inside the same pair is silently left out of the value.
'=' { parsing_key = false; }
ch {
let ch = alt ch {
'%' {
// NOTE(review): `.get()` presumably fails the task when the two bytes
// after `%` are not valid hex -- confirm.
uint::parse_buf(rdr.read_bytes(2u), 16u).get() as char
}
'+' { ' ' }
ch { ch }
};
if parsing_key {
str::push_char(key, ch)
} else {
str::push_char(value, ch)
}
}
}
}
// The input ended without a trailing separator: record the last pair.
if key != ~"" && value != ~"" {
let values = alt m.find(key) {
some(values) { values }
none {
let values = @dvec();
m.insert(key, values);
values
}
};
(*values).push(@value)
}
m
}
}
fn split_char_first(s: ~str, c: char) -> (~str, ~str) {
let mut v = str::splitn_char(s, c, 1);
if v.len() == 1 {
@ -62,7 +290,7 @@ fn query_from_str(rawquery: ~str) -> query {
if str::len(rawquery) != 0 {
for str::split_char(rawquery, '&').each |p| {
let (k, v) = split_char_first(p, '=');
vec::push(query, (k, v));
vec::push(query, (decode_component(k), decode_component(v)));
};
}
return query;
@ -72,7 +300,7 @@ fn query_to_str(query: query) -> ~str {
let mut strvec = ~[];
for query.each |kv| {
let (k, v) = kv;
strvec += ~[fmt!{"%s=%s", k, v}];
strvec += ~[#fmt("%s=%s", encode_component(k), encode_component(v))];
};
return str::connect(strvec, ~"&");
}
@ -130,7 +358,7 @@ fn from_str(rawurl: ~str) -> result::result<url, ~str> {
let (rest, query) = split_char_first(rest, '?');
let query = query_from_str(query);
let (host, pth) = split_char_first(rest, '/');
let mut path = pth;
let mut path = decode_component(pth);
if str::len(path) != 0 {
str::unshift_char(path, '/');
}
@ -242,4 +470,157 @@ mod tests {
assert to_str(result::unwrap(from_str(url))) == url;
}
#[test]
fn test_url_component_encoding() {
let url = ~"http://rust-lang.org/doc%20uments?ba%25d%20=%23%26%2B";
let u = result::unwrap(from_str(url));
assert u.path == ~"/doc uments";
assert u.query.find(|kv| kv.first() == ~"ba%d ")
.get().second() == ~"#&+";
}
#[test]
fn test_encode() {
assert encode(~"") == ~"";
assert encode(~"http://example.com") == ~"http://example.com";
assert encode(~"foo bar% baz") == ~"foo%20bar%25%20baz";
assert encode(~" ") == ~"%20";
assert encode(~"!") == ~"!";
assert encode(~"\"") == ~"\"";
assert encode(~"#") == ~"#";
assert encode(~"$") == ~"$";
assert encode(~"%") == ~"%25";
assert encode(~"&") == ~"&";
assert encode(~"'") == ~"%27";
assert encode(~"(") == ~"(";
assert encode(~")") == ~")";
assert encode(~"*") == ~"*";
assert encode(~"+") == ~"+";
assert encode(~",") == ~",";
assert encode(~"/") == ~"/";
assert encode(~":") == ~":";
assert encode(~";") == ~";";
assert encode(~"=") == ~"=";
assert encode(~"?") == ~"?";
assert encode(~"@") == ~"@";
assert encode(~"[") == ~"[";
assert encode(~"]") == ~"]";
}
#[test]
fn test_encode_component() {
assert encode_component(~"") == ~"";
assert encode_component(~"http://example.com") ==
~"http%3A%2F%2Fexample.com";
assert encode_component(~"foo bar% baz") == ~"foo%20bar%25%20baz";
assert encode_component(~" ") == ~"%20";
assert encode_component(~"!") == ~"%21";
assert encode_component(~"#") == ~"%23";
assert encode_component(~"$") == ~"%24";
assert encode_component(~"%") == ~"%25";
assert encode_component(~"&") == ~"%26";
assert encode_component(~"'") == ~"%27";
assert encode_component(~"(") == ~"%28";
assert encode_component(~")") == ~"%29";
assert encode_component(~"*") == ~"%2A";
assert encode_component(~"+") == ~"%2B";
assert encode_component(~",") == ~"%2C";
assert encode_component(~"/") == ~"%2F";
assert encode_component(~":") == ~"%3A";
assert encode_component(~";") == ~"%3B";
assert encode_component(~"=") == ~"%3D";
assert encode_component(~"?") == ~"%3F";
assert encode_component(~"@") == ~"%40";
assert encode_component(~"[") == ~"%5B";
assert encode_component(~"]") == ~"%5D";
}
#[test]
fn test_decode() {
assert decode(~"") == ~"";
assert decode(~"abc/def 123") == ~"abc/def 123";
assert decode(~"abc%2Fdef%20123") == ~"abc%2Fdef 123";
assert decode(~"%20") == ~" ";
assert decode(~"%21") == ~"%21";
assert decode(~"%22") == ~"%22";
assert decode(~"%23") == ~"%23";
assert decode(~"%24") == ~"%24";
assert decode(~"%25") == ~"%";
assert decode(~"%26") == ~"%26";
assert decode(~"%27") == ~"'";
assert decode(~"%28") == ~"%28";
assert decode(~"%29") == ~"%29";
assert decode(~"%2A") == ~"%2A";
assert decode(~"%2B") == ~"%2B";
assert decode(~"%2C") == ~"%2C";
assert decode(~"%2F") == ~"%2F";
assert decode(~"%3A") == ~"%3A";
assert decode(~"%3B") == ~"%3B";
assert decode(~"%3D") == ~"%3D";
assert decode(~"%3F") == ~"%3F";
assert decode(~"%40") == ~"%40";
assert decode(~"%5B") == ~"%5B";
assert decode(~"%5D") == ~"%5D";
}
#[test]
fn test_decode_component() {
assert decode_component(~"") == ~"";
assert decode_component(~"abc/def 123") == ~"abc/def 123";
assert decode_component(~"abc%2Fdef%20123") == ~"abc/def 123";
assert decode_component(~"%20") == ~" ";
assert decode_component(~"%21") == ~"!";
assert decode_component(~"%22") == ~"\"";
assert decode_component(~"%23") == ~"#";
assert decode_component(~"%24") == ~"$";
assert decode_component(~"%25") == ~"%";
assert decode_component(~"%26") == ~"&";
assert decode_component(~"%27") == ~"'";
assert decode_component(~"%28") == ~"(";
assert decode_component(~"%29") == ~")";
assert decode_component(~"%2A") == ~"*";
assert decode_component(~"%2B") == ~"+";
assert decode_component(~"%2C") == ~",";
assert decode_component(~"%2F") == ~"/";
assert decode_component(~"%3A") == ~":";
assert decode_component(~"%3B") == ~";";
assert decode_component(~"%3D") == ~"=";
assert decode_component(~"%3F") == ~"?";
assert decode_component(~"%40") == ~"@";
assert decode_component(~"%5B") == ~"[";
assert decode_component(~"%5D") == ~"]";
}
#[test]
fn test_encode_form_urlencoded() {
    // An empty map, and keys that carry no values, both encode to nothing.
    let no_values = str_hash();
    assert encode_form_urlencoded(no_values) == ~"";

    no_values.insert(~"", @dvec());
    no_values.insert(~"foo", @dvec());
    assert encode_form_urlencoded(no_values) == ~"";

    // Every value of a key is emitted as its own `key=value` pair.
    let repeated = str_hash();
    repeated.insert(~"foo", @dvec::from_vec(~[mut @~"bar", @~"123"]));
    assert encode_form_urlencoded(repeated) == ~"foo=bar&foo=123";

    // Spaces turn into `+`, and `=` inside a value is percent-encoded.
    let spaced = str_hash();
    spaced.insert(~"foo bar", @dvec::from_vec(~[mut @~"abc", @~"12 = 34"]));
    assert encode_form_urlencoded(spaced) == ~"foo+bar=abc&foo+bar=12+%3D+34";
}
#[test]
fn test_decode_form_urlencoded() {
    import map::hash_from_strs;

    // No input yields no entries.
    assert decode_form_urlencoded(~[]) == str_hash();

    // A repeated key gathers its values in order; `+` and `%3D` are decoded.
    let input = str::bytes(~"a=1&foo+bar=abc&foo+bar=12+%3D+34");
    let expected = hash_from_strs(~[
        (~"a", @dvec::from_elem(@~"1")),
        (~"foo bar", @dvec::from_vec(~[mut @~"abc", @~"12 = 34"]))
    ]);
    assert decode_form_urlencoded(input) == expected;
}
}