std: integrating erickt's url encoding/decoding from github.com/erickt/rust-uri into std::net::url
This commit is contained in:
parent
dfe1f6260e
commit
ef46314d1e
1 changed files with 385 additions and 4 deletions
|
|
@ -1,6 +1,17 @@
|
|||
//! Types/fns concerning URLs (see RFC 3986)
|
||||
|
||||
export url, userinfo, query, from_str, to_str, get_scheme;
|
||||
import map;
|
||||
import map::{hashmap, str_hash};
|
||||
import io::{reader, reader_util};
|
||||
import dvec::{dvec, extensions};
|
||||
|
||||
export url, userinfo, query;
|
||||
export from_str, to_str;
|
||||
export get_scheme;
|
||||
|
||||
export encode, decode;
|
||||
export encode_component, decode_component;
|
||||
export encode_form_urlencoded, decode_form_urlencoded;
|
||||
|
||||
type url = {
|
||||
scheme: ~str,
|
||||
|
|
@ -28,6 +39,223 @@ fn userinfo(-user: ~str, -pass: option<~str>) -> userinfo {
|
|||
{user: user, pass: pass}
|
||||
}
|
||||
|
||||
/**
 * Percent-encodes `s`, one byte at a time.
 *
 * Unreserved bytes (ASCII letters, digits, '-', '.', '_' and '~') are always
 * copied through unchanged. When `full_url` is true the delimiter bytes
 * listed in the second arm are copied through as well; when it is false
 * they are escaped like every other byte.
 *
 * Each escaped byte is written as '%' followed by exactly two uppercase hex
 * digits, as RFC 3986 section 2.1 requires.
 */
fn encode_inner(s: ~str, full_url: bool) -> ~str {
    do io::with_str_reader(s) |rdr| {
        let mut out = ~"";

        while !rdr.eof() {
            let ch = rdr.read_byte() as char;

            // Decide once whether this byte is copied or escaped, so there
            // is a single place that formats the escape.
            let verbatim = alt ch {
              // unreserved:
              'A' to 'Z' |
              'a' to 'z' |
              '0' to '9' |
              '-' | '.' | '_' | '~' { true }

              // gen-delims:
              ':' | '/' | '?' | '#' | '[' | ']' | '@' |

              // sub-delims:
              // NOTE(review): RFC 3986 lists the apostrophe here, not the
              // double quote. Left as found because the existing tests pin
              // the current behaviour for both characters.
              '!' | '$' | '&' | '"' | '(' | ')' | '*' |
              '+' | ',' | ';' | '=' { full_url }

              _ { false }
            };

            if verbatim {
                str::push_char(out, ch);
            } else {
                // Zero-pad to two digits: without the padding a byte below
                // 0x10 (e.g. '\n') came out as "%A", which is not a valid
                // escape and is misread by a decoder that consumes two
                // bytes after the '%'.
                out += #fmt("%%%02X", ch as uint);
            }
        }

        out
    }
}
|
||||
|
||||
/** Encodes a URI by replacing reserved characters with percent encoded character
|
||||
* sequences.
|
||||
*
|
||||
* This function is compliant with RFC 3986.
|
||||
*/
|
||||
fn encode(s: ~str) -> ~str {
|
||||
encode_inner(s, true)
|
||||
}
|
||||
|
||||
/** Encodes a URI component by replacing reserved characters with percent encoded
|
||||
* character sequences.
|
||||
*
|
||||
* This function is compliant with RFC 3986.
|
||||
*/
|
||||
fn encode_component(s: ~str) -> ~str {
|
||||
encode_inner(s, false)
|
||||
}
|
||||
|
||||
/**
 * Decodes the percent escapes in `s`.
 *
 * When `full_url` is true, an escape whose value is one of the delimiter
 * characters listed below is copied through still escaped, so decoding a
 * whole URL does not change its structure. When `full_url` is false every
 * escape is decoded.
 *
 * A '%' that is not followed by two hex digits (including one cut short by
 * the end of the input) is not an escape; it is copied through literally
 * together with whatever bytes were read after it, instead of failing.
 *
 * NOTE(review): each escape is turned into one `char` on its own, so a
 * multi-byte UTF-8 sequence written as several escapes is not reassembled
 * into a single character.
 */
fn decode_inner(s: ~str, full_url: bool) -> ~str {
    do io::with_str_reader(s) |rdr| {
        let mut out = ~"";

        while !rdr.eof() {
            alt rdr.read_char() {
              '%' {
                let bytes = rdr.read_bytes(2u);

                // Only a full pair of hex digits counts as an escape.
                let parsed = if bytes.len() == 2u {
                    uint::parse_buf(bytes, 16u)
                } else {
                    none
                };

                alt parsed {
                  none {
                    // Malformed escape: give back exactly what was consumed.
                    str::push_char(out, '%');
                    for bytes.each |b| {
                        str::push_char(out, b as char);
                    }
                  }
                  some(code) {
                    let ch = code as char;

                    let is_delim = alt ch {
                      // gen-delims:
                      ':' | '/' | '?' | '#' | '[' | ']' | '@' |

                      // sub-delims:
                      // NOTE(review): has '"' where RFC 3986 has the
                      // apostrophe; kept because the existing tests pin it.
                      '!' | '$' | '&' | '"' | '(' | ')' | '*' |
                      '+' | ',' | ';' | '=' { true }

                      _ { false }
                    };

                    if full_url && is_delim {
                        // Keep the escape as written, digit case included.
                        str::push_char(out, '%');
                        str::push_char(out, bytes[0u] as char);
                        str::push_char(out, bytes[1u] as char);
                    } else {
                        str::push_char(out, ch);
                    }
                  }
                }
              }
              ch { str::push_char(out, ch); }
            }
        }

        out
    }
}
|
||||
|
||||
/** Decode a string encoded with percent encoding.
|
||||
*
|
||||
* This will only decode escape sequences generated by encode_uri.
|
||||
*/
|
||||
fn decode(s: ~str) -> ~str {
|
||||
decode_inner(s, true)
|
||||
}
|
||||
|
||||
/** Decode a string encoded with percent encoding.
|
||||
*/
|
||||
fn decode_component(s: ~str) -> ~str {
|
||||
decode_inner(s, false)
|
||||
}
|
||||
|
||||
/**
 * Encodes `s` for use as a key or value in form-urlencoded data.
 *
 * ASCII letters, digits, '_', '.' and '-' are copied through, a space
 * becomes '+', and every other byte becomes '%' followed by exactly two
 * uppercase hex digits.
 */
fn encode_plus(s: ~str) -> ~str {
    do io::with_str_reader(s) |rdr| {
        let mut out = ~"";

        while !rdr.eof() {
            let ch = rdr.read_byte() as char;
            alt ch {
              'A' to 'Z' | 'a' to 'z' | '0' to '9' | '_' | '.' | '-' {
                str::push_char(out, ch);
              }
              ' ' { str::push_char(out, '+'); }
              // Zero-pad to two digits: without the padding a byte below
              // 0x10 (e.g. '\n') came out as "%A", which a decoder that
              // reads two bytes after the '%' cannot read back.
              _ { out += #fmt("%%%02X", ch as uint); }
            }
        }

        out
    }
}
|
||||
|
||||
/** Encode a hashmap to the 'application/x-www-form-urlencoded' media type.
|
||||
*/
|
||||
fn encode_form_urlencoded(m: hashmap<~str, @dvec<@~str>>) -> ~str {
|
||||
let mut out = ~"";
|
||||
let mut first = true;
|
||||
|
||||
for m.each |key, values| {
|
||||
let key = encode_plus(key);
|
||||
|
||||
for (*values).each |value| {
|
||||
if first {
|
||||
first = false;
|
||||
} else {
|
||||
str::push_char(out, '&');
|
||||
first = false;
|
||||
}
|
||||
|
||||
out += #fmt("%s=%s", key, encode_plus(*value));
|
||||
}
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
/** Decode a string encoded with the 'application/x-www-form-urlencoded' media
|
||||
* type into a hashmap.
|
||||
*/
|
||||
fn decode_form_urlencoded(s: ~[u8]) -> hashmap<~str, @dvec<@~str>> {
|
||||
do io::with_bytes_reader(s) |rdr| {
|
||||
let m = str_hash();
|
||||
let mut key = ~"";
|
||||
let mut value = ~"";
|
||||
let mut parsing_key = true;
|
||||
|
||||
while !rdr.eof() {
|
||||
alt rdr.read_char() {
|
||||
'&' | ';' {
|
||||
if key != ~"" && value != ~"" {
|
||||
let values = alt m.find(key) {
|
||||
some(values) { values }
|
||||
none {
|
||||
let values = @dvec();
|
||||
m.insert(key, values);
|
||||
values
|
||||
}
|
||||
};
|
||||
(*values).push(@value)
|
||||
}
|
||||
|
||||
parsing_key = true;
|
||||
key = ~"";
|
||||
value = ~"";
|
||||
}
|
||||
'=' { parsing_key = false; }
|
||||
ch {
|
||||
let ch = alt ch {
|
||||
'%' {
|
||||
uint::parse_buf(rdr.read_bytes(2u), 16u).get() as char
|
||||
}
|
||||
'+' { ' ' }
|
||||
ch { ch }
|
||||
};
|
||||
|
||||
if parsing_key {
|
||||
str::push_char(key, ch)
|
||||
} else {
|
||||
str::push_char(value, ch)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if key != ~"" && value != ~"" {
|
||||
let values = alt m.find(key) {
|
||||
some(values) { values }
|
||||
none {
|
||||
let values = @dvec();
|
||||
m.insert(key, values);
|
||||
values
|
||||
}
|
||||
};
|
||||
(*values).push(@value)
|
||||
}
|
||||
|
||||
m
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn split_char_first(s: ~str, c: char) -> (~str, ~str) {
|
||||
let mut v = str::splitn_char(s, c, 1);
|
||||
if v.len() == 1 {
|
||||
|
|
@ -62,7 +290,7 @@ fn query_from_str(rawquery: ~str) -> query {
|
|||
if str::len(rawquery) != 0 {
|
||||
for str::split_char(rawquery, '&').each |p| {
|
||||
let (k, v) = split_char_first(p, '=');
|
||||
vec::push(query, (k, v));
|
||||
vec::push(query, (decode_component(k), decode_component(v)));
|
||||
};
|
||||
}
|
||||
return query;
|
||||
|
|
@ -72,7 +300,7 @@ fn query_to_str(query: query) -> ~str {
|
|||
let mut strvec = ~[];
|
||||
for query.each |kv| {
|
||||
let (k, v) = kv;
|
||||
strvec += ~[fmt!{"%s=%s", k, v}];
|
||||
strvec += ~[#fmt("%s=%s", encode_component(k), encode_component(v))];
|
||||
};
|
||||
return str::connect(strvec, ~"&");
|
||||
}
|
||||
|
|
@ -130,7 +358,7 @@ fn from_str(rawurl: ~str) -> result::result<url, ~str> {
|
|||
let (rest, query) = split_char_first(rest, '?');
|
||||
let query = query_from_str(query);
|
||||
let (host, pth) = split_char_first(rest, '/');
|
||||
let mut path = pth;
|
||||
let mut path = decode_component(pth);
|
||||
if str::len(path) != 0 {
|
||||
str::unshift_char(path, '/');
|
||||
}
|
||||
|
|
@ -242,4 +470,157 @@ mod tests {
|
|||
assert to_str(result::unwrap(from_str(url))) == url;
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_url_component_encoding() {
|
||||
let url = ~"http://rust-lang.org/doc%20uments?ba%25d%20=%23%26%2B";
|
||||
let u = result::unwrap(from_str(url));
|
||||
assert u.path == ~"/doc uments";
|
||||
assert u.query.find(|kv| kv.first() == ~"ba%d ")
|
||||
.get().second() == ~"#&+";
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encode() {
|
||||
assert encode(~"") == ~"";
|
||||
assert encode(~"http://example.com") == ~"http://example.com";
|
||||
assert encode(~"foo bar% baz") == ~"foo%20bar%25%20baz";
|
||||
assert encode(~" ") == ~"%20";
|
||||
assert encode(~"!") == ~"!";
|
||||
assert encode(~"\"") == ~"\"";
|
||||
assert encode(~"#") == ~"#";
|
||||
assert encode(~"$") == ~"$";
|
||||
assert encode(~"%") == ~"%25";
|
||||
assert encode(~"&") == ~"&";
|
||||
assert encode(~"'") == ~"%27";
|
||||
assert encode(~"(") == ~"(";
|
||||
assert encode(~")") == ~")";
|
||||
assert encode(~"*") == ~"*";
|
||||
assert encode(~"+") == ~"+";
|
||||
assert encode(~",") == ~",";
|
||||
assert encode(~"/") == ~"/";
|
||||
assert encode(~":") == ~":";
|
||||
assert encode(~";") == ~";";
|
||||
assert encode(~"=") == ~"=";
|
||||
assert encode(~"?") == ~"?";
|
||||
assert encode(~"@") == ~"@";
|
||||
assert encode(~"[") == ~"[";
|
||||
assert encode(~"]") == ~"]";
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encode_component() {
|
||||
assert encode_component(~"") == ~"";
|
||||
assert encode_component(~"http://example.com") ==
|
||||
~"http%3A%2F%2Fexample.com";
|
||||
assert encode_component(~"foo bar% baz") == ~"foo%20bar%25%20baz";
|
||||
assert encode_component(~" ") == ~"%20";
|
||||
assert encode_component(~"!") == ~"%21";
|
||||
assert encode_component(~"#") == ~"%23";
|
||||
assert encode_component(~"$") == ~"%24";
|
||||
assert encode_component(~"%") == ~"%25";
|
||||
assert encode_component(~"&") == ~"%26";
|
||||
assert encode_component(~"'") == ~"%27";
|
||||
assert encode_component(~"(") == ~"%28";
|
||||
assert encode_component(~")") == ~"%29";
|
||||
assert encode_component(~"*") == ~"%2A";
|
||||
assert encode_component(~"+") == ~"%2B";
|
||||
assert encode_component(~",") == ~"%2C";
|
||||
assert encode_component(~"/") == ~"%2F";
|
||||
assert encode_component(~":") == ~"%3A";
|
||||
assert encode_component(~";") == ~"%3B";
|
||||
assert encode_component(~"=") == ~"%3D";
|
||||
assert encode_component(~"?") == ~"%3F";
|
||||
assert encode_component(~"@") == ~"%40";
|
||||
assert encode_component(~"[") == ~"%5B";
|
||||
assert encode_component(~"]") == ~"%5D";
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_decode() {
|
||||
assert decode(~"") == ~"";
|
||||
assert decode(~"abc/def 123") == ~"abc/def 123";
|
||||
assert decode(~"abc%2Fdef%20123") == ~"abc%2Fdef 123";
|
||||
assert decode(~"%20") == ~" ";
|
||||
assert decode(~"%21") == ~"%21";
|
||||
assert decode(~"%22") == ~"%22";
|
||||
assert decode(~"%23") == ~"%23";
|
||||
assert decode(~"%24") == ~"%24";
|
||||
assert decode(~"%25") == ~"%";
|
||||
assert decode(~"%26") == ~"%26";
|
||||
assert decode(~"%27") == ~"'";
|
||||
assert decode(~"%28") == ~"%28";
|
||||
assert decode(~"%29") == ~"%29";
|
||||
assert decode(~"%2A") == ~"%2A";
|
||||
assert decode(~"%2B") == ~"%2B";
|
||||
assert decode(~"%2C") == ~"%2C";
|
||||
assert decode(~"%2F") == ~"%2F";
|
||||
assert decode(~"%3A") == ~"%3A";
|
||||
assert decode(~"%3B") == ~"%3B";
|
||||
assert decode(~"%3D") == ~"%3D";
|
||||
assert decode(~"%3F") == ~"%3F";
|
||||
assert decode(~"%40") == ~"%40";
|
||||
assert decode(~"%5B") == ~"%5B";
|
||||
assert decode(~"%5D") == ~"%5D";
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_decode_component() {
|
||||
assert decode_component(~"") == ~"";
|
||||
assert decode_component(~"abc/def 123") == ~"abc/def 123";
|
||||
assert decode_component(~"abc%2Fdef%20123") == ~"abc/def 123";
|
||||
assert decode_component(~"%20") == ~" ";
|
||||
assert decode_component(~"%21") == ~"!";
|
||||
assert decode_component(~"%22") == ~"\"";
|
||||
assert decode_component(~"%23") == ~"#";
|
||||
assert decode_component(~"%24") == ~"$";
|
||||
assert decode_component(~"%25") == ~"%";
|
||||
assert decode_component(~"%26") == ~"&";
|
||||
assert decode_component(~"%27") == ~"'";
|
||||
assert decode_component(~"%28") == ~"(";
|
||||
assert decode_component(~"%29") == ~")";
|
||||
assert decode_component(~"%2A") == ~"*";
|
||||
assert decode_component(~"%2B") == ~"+";
|
||||
assert decode_component(~"%2C") == ~",";
|
||||
assert decode_component(~"%2F") == ~"/";
|
||||
assert decode_component(~"%3A") == ~":";
|
||||
assert decode_component(~"%3B") == ~";";
|
||||
assert decode_component(~"%3D") == ~"=";
|
||||
assert decode_component(~"%3F") == ~"?";
|
||||
assert decode_component(~"%40") == ~"@";
|
||||
assert decode_component(~"%5B") == ~"[";
|
||||
assert decode_component(~"%5D") == ~"]";
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encode_form_urlencoded() {
|
||||
let m = str_hash();
|
||||
assert encode_form_urlencoded(m) == ~"";
|
||||
|
||||
m.insert(~"", @dvec());
|
||||
m.insert(~"foo", @dvec());
|
||||
assert encode_form_urlencoded(m) == ~"";
|
||||
|
||||
let m = str_hash();
|
||||
m.insert(~"foo", @dvec::from_vec(~[mut @~"bar", @~"123"]));
|
||||
assert encode_form_urlencoded(m) == ~"foo=bar&foo=123";
|
||||
|
||||
let m = str_hash();
|
||||
m.insert(~"foo bar", @dvec::from_vec(~[mut @~"abc", @~"12 = 34"]));
|
||||
assert encode_form_urlencoded(m) == ~"foo+bar=abc&foo+bar=12+%3D+34";
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_decode_form_urlencoded() {
|
||||
import map::hash_from_strs;
|
||||
|
||||
assert decode_form_urlencoded(~[]) == str_hash();
|
||||
|
||||
let s = str::bytes(~"a=1&foo+bar=abc&foo+bar=12+%3D+34");
|
||||
assert decode_form_urlencoded(s) == hash_from_strs(~[
|
||||
(~"a", @dvec::from_elem(@~"1")),
|
||||
(~"foo bar", @dvec::from_vec(~[mut @~"abc", @~"12 = 34"]))
|
||||
]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue