From c2b0d7dd8818a0dca9b1fa7af6873375907f05ca Mon Sep 17 00:00:00 2001 From: Eric Kidd Date: Sat, 13 Dec 2014 13:33:18 -0500 Subject: [PATCH] Modify `regex::Captures::{at,name}` to return `Option` Closes #14602. As discussed in that issue, the existing `at` and `name` functions represent two different results with the empty string: 1. Matched the empty string. 2. Did not match anything. Consider the following example. This regex has two named matched groups, `key` and `value`. `value` is optional: ```rust // Matches "foo", "foo;v=bar" and "foo;v=". regex!(r"(?P[a-z]+)(;v=(?P[a-z]*))?"); ``` We can access `value` using `caps.name("value")`, but there's no way for us to distinguish between the `"foo"` and `"foo;v="` cases. Early this year, @BurntSushi recommended modifying the existing `at` and `name` functions to return `Option`, instead of adding new functions to the API. This is a [breaking-change], but the fix is easy: - `refs.at(1)` becomes `refs.at(1).unwrap_or("")`. - `refs.name(name)` becomes `refs.name(name).unwrap_or("")`. --- src/compiletest/compiletest.rs | 4 +-- src/compiletest/errors.rs | 8 +++--- src/grammar/verify.rs | 8 +++--- src/libregex/lib.rs | 6 +++-- src/libregex/re.rs | 49 +++++++++++++++++----------------- 5 files changed, 38 insertions(+), 37 deletions(-) diff --git a/src/compiletest/compiletest.rs b/src/compiletest/compiletest.rs index 47ab675aff93..375dd138c229 100644 --- a/src/compiletest/compiletest.rs +++ b/src/compiletest/compiletest.rs @@ -393,7 +393,7 @@ fn extract_gdb_version(full_version_line: Option) -> Option { match re.captures(full_version_line) { Some(captures) => { - Some(captures.at(2).to_string()) + Some(captures.at(2).unwrap_or("").to_string()) } None => { println!("Could not extract GDB version from line '{}'", @@ -427,7 +427,7 @@ fn extract_lldb_version(full_version_line: Option) -> Option { match re.captures(full_version_line) { Some(captures) => { - Some(captures.at(1).to_string()) + Some(captures.at(1).unwrap_or("").to_string()) } None => { println!("Could not extract LLDB version from line '{}'", diff --git a/src/compiletest/errors.rs b/src/compiletest/errors.rs index f15db7d9371d..b7df43aabdd2 100644 --- a/src/compiletest/errors.rs +++ b/src/compiletest/errors.rs @@ -66,10 +66,10 @@ fn parse_expected(last_nonfollow_error: Option, line: &str, re: &Regex) -> Option<(WhichLine, ExpectedError)> { re.captures(line).and_then(|caps| { - let adjusts = caps.name("adjusts").len(); - let kind = caps.name("kind").to_ascii_lower(); - let msg = caps.name("msg").trim().to_string(); - let follow = caps.name("follow").len() > 0; + let adjusts = caps.name("adjusts").unwrap_or("").len(); + let kind = caps.name("kind").unwrap_or("").to_ascii_lower(); + let msg = caps.name("msg").unwrap_or("").trim().to_string(); + let follow = caps.name("follow").unwrap_or("").len() > 0; let (which, line) = if follow { assert!(adjusts == 0, "use either //~| or //~^, not both."); diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs index e3ff20f7874b..f7b19cf6fbf5 100644 --- a/src/grammar/verify.rs +++ b/src/grammar/verify.rs @@ -173,10 +173,10 @@ fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAn ); let m = re.captures(s).expect(format!("The regex didn't match {}", s).as_slice()); - let start = m.name("start"); - let end = m.name("end"); - let toknum = m.name("toknum"); - let content = m.name("content"); + let start = m.name("start").unwrap_or(""); + let end = m.name("end").unwrap_or(""); + let toknum = m.name("toknum").unwrap_or(""); + let content = m.name("content").unwrap_or(""); let proto_tok = tokens.get(toknum).expect(format!("didn't find token {} in the map", toknum).as_slice()); diff --git a/src/libregex/lib.rs b/src/libregex/lib.rs index 05f853a851ea..3fadba9583ec 100644 --- a/src/libregex/lib.rs +++ b/src/libregex/lib.rs @@ -103,7 +103,9 @@ //! let re = regex!(r"(\d{4})-(\d{2})-(\d{2})"); //! let text = "2012-03-14, 2013-01-01 and 2014-07-05"; //! for cap in re.captures_iter(text) { -//! println!("Month: {} Day: {} Year: {}", cap.at(2), cap.at(3), cap.at(1)); +//! println!("Month: {} Day: {} Year: {}", +//! cap.at(2).unwrap_or(""), cap.at(3).unwrap_or(""), +//! cap.at(1).unwrap_or("")); //! } //! // Output: //! // Month: 03 Day: 14 Year: 2012 @@ -285,7 +287,7 @@ //! # fn main() { //! let re = regex!(r"(?i)a+(?-i)b+"); //! let cap = re.captures("AaAaAbbBBBb").unwrap(); -//! assert_eq!(cap.at(0), "AaAaAbb"); +//! assert_eq!(cap.at(0), Some("AaAaAbb")); //! # } //! ``` //! diff --git a/src/libregex/re.rs b/src/libregex/re.rs index 1504e1919852..53181bfbb7e3 100644 --- a/src/libregex/re.rs +++ b/src/libregex/re.rs @@ -273,9 +273,9 @@ impl Regex { /// let re = regex!(r"'([^']+)'\s+\((\d{4})\)"); /// let text = "Not my favorite movie: 'Citizen Kane' (1941)."; /// let caps = re.captures(text).unwrap(); - /// assert_eq!(caps.at(1), "Citizen Kane"); - /// assert_eq!(caps.at(2), "1941"); - /// assert_eq!(caps.at(0), "'Citizen Kane' (1941)"); + /// assert_eq!(caps.at(1), Some("Citizen Kane")); + /// assert_eq!(caps.at(2), Some("1941")); + /// assert_eq!(caps.at(0), Some("'Citizen Kane' (1941)")); /// # } /// ``` /// @@ -291,9 +291,9 @@ impl Regex { /// let re = regex!(r"'(?P[^']+)'\s+\((?P<year>\d{4})\)"); /// let text = "Not my favorite movie: 'Citizen Kane' (1941)."; /// let caps = re.captures(text).unwrap(); - /// assert_eq!(caps.name("title"), "Citizen Kane"); - /// assert_eq!(caps.name("year"), "1941"); - /// assert_eq!(caps.at(0), "'Citizen Kane' (1941)"); + /// assert_eq!(caps.name("title"), Some("Citizen Kane")); + /// assert_eq!(caps.name("year"), Some("1941")); + /// assert_eq!(caps.at(0), Some("'Citizen Kane' (1941)")); /// # } /// ``` /// @@ -434,7 +434,7 @@ impl Regex { /// # use regex::Captures; fn main() { /// let re = regex!(r"([^,\s]+),\s+(\S+)"); /// let result = re.replace("Springsteen, Bruce", |&: caps: &Captures| { - /// format!("{} {}", caps.at(2), caps.at(1)) + /// format!("{} {}", caps.at(2).unwrap_or(""), caps.at(1).unwrap_or("")) /// }); /// assert_eq!(result.as_slice(), "Bruce Springsteen"); /// # } @@ -712,27 +712,25 @@ impl<'t> Captures<'t> { Some((self.locs[s].unwrap(), self.locs[e].unwrap())) } - /// Returns the matched string for the capture group `i`. - /// If `i` isn't a valid capture group or didn't match anything, then the - /// empty string is returned. - pub fn at(&self, i: uint) -> &'t str { + /// Returns the matched string for the capture group `i`. If `i` isn't + /// a valid capture group or didn't match anything, then `None` is + /// returned. + pub fn at(&self, i: uint) -> Option<&'t str> { match self.pos(i) { - None => "", - Some((s, e)) => { - self.text.slice(s, e) - } + None => None, + Some((s, e)) => Some(self.text.slice(s, e)) } } - /// Returns the matched string for the capture group named `name`. - /// If `name` isn't a valid capture group or didn't match anything, then - /// the empty string is returned. - pub fn name(&self, name: &str) -> &'t str { + /// Returns the matched string for the capture group named `name`. If + /// `name` isn't a valid capture group or didn't match anything, then + /// `None` is returned. + pub fn name(&self, name: &str) -> Option<&'t str> { match self.named { - None => "", + None => None, Some(ref h) => { match h.get(name) { - None => "", + None => None, Some(i) => self.at(*i), } } @@ -769,11 +767,12 @@ impl<'t> Captures<'t> { // FIXME: Don't use regexes for this. It's completely unnecessary. let re = Regex::new(r"(^|[^$]|\b)\$(\w+)").unwrap(); let text = re.replace_all(text, |&mut: refs: &Captures| -> String { - let (pre, name) = (refs.at(1), refs.at(2)); + let pre = refs.at(1).unwrap_or(""); + let name = refs.at(2).unwrap_or(""); format!("{}{}", pre, match from_str::<uint>(name.as_slice()) { - None => self.name(name).to_string(), - Some(i) => self.at(i).to_string(), + None => self.name(name).unwrap_or("").to_string(), + Some(i) => self.at(i).unwrap_or("").to_string(), }) }); let re = Regex::new(r"\$\$").unwrap(); @@ -802,7 +801,7 @@ impl<'t> Iterator<&'t str> for SubCaptures<'t> { fn next(&mut self) -> Option<&'t str> { if self.idx < self.caps.len() { self.idx += 1; - Some(self.caps.at(self.idx - 1)) + Some(self.caps.at(self.idx - 1).unwrap_or("")) } else { None }