Replace regex-based parser for URL lines with open-coded one.

This commit is contained in:
Zack Weinberg 2017-02-13 15:44:51 -05:00
parent 5817351048
commit ff4758c2a0
3 changed files with 46 additions and 25 deletions

View file

@ -4,5 +4,3 @@ version = "0.1.0"
authors = ["Alex Crichton <alex@alexcrichton.com>"]
[dependencies]
regex = "*"
lazy_static = "*"

View file

@ -14,9 +14,6 @@
//! etc. This is run by default on `make check` and as part of the auto
//! builders.
extern crate regex;
#[macro_use] extern crate lazy_static;
use std::fs;
use std::path::{PathBuf, Path};
use std::env;

View file

@ -26,8 +26,6 @@ use std::fs::File;
use std::io::prelude::*;
use std::path::Path;
use regex::Regex;
const COLS: usize = 100;
const LICENSE: &'static str = "\
Copyright <year> The Rust Project Developers. See the COPYRIGHT
@ -40,26 +38,54 @@ http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
option. This file may not be copied, modified, or distributed
except according to those terms.";
/// True if LINE is allowed to be longer than the normal limit.
///
/// Currently there is only one exception: if the line is within a
/// comment, and its entire text is one URL (possibly with a Markdown
/// link label in front), then it's allowed to be overlength. This is
/// because Markdown offers no way to split a line in the middle of a
/// URL, and the length of URLs for external references is beyond our
/// control.
fn long_line_is_ok(line: &str) -> bool {
lazy_static! {
static ref URL_RE: Regex = Regex::new(
// This regexp uses the CommonMark definition of link
// label. It thinks any sequence of nonwhitespace
// characters beginning with "http://" or "https://" is a
// URL. Add more schemas as necessary.
r"^\s*//[!/]?\s+(?:\[(?:[^\]\\]|\\.){1,999}\]:\s+)?https?://\S+$"
).unwrap();
/// Parser states for line_is_url.
#[derive(PartialEq)]
#[allow(non_camel_case_types)]
enum LIUState { EXP_COMMENT_START,
EXP_LINK_LABEL_OR_URL,
EXP_URL,
EXP_END }
/// True if LINE appears to be a line comment containing an URL,
/// possibly with a Markdown link label in front, and nothing else.
/// The Markdown link label, if present, may not contain whitespace.
/// Lines of this form are allowed to be overlength, because Markdown
/// offers no way to split a line in the middle of a URL, and the lengths
/// of URLs to external references are beyond our control.
fn line_is_url(line: &str) -> bool {
use self::LIUState::*;
let mut state: LIUState = EXP_COMMENT_START;
for tok in line.split_whitespace() {
match (state, tok) {
(EXP_COMMENT_START, "//") => state = EXP_LINK_LABEL_OR_URL,
(EXP_COMMENT_START, "///") => state = EXP_LINK_LABEL_OR_URL,
(EXP_COMMENT_START, "//!") => state = EXP_LINK_LABEL_OR_URL,
(EXP_LINK_LABEL_OR_URL, w)
if w.len() >= 4 && w.starts_with("[") && w.ends_with("]:")
=> state = EXP_URL,
(EXP_LINK_LABEL_OR_URL, w)
if w.starts_with("http://") || w.starts_with("https://")
=> state = EXP_END,
(EXP_URL, w)
if w.starts_with("http://") || w.starts_with("https://")
=> state = EXP_END,
(_, _) => return false,
}
}
if URL_RE.is_match(line) {
state == EXP_END
}
/// True if LINE is allowed to be longer than the normal limit.
/// Currently there is only one exception, for long URLs, but more
/// may be added in the future.
fn long_line_is_ok(line: &str) -> bool {
if line_is_url(line) {
return true;
}