From 2eebe614c7ce829cf158e33ca4cce7c7cdda2217 Mon Sep 17 00:00:00 2001 From: David Lukes Date: Fri, 16 Feb 2018 23:21:57 +0100 Subject: [PATCH 01/13] Attempt at checking for license (#209) I'm not quite sure how best to handle loading the license template from a path -- I mean obviously I know *how* to do it, but I'm not sure where to fit it in the codebase :) So this first attempt puts the license template directly into the config file. These are my misgivings about the license template config option as a path to a file (I'd love feedback if some of these are wrong or can be easily circumvented!): 1. I thought the obvious choice for the type of `license_template` in `create_config!` should be `PathBuf`, but `PathBuf` doesn't implement `FromStr` (yet? see https://github.com/rust-lang/rust/issues/44431), so it would have to be wrapped in a tuple struct, and I went down that road for a little while but then it seemed like too much ceremony for too little gain. 2. So a plain `String` then (which, mind you, also means the same `doc_hint()`, i.e. ``, not `` or something like that). The fact that it's a valid path will be checked once we try to read the file. 3. But where in the code should the license template be read? The obvious choice for me would be somewhere in `Config::from_toml()`, but since `Config` is defined via the `create_config!` macro, that would mean tight coupling between the macro invocation (which defines the configuration option `license_template`) and its definition (which would rely on the existence of that option to run the template loading code). 4. `license_template` could also be made a special option which is hardwired into the macro. This gets rid of the tight coupling, but special-casing one of the config options would make the code harder to navigate. 5. 
Instead, the macro could maybe be rewritten to allow for config options that load additional resources from files when the config is being parsed, but that's beyond my skill level I'm afraid (and probably overengineering the problem if it's only ever going to be used for this one option). 6. Finally, the file can be loaded at some later point in time, e.g. in `format_lines()`, right before `check_license()` is called. But to face a potential *IO* error at so late a stage, when the source files have already been parsed... I don't know, it doesn't feel right. BTW I don't like that I'm actually parsing the license template as late as inside `check_license()` either, but for much the same reasons, I don't know where else to put it. If the `Config` were hand-rolled instead of a macro, I'd just define a custom `license_template` option and load and parse the template in the `Config`'s init. But the way things are, I'm a bit at a loss. However, if someone more familiar with the project would kindly provide a few hints as to how the path approach can be done in a way that is as clean as possible in the context of the codebase, I'll be more than happy to implement it! :) --- src/config/mod.rs | 1 + src/lib.rs | 109 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 106 insertions(+), 4 deletions(-) diff --git a/src/config/mod.rs b/src/config/mod.rs index 7e9fae81318a..c16d5bb67995 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -50,6 +50,7 @@ create_config! { comment_width: usize, 80, false, "Maximum length of comments. No effect unless wrap_comments = true"; normalize_comments: bool, false, true, "Convert /* */ comments to // comments where possible"; + license_template: String, String::default(), false, "Check for license"; // Single line expressions and items. 
empty_item_single_line: bool, true, false, diff --git a/src/lib.rs b/src/lib.rs index a3302a86a0f4..858a273a1e5e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,6 +43,7 @@ use syntax::ast; use syntax::codemap::{CodeMap, FilePathMapping}; pub use syntax::codemap::FileName; use syntax::parse::{self, ParseSess}; +use regex::{Regex, RegexBuilder}; use checkstyle::{output_footer, output_header}; use comment::{CharClasses, FullCodeCharKind}; @@ -99,6 +100,10 @@ pub enum ErrorKind { TrailingWhitespace, // TO-DO or FIX-ME item without an issue number BadIssue(Issue), + // License check has failed + LicenseCheck, + // License template could not be parsed + ParsingLicense, } impl fmt::Display for ErrorKind { @@ -111,6 +116,8 @@ impl fmt::Display for ErrorKind { ), ErrorKind::TrailingWhitespace => write!(fmt, "left behind trailing whitespace"), ErrorKind::BadIssue(issue) => write!(fmt, "found {}", issue), + ErrorKind::LicenseCheck => write!(fmt, "license check failed"), + ErrorKind::ParsingLicense => write!(fmt, "parsing regex in license template failed"), } } } @@ -127,7 +134,10 @@ pub struct FormattingError { impl FormattingError { fn msg_prefix(&self) -> &str { match self.kind { - ErrorKind::LineOverflow(..) | ErrorKind::TrailingWhitespace => "error:", + ErrorKind::LineOverflow(..) 
+ | ErrorKind::TrailingWhitespace + | ErrorKind::LicenseCheck + | ErrorKind::ParsingLicense => "error:", ErrorKind::BadIssue(_) => "WARNING:", } } @@ -405,8 +415,39 @@ fn should_report_error( } } +fn check_license(text: &str, license_template: &str) -> Result<bool, regex::Error> { + let mut template_re = String::from("^"); + // the template is parsed as a series of pairs of capture groups of (1) lazy whatever, which + // will be matched literally, followed by (2) a {}-delimited block, which will be matched as a + // regex + let template_parser = RegexBuilder::new(r"(.*?)\{(.*?)\}") + .dot_matches_new_line(true) + .build() + .unwrap(); + // keep track of the last matched offset and ultimately append the tail of the template (if any) + // after the last {} block + let mut last_matched_offset = 0; + for caps in template_parser.captures_iter(license_template) { + if let Some(mat) = caps.get(0) { + last_matched_offset = mat.end() + } + if let Some(mat) = caps.get(1) { + template_re.push_str(&regex::escape(mat.as_str())) + } + if let Some(mat) = caps.get(2) { + let mut re = mat.as_str(); + if re.is_empty() { + re = ".*?"; + } + template_re.push_str(re) + } + } + template_re.push_str(&regex::escape(&license_template[last_matched_offset..])); + let template_re = Regex::new(&template_re)?; + Ok(template_re.is_match(text)) +} + // Formatting done on a char by char or line by line basis. -// FIXME(#209) warn on bad license // FIXME(#20) other stuff for parity with make tidy fn format_lines( text: &mut String, @@ -415,7 +456,6 @@ fn format_lines( config: &Config, report: &mut FormatReport, ) { - // Iterate over the chars in the file map. let mut trims = vec![]; let mut last_wspace: Option<usize> = None; let mut line_len = 0; @@ -428,6 +468,33 @@ fn format_lines( let mut format_line = config.file_lines().contains_line(name, cur_line); let allow_issue_seek = !issue_seeker.is_disabled(); + // Check license. 
+ if config.was_set().license_template() { + match check_license(text, &config.license_template()) { + Ok(check) => { + if !check { + errors.push(FormattingError { + line: cur_line, + kind: ErrorKind::LicenseCheck, + is_comment: false, + is_string: false, + line_buffer: String::new(), + }); + } + } + Err(_) => { + errors.push(FormattingError { + line: cur_line, + kind: ErrorKind::ParsingLicense, + is_comment: false, + is_string: false, + line_buffer: String::new(), + }); + } + } + } + + // Iterate over the chars in the file map. for (kind, (b, c)) in CharClasses::new(text.chars().enumerate()) { if c == '\r' { continue; @@ -853,7 +920,7 @@ pub fn run(input: Input, config: &Config) -> Summary { #[cfg(test)] mod test { - use super::{format_code_block, format_snippet, Config}; + use super::{check_license, format_code_block, format_snippet, Config}; #[test] fn test_no_panic_on_format_snippet_and_format_code_block() { @@ -939,4 +1006,38 @@ false, };"; assert!(test_format_inner(format_code_block, code_block, expected)); } + + #[test] + fn test_check_license() { + assert!(check_license("literal matching", "literal matching").unwrap()); + assert!(!check_license("literal no match", "literal matching").unwrap()); + assert!( + check_license( + "Regex start and end: 2018", + r"{[Rr]egex} start {} end: {\d+}" + ).unwrap() + ); + assert!(!check_license( + "Regex start and end no match: 2018", + r"{[Rr]egex} start {} end: {\d+}" + ).unwrap()); + assert!( + check_license( + "Regex in the middle: 2018 (tm)", + r"Regex {} middle: {\d+} (tm)" + ).unwrap() + ); + assert!(!check_license( + "Regex in the middle no match: 2018 (tm)", + r"Regex {} middle: {\d+} (tm)" + ).unwrap()); + assert!(!check_license("default doesn't match\nacross lines", "default {} lines").unwrap()); + assert!(check_license("", "this is not a valid {[regex}").is_err()); + assert!( + check_license( + "can't parse nested delimiters with regex", + r"can't parse nested delimiters with regex{\.{3}}" + ).is_err() + ); + 
} } From d012d52b4da4286932a43587d8072d149dc9454a Mon Sep 17 00:00:00 2001 From: David Lukes Date: Mon, 19 Feb 2018 11:00:07 +0100 Subject: [PATCH 02/13] Parse template with state machine instead of regex This allows occurrences of `{` and `}` within `{}` placeholders in the template, and also for having literal `{` and `}` in the template by means of escaping (`\{`). Unbalanced, unescaped `}` at the toplevel is a syntax error which currently triggers a panic; I'll add proper error handling as I move the license template parsing code into the config parsing phase. --- src/lib.rs | 99 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 72 insertions(+), 27 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 858a273a1e5e..6771a2ab7940 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,7 +43,7 @@ use syntax::ast; use syntax::codemap::{CodeMap, FilePathMapping}; pub use syntax::codemap::FileName; use syntax::parse::{self, ParseSess}; -use regex::{Regex, RegexBuilder}; +use regex::Regex; use checkstyle::{output_footer, output_header}; use comment::{CharClasses, FullCodeCharKind}; @@ -416,33 +416,77 @@ fn should_report_error( } fn check_license(text: &str, license_template: &str) -> Result { + // the template is parsed using a state machine + enum State { + Lit, + LitEsc, + // the u32 keeps track of brace nesting + Re(u32), + ReEsc(u32), + } + let mut template_re = String::from("^"); - // the template is parsed as a series of pairs of capture groups of (1) lazy whatever, which - // will be matched literally, followed by (2) a {}-delimited block, which will be matched as a - // regex - let template_parser = RegexBuilder::new(r"(.*?)\{(.*?)\}") - .dot_matches_new_line(true) - .build() - .unwrap(); - // keep track of the last matched offset and ultimately append the tail of the template (if any) - // after the last {} block - let mut last_matched_offset = 0; - for caps in template_parser.captures_iter(license_template) { - if let Some(mat) = caps.get(0) 
{ - last_matched_offset = mat.end() - } - if let Some(mat) = caps.get(1) { - template_re.push_str(&regex::escape(mat.as_str())) - } - if let Some(mat) = caps.get(2) { - let mut re = mat.as_str(); - if re.is_empty() { - re = ".*?"; + let mut buffer = String::new(); + let mut state = State::Lit; + for chr in license_template.chars() { + state = match state { + State::Lit => match chr { + '{' => { + template_re.push_str(&regex::escape(&buffer)); + buffer.clear(); + State::Re(1) + } + '}' => panic!("license template syntax error"), + '\\' => State::LitEsc, + _ => { + buffer.push(chr); + State::Lit + } + }, + State::LitEsc => { + buffer.push(chr); + State::Lit + } + State::Re(brace_nesting) => { + match chr { + '{' => { + buffer.push(chr); + State::Re(brace_nesting + 1) + } + '}' => { + match brace_nesting { + 1 => { + // default regex for empty placeholder {} + if buffer.is_empty() { + buffer = ".*?".to_string(); + } + template_re.push_str(&buffer); + buffer.clear(); + State::Lit + } + _ => { + buffer.push(chr); + State::Re(brace_nesting - 1) + } + } + } + '\\' => { + buffer.push(chr); + State::ReEsc(brace_nesting) + } + _ => { + buffer.push(chr); + State::Re(brace_nesting) + } + } + } + State::ReEsc(brace_nesting) => { + buffer.push(chr); + State::Re(brace_nesting) } - template_re.push_str(re) } } - template_re.push_str(&regex::escape(&license_template[last_matched_offset..])); + template_re.push_str(&regex::escape(&buffer)); let template_re = Regex::new(&template_re)?; Ok(template_re.is_match(text)) } @@ -1035,9 +1079,10 @@ false, assert!(check_license("", "this is not a valid {[regex}").is_err()); assert!( check_license( - "can't parse nested delimiters with regex", - r"can't parse nested delimiters with regex{\.{3}}" - ).is_err() + "parse unbalanced nested delimiters{{{", + r"parse unbalanced nested delimiters{\{{3}}" + ).unwrap() ); + assert!(check_license("escaping }", r"escaping \}").unwrap()); } } From ad76741bca51d169d0e167223435bcc08fc2a3af Mon Sep 17 00:00:00 2001 From: 
David Lukes Date: Mon, 19 Feb 2018 17:26:29 +0100 Subject: [PATCH 03/13] Move license template parsing into config phase --- src/config/config_type.rs | 54 +++++++++++- src/config/mod.rs | 179 +++++++++++++++++++++++++++++++++++++- src/lib.rs | 151 +++----------------------------- 3 files changed, 238 insertions(+), 146 deletions(-) diff --git a/src/config/config_type.rs b/src/config/config_type.rs index 950225679a54..02e9b2d10783 100644 --- a/src/config/config_type.rs +++ b/src/config/config_type.rs @@ -78,6 +78,9 @@ macro_rules! create_config { #[derive(Clone)] pub struct Config { + // if a license_template_path has been specified, successfully read, parsed and compiled + // into a regex, it will be stored here + pub license_template: Option, // For each config item, we store a bool indicating whether it has // been accessed and the value, and a bool whether the option was // manually initialised, or taken from the default, @@ -118,8 +121,10 @@ macro_rules! create_config { $( pub fn $i(&mut self, value: $ty) { (self.0).$i.2 = value; - if stringify!($i) == "use_small_heuristics" { - self.0.set_heuristics(); + match stringify!($i) { + "use_small_heuristics" => self.0.set_heuristics(), + "license_template_path" => self.0.set_license_template(), + &_ => (), } } )+ @@ -189,6 +194,7 @@ macro_rules! create_config { } )+ self.set_heuristics(); + self.set_license_template(); self } @@ -276,8 +282,10 @@ macro_rules! create_config { _ => panic!("Unknown config key in override: {}", key) } - if key == "use_small_heuristics" { - self.set_heuristics(); + match key { + "use_small_heuristics" => self.set_heuristics(), + "license_template_path" => self.set_license_template(), + &_ => (), } } @@ -382,12 +390,50 @@ macro_rules! 
create_config { self.set().width_heuristics(WidthHeuristics::null()); } } + + fn set_license_template(&mut self) { + let license_template_path = self.license_template_path(); + let mut license_template_file = match File::open(&license_template_path) { + Ok(file) => file, + Err(e) => { + eprintln!("Warning: unable to open license template file {:?}: {}", + license_template_path, e); + return; + } + }; + let mut license_template_str = String::new(); + match license_template_file.read_to_string(&mut license_template_str) { + Ok(_) => (), + Err(e) => { + eprintln!("Warning: unable to read from license template file {:?}: {}", + license_template_path, e); + return; + } + } + let license_template_parsed = match parse_license_template(&license_template_str) { + Ok(string) => string, + Err(e) => { + eprintln!("Warning: unable to parse license template file {:?}: {}", + license_template_path, e); + return; + } + }; + self.license_template = match Regex::new(&license_template_parsed) { + Ok(re) => Some(re), + Err(e) => { + eprintln!("Warning: regex syntax error in placeholder, unable to compile \ + license template from file {:?}: {}", license_template_path, e); + return; + } + } + } } // Template for the default configuration impl Default for Config { fn default() -> Config { Config { + license_template: None, $( $i: (Cell::new(false), false, $def, $stb), )+ diff --git a/src/config/mod.rs b/src/config/mod.rs index c16d5bb67995..53078716414f 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -15,6 +15,8 @@ use std::fs::File; use std::io::{Error, ErrorKind, Read}; use std::path::{Path, PathBuf}; +use regex::Regex; + #[macro_use] mod config_type; #[macro_use] @@ -50,7 +52,7 @@ create_config! { comment_width: usize, 80, false, "Maximum length of comments. 
No effect unless wrap_comments = true"; normalize_comments: bool, false, true, "Convert /* */ comments to // comments where possible"; - license_template: String, String::default(), false, "Check for license"; + license_template_path: String, String::default(), false, "Beginning of file must match license template"; // Single line expressions and items. empty_item_single_line: bool, true, false, @@ -172,9 +174,145 @@ pub fn get_toml_path(dir: &Path) -> Result, Error> { Ok(None) } +/// Convert the license template into a string which can be turned into a regex. +/// +/// The license template could use regex syntax directly, but that would require a lot of manual +/// escaping, which is inconvenient. It is therefore literal by default, with optional regex +/// subparts delimited by `{` and `}`. Additionally: +/// +/// - to insert literal `{`, `}` or `\`, escape it with `\` +/// - an empty regex placeholder (`{}`) is shorthand for `{.*?}` +/// +/// This function parses this input format and builds a properly escaped *string* representation of +/// the equivalent regular expression. It **does not** however guarantee that the returned string is +/// a syntactically valid regular expression. +/// +/// # Examples +/// +/// ``` +/// assert_eq!( +/// rustfmt_config::parse_license_template( +/// r" +/// // Copyright {\d+} The \} Rust \\ Project \{ Developers. See the {([A-Z]+)} +/// // file at the top-level directory of this distribution and at +/// // {}. +/// // +/// // Licensed under the Apache License, Version 2.0 or the MIT license +/// // , at your +/// // option. This file may not be copied, modified, or distributed +/// // except according to those terms. +/// " +/// ).unwrap(), +/// r"^ +/// // Copyright \d+ The \} Rust \\ Project \{ Developers\. See the ([A-Z]+) +/// // file at the top\-level directory of this distribution and at +/// // .*?\. +/// // +/// // Licensed under the Apache License, Version 2\.0 or the MIT license +/// // , at your +/// // option\. 
This file may not be copied, modified, or distributed +/// // except according to those terms\. +/// " +/// ); +/// ``` +pub fn parse_license_template(template: &str) -> Result<String, String> { + // the template is parsed using a state machine + enum State { + Lit, + LitEsc, + // the u32 keeps track of brace nesting + Re(u32), + ReEsc(u32), + } + + let mut parsed = String::from("^"); + let mut buffer = String::new(); + let mut state = State::Lit; + let mut linum = 1; + // keeps track of last line on which a regex placeholder was started + let mut open_brace_line = 0; + for chr in template.chars() { + if chr == '\n' { + linum += 1; + } + state = match state { + State::Lit => match chr { + '{' => { + parsed.push_str(&regex::escape(&buffer)); + buffer.clear(); + open_brace_line = linum; + State::Re(1) + } + '}' => return Err(format!("escape or balance closing brace on l. {}", linum)), + '\\' => State::LitEsc, + _ => { + buffer.push(chr); + State::Lit + } + }, + State::LitEsc => { + buffer.push(chr); + State::Lit + } + State::Re(brace_nesting) => { + match chr { + '{' => { + buffer.push(chr); + State::Re(brace_nesting + 1) + } + '}' => { + match brace_nesting { + 1 => { + // default regex for empty placeholder {} + if buffer.is_empty() { + buffer = ".*?".to_string(); + } + parsed.push_str(&buffer); + buffer.clear(); + State::Lit + } + _ => { + buffer.push(chr); + State::Re(brace_nesting - 1) + } + } + } + '\\' => { + buffer.push(chr); + State::ReEsc(brace_nesting) + } + _ => { + buffer.push(chr); + State::Re(brace_nesting) + } + } + } + State::ReEsc(brace_nesting) => { + buffer.push(chr); + State::Re(brace_nesting) + } + } + } + match state { + State::Re(_) | State::ReEsc(_) => { + return Err(format!( + "escape or balance opening brace on l. {}", + open_brace_line + )); + } + State::LitEsc => return Err(format!("incomplete escape sequence on l. 
{}", linum)), + _ => (), + } + parsed.push_str(&regex::escape(&buffer)); + + Ok(parsed) +} + #[cfg(test)] mod test { - use super::Config; + use super::{parse_license_template, Config}; #[test] fn test_config_set() { @@ -211,6 +349,43 @@ mod test { assert_eq!(config.was_set().verbose(), false); } + #[test] + fn test_parse_license_template() { + assert_eq!( + parse_license_template("literal (.*)").unwrap(), + r"^literal \(\.\*\)" + ); + assert_eq!( + parse_license_template(r"escaping \}").unwrap(), + r"^escaping \}" + ); + assert!(parse_license_template("unbalanced } without escape").is_err()); + assert_eq!( + parse_license_template(r"{\d+} place{-?}holder{s?}").unwrap(), + r"^\d+ place-?holders?" + ); + assert_eq!( + parse_license_template("default {}").unwrap(), + "^default .*?" + ); + assert_eq!( + parse_license_template(r"unbalanced nested braces {\{{3}}").unwrap(), + r"^unbalanced nested braces \{{3}" + ); + assert_eq!( + parse_license_template("parsing error }").unwrap_err(), + "escape or balance closing brace on l. 1" + ); + assert_eq!( + parse_license_template("parsing error {\nsecond line").unwrap_err(), + "escape or balance opening brace on l. 1" + ); + assert_eq!( + parse_license_template(r"parsing error \").unwrap_err(), + "incomplete escape sequence on l. 
1" + ); + } + // FIXME(#2183) these tests cannot be run in parallel because they use env vars // #[test] // fn test_as_not_nightly_channel() { diff --git a/src/lib.rs b/src/lib.rs index 6771a2ab7940..e53f8bfb9076 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,7 +43,6 @@ use syntax::ast; use syntax::codemap::{CodeMap, FilePathMapping}; pub use syntax::codemap::FileName; use syntax::parse::{self, ParseSess}; -use regex::Regex; use checkstyle::{output_footer, output_header}; use comment::{CharClasses, FullCodeCharKind}; @@ -102,8 +101,6 @@ pub enum ErrorKind { BadIssue(Issue), // License check has failed LicenseCheck, - // License template could not be parsed - ParsingLicense, } impl fmt::Display for ErrorKind { @@ -117,7 +114,6 @@ impl fmt::Display for ErrorKind { ErrorKind::TrailingWhitespace => write!(fmt, "left behind trailing whitespace"), ErrorKind::BadIssue(issue) => write!(fmt, "found {}", issue), ErrorKind::LicenseCheck => write!(fmt, "license check failed"), - ErrorKind::ParsingLicense => write!(fmt, "parsing regex in license template failed"), } } } @@ -136,8 +132,7 @@ impl FormattingError { match self.kind { ErrorKind::LineOverflow(..) 
+ | ErrorKind::TrailingWhitespace - | ErrorKind::LicenseCheck - | ErrorKind::ParsingLicense => "error:", + | ErrorKind::LicenseCheck => "error:", ErrorKind::BadIssue(_) => "WARNING:", } } @@ -415,82 +410,6 @@ fn should_report_error( } } -fn check_license(text: &str, license_template: &str) -> Result<bool, regex::Error> { - // the template is parsed using a state machine - enum State { - Lit, - LitEsc, - // the u32 keeps track of brace nesting - Re(u32), - ReEsc(u32), - } - - let mut template_re = String::from("^"); - let mut buffer = String::new(); - let mut state = State::Lit; - for chr in license_template.chars() { - state = match state { - State::Lit => match chr { - '{' => { - template_re.push_str(&regex::escape(&buffer)); - buffer.clear(); - State::Re(1) - } - '}' => panic!("license template syntax error"), - '\\' => State::LitEsc, - _ => { - buffer.push(chr); - State::Lit - } - }, - State::LitEsc => { - buffer.push(chr); - State::Lit - } - State::Re(brace_nesting) => { - match chr { - '{' => { - buffer.push(chr); - State::Re(brace_nesting + 1) - } - '}' => { - match brace_nesting { - 1 => { - // default regex for empty placeholder {} - if buffer.is_empty() { - buffer = ".*?".to_string(); - } - template_re.push_str(&buffer); - buffer.clear(); - State::Lit - } - _ => { - buffer.push(chr); - State::Re(brace_nesting - 1) - } - } - } - '\\' => { - buffer.push(chr); - State::ReEsc(brace_nesting) - } - _ => { - buffer.push(chr); - State::Re(brace_nesting) - } - } - } - State::ReEsc(brace_nesting) => { - buffer.push(chr); - State::Re(brace_nesting) - } - } - } - template_re.push_str(&regex::escape(&buffer)); - let template_re = Regex::new(&template_re)?; - Ok(template_re.is_match(text)) -} - // Formatting done on a char by char or line by line basis. // FIXME(#20) other stuff for parity with make tidy fn format_lines( @@ -513,28 +432,15 @@ fn format_lines( let allow_issue_seek = !issue_seeker.is_disabled(); // Check license. 
- if config.was_set().license_template() { - match check_license(text, &config.license_template()) { - Ok(check) => { - if !check { - errors.push(FormattingError { - line: cur_line, - kind: ErrorKind::LicenseCheck, - is_comment: false, - is_string: false, - line_buffer: String::new(), - }); - } - } - Err(_) => { - errors.push(FormattingError { - line: cur_line, - kind: ErrorKind::ParsingLicense, - is_comment: false, - is_string: false, - line_buffer: String::new(), - }); - } + if let Some(ref license_template) = config.license_template { + if !license_template.is_match(text) { + errors.push(FormattingError { + line: cur_line, + kind: ErrorKind::LicenseCheck, + is_comment: false, + is_string: false, + line_buffer: String::new(), + }); } } @@ -964,7 +870,7 @@ pub fn run(input: Input, config: &Config) -> Summary { #[cfg(test)] mod test { - use super::{check_license, format_code_block, format_snippet, Config}; + use super::{format_code_block, format_snippet, Config}; #[test] fn test_no_panic_on_format_snippet_and_format_code_block() { @@ -1050,39 +956,4 @@ false, };"; assert!(test_format_inner(format_code_block, code_block, expected)); } - - #[test] - fn test_check_license() { - assert!(check_license("literal matching", "literal matching").unwrap()); - assert!(!check_license("literal no match", "literal matching").unwrap()); - assert!( - check_license( - "Regex start and end: 2018", - r"{[Rr]egex} start {} end: {\d+}" - ).unwrap() - ); - assert!(!check_license( - "Regex start and end no match: 2018", - r"{[Rr]egex} start {} end: {\d+}" - ).unwrap()); - assert!( - check_license( - "Regex in the middle: 2018 (tm)", - r"Regex {} middle: {\d+} (tm)" - ).unwrap() - ); - assert!(!check_license( - "Regex in the middle no match: 2018 (tm)", - r"Regex {} middle: {\d+} (tm)" - ).unwrap()); - assert!(!check_license("default doesn't match\nacross lines", "default {} lines").unwrap()); - assert!(check_license("", "this is not a valid {[regex}").is_err()); - assert!( - 
check_license( - "parse unbalanced nested delimiters{{{", - r"parse unbalanced nested delimiters{\{{3}}" - ).unwrap() - ); - assert!(check_license("escaping }", r"escaping \}").unwrap()); - } } From ead81205cc1f3597a3f7ecdde993d81f0d859e03 Mon Sep 17 00:00:00 2001 From: David Lukes Date: Mon, 26 Feb 2018 12:23:15 +0100 Subject: [PATCH 04/13] =?UTF-8?q?Simplify=20match=20=E2=86=92=20if=20let?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/config/config_type.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/config/config_type.rs b/src/config/config_type.rs index 02e9b2d10783..ad2ae6f74227 100644 --- a/src/config/config_type.rs +++ b/src/config/config_type.rs @@ -402,14 +402,11 @@ macro_rules! create_config { } }; let mut license_template_str = String::new(); - match license_template_file.read_to_string(&mut license_template_str) { - Ok(_) => (), - Err(e) => { - eprintln!("Warning: unable to read from license template file {:?}: {}", - license_template_path, e); - return; - } - } + if let Err(e) = license_template_file.read_to_string(&mut license_template_str) { + eprintln!("Warning: unable to read from license template file {:?}: {}", + license_template_path, e); + return; + }; let license_template_parsed = match parse_license_template(&license_template_str) { Ok(string) => string, Err(e) => { From e48d7f3ebb3ee16d24ec8351f16768ea8f6ed9b0 Mon Sep 17 00:00:00 2001 From: David Lukes Date: Mon, 26 Feb 2018 12:31:09 +0100 Subject: [PATCH 05/13] Account for possibly empty license_template_path Don't attempt to load license_template if the path wasn't specified. 
--- src/config/config_type.rs | 58 ++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/src/config/config_type.rs b/src/config/config_type.rs index ad2ae6f74227..e6e5c5ef540d 100644 --- a/src/config/config_type.rs +++ b/src/config/config_type.rs @@ -392,35 +392,37 @@ macro_rules! create_config { } fn set_license_template(&mut self) { - let license_template_path = self.license_template_path(); - let mut license_template_file = match File::open(&license_template_path) { - Ok(file) => file, - Err(e) => { - eprintln!("Warning: unable to open license template file {:?}: {}", - license_template_path, e); - return; - } - }; - let mut license_template_str = String::new(); - if let Err(e) = license_template_file.read_to_string(&mut license_template_str) { - eprintln!("Warning: unable to read from license template file {:?}: {}", - license_template_path, e); - return; - }; - let license_template_parsed = match parse_license_template(&license_template_str) { - Ok(string) => string, - Err(e) => { - eprintln!("Warning: unable to parse license template file {:?}: {}", - license_template_path, e); - return; - } - }; - self.license_template = match Regex::new(&license_template_parsed) { - Ok(re) => Some(re), - Err(e) => { - eprintln!("Warning: regex syntax error in placeholder, unable to compile \ - license template from file {:?}: {}", license_template_path, e); + if self.was_set().license_template_path() { + let license_template_path = self.license_template_path(); + let mut license_template_file = match File::open(&license_template_path) { + Ok(file) => file, + Err(e) => { + eprintln!("Warning: unable to open license template file {:?}: {}", + license_template_path, e); + return; + } + }; + let mut license_template_str = String::new(); + if let Err(e) = license_template_file.read_to_string(&mut license_template_str) { + eprintln!("Warning: unable to read from license template file {:?}: {}", + license_template_path, e); return; 
+ }; + let license_template_parsed = match parse_license_template(&license_template_str) { + Ok(string) => string, + Err(e) => { + eprintln!("Warning: unable to parse license template file {:?}: {}", + license_template_path, e); + return; + } + }; + self.license_template = match Regex::new(&license_template_parsed) { + Ok(re) => Some(re), + Err(e) => { + eprintln!("Warning: regex syntax error in placeholder, unable to compile \ + license template from file {:?}: {}", license_template_path, e); + return; + } } } } From 310c1146f28eab02595e3308cdb2c3d8dbbeb3a7 Mon Sep 17 00:00:00 2001 From: David Lukes Date: Mon, 26 Feb 2018 12:49:12 +0100 Subject: [PATCH 06/13] Move license template parsing into submodule --- rustfmt-config/src/license.rs | 174 +++++++++++++++++++++++++++++++++ src/config/config_type.rs | 2 +- src/config/mod.rs | 176 +--------------------------------- 3 files changed, 177 insertions(+), 175 deletions(-) create mode 100644 rustfmt-config/src/license.rs diff --git a/rustfmt-config/src/license.rs b/rustfmt-config/src/license.rs new file mode 100644 index 000000000000..4563f8a7809f --- /dev/null +++ b/rustfmt-config/src/license.rs @@ -0,0 +1,174 @@ +use regex; + +/// Convert the license template into a string which can be turned into a regex. +/// +/// The license template could use regex syntax directly, but that would require a lot of manual +/// escaping, which is inconvenient. It is therefore literal by default, with optional regex +/// subparts delimited by `{` and `}`. Additionally: +/// +/// - to insert literal `{`, `}` or `\`, escape it with `\` +/// - an empty regex placeholder (`{}`) is shorthand for `{.*?}` +/// +/// This function parses this input format and builds a properly escaped *string* representation of +/// the equivalent regular expression. It **does not** however guarantee that the returned string is +/// a syntactically valid regular expression. 
+/// +/// # Examples +/// +/// ``` +/// # use rustfmt_config::license; +/// assert_eq!( +/// license::parse_template( +/// r" +/// // Copyright {\d+} The \} Rust \\ Project \{ Developers. See the {([A-Z]+)} +/// // file at the top-level directory of this distribution and at +/// // {}. +/// // +/// // Licensed under the Apache License, Version 2.0 or the MIT license +/// // , at your +/// // option. This file may not be copied, modified, or distributed +/// // except according to those terms. +/// " +/// ).unwrap(), +/// r"^ +/// // Copyright \d+ The \} Rust \\ Project \{ Developers\. See the ([A-Z]+) +/// // file at the top\-level directory of this distribution and at +/// // .*?\. +/// // +/// // Licensed under the Apache License, Version 2\.0 or the MIT license +/// // , at your +/// // option\. This file may not be copied, modified, or distributed +/// // except according to those terms\. +/// " +/// ); +/// ``` +pub fn parse_template(template: &str) -> Result { + // the template is parsed using a state machine + enum State { + Lit, + LitEsc, + // the u32 keeps track of brace nesting + Re(u32), + ReEsc(u32), + } + + let mut parsed = String::from("^"); + let mut buffer = String::new(); + let mut state = State::Lit; + let mut linum = 1; + // keeps track of last line on which a regex placeholder was started + let mut open_brace_line = 0; + for chr in template.chars() { + if chr == '\n' { + linum += 1; + } + state = match state { + State::Lit => match chr { + '{' => { + parsed.push_str(®ex::escape(&buffer)); + buffer.clear(); + open_brace_line = linum; + State::Re(1) + } + '}' => return Err(format!("escape or balance closing brace on l. 
{}", linum)), + '\\' => State::LitEsc, + _ => { + buffer.push(chr); + State::Lit + } + }, + State::LitEsc => { + buffer.push(chr); + State::Lit + } + State::Re(brace_nesting) => { + match chr { + '{' => { + buffer.push(chr); + State::Re(brace_nesting + 1) + } + '}' => { + match brace_nesting { + 1 => { + // default regex for empty placeholder {} + if buffer.is_empty() { + buffer = ".*?".to_string(); + } + parsed.push_str(&buffer); + buffer.clear(); + State::Lit + } + _ => { + buffer.push(chr); + State::Re(brace_nesting - 1) + } + } + } + '\\' => { + buffer.push(chr); + State::ReEsc(brace_nesting) + } + _ => { + buffer.push(chr); + State::Re(brace_nesting) + } + } + } + State::ReEsc(brace_nesting) => { + buffer.push(chr); + State::Re(brace_nesting) + } + } + } + match state { + State::Re(_) | State::ReEsc(_) => { + return Err(format!( + "escape or balance opening brace on l. {}", + open_brace_line + )); + } + State::LitEsc => return Err(format!("incomplete escape sequence on l. {}", linum)), + _ => (), + } + parsed.push_str(®ex::escape(&buffer)); + + Ok(parsed) +} + +#[cfg(test)] +mod test { + use super::parse_template; + + #[test] + fn test_parse_license_template() { + assert_eq!( + parse_template("literal (.*)").unwrap(), + r"^literal \(\.\*\)" + ); + assert_eq!(parse_template(r"escaping \}").unwrap(), r"^escaping \}"); + assert!(parse_template("unbalanced } without escape").is_err()); + assert_eq!( + parse_template(r"{\d+} place{-?}holder{s?}").unwrap(), + r"^\d+ place-?holders?" + ); + assert_eq!(parse_template("default {}").unwrap(), "^default .*?"); + assert_eq!( + parse_template(r"unbalanced nested braces {\{{3}}").unwrap(), + r"^unbalanced nested braces \{{3}" + ); + assert_eq!( + parse_template("parsing error }").unwrap_err(), + "escape or balance closing brace on l. 1" + ); + assert_eq!( + parse_template("parsing error {\nsecond line").unwrap_err(), + "escape or balance opening brace on l. 
1" + ); + assert_eq!( + parse_template(r"parsing error \").unwrap_err(), + "incomplete escape sequence on l. 1" + ); + } +} diff --git a/src/config/config_type.rs b/src/config/config_type.rs index e6e5c5ef540d..fe0e4c309e25 100644 --- a/src/config/config_type.rs +++ b/src/config/config_type.rs @@ -408,7 +408,7 @@ macro_rules! create_config { license_template_path, e); return; }; - let license_template_parsed = match parse_license_template(&license_template_str) { + let license_template_parsed = match license::parse_template(&license_template_str) { Ok(string) => string, Err(e) => { eprintln!("Warning: unable to parse license template file {:?}: {}", diff --git a/src/config/mod.rs b/src/config/mod.rs index 53078716414f..0d4ec8557d38 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -25,6 +25,7 @@ mod options; pub mod file_lines; pub mod lists; pub mod summary; +pub mod license; use config::config_type::ConfigType; use config::file_lines::FileLines; @@ -174,145 +175,9 @@ pub fn get_toml_path(dir: &Path) -> Result, Error> { Ok(None) } -/// Convert the license template into a string which can be turned into a regex. -/// -/// The license template could use regex syntax directly, but that would require a lot of manual -/// escaping, which is inconvenient. It is therefore literal by default, with optional regex -/// subparts delimited by `{` and `}`. Additionally: -/// -/// - to insert literal `{`, `}` or `\`, escape it with `\` -/// - an empty regex placeholder (`{}`) is shorthand for `{.*?}` -/// -/// This function parses this input format and builds a properly escaped *string* representation of -/// the equivalent regular expression. It **does not** however guarantee that the returned string is -/// a syntactically valid regular expression. -/// -/// # Examples -/// -/// ``` -/// assert_eq!( -/// rustfmt_config::parse_license_template( -/// r" -/// // Copyright {\d+} The \} Rust \\ Project \{ Developers. 
See the {([A-Z]+)} -/// // file at the top-level directory of this distribution and at -/// // {}. -/// // -/// // Licensed under the Apache License, Version 2.0 or the MIT license -/// // , at your -/// // option. This file may not be copied, modified, or distributed -/// // except according to those terms. -/// " -/// ).unwrap(), -/// r"^ -/// // Copyright \d+ The \} Rust \\ Project \{ Developers\. See the ([A-Z]+) -/// // file at the top\-level directory of this distribution and at -/// // .*?\. -/// // -/// // Licensed under the Apache License, Version 2\.0 or the MIT license -/// // , at your -/// // option\. This file may not be copied, modified, or distributed -/// // except according to those terms\. -/// " -/// ); -/// ``` -pub fn parse_license_template(template: &str) -> Result { - // the template is parsed using a state machine - enum State { - Lit, - LitEsc, - // the u32 keeps track of brace nesting - Re(u32), - ReEsc(u32), - } - - let mut parsed = String::from("^"); - let mut buffer = String::new(); - let mut state = State::Lit; - let mut linum = 1; - // keeps track of last line on which a regex placeholder was started - let mut open_brace_line = 0; - for chr in template.chars() { - if chr == '\n' { - linum += 1; - } - state = match state { - State::Lit => match chr { - '{' => { - parsed.push_str(®ex::escape(&buffer)); - buffer.clear(); - open_brace_line = linum; - State::Re(1) - } - '}' => return Err(format!("escape or balance closing brace on l. 
{}", linum)), - '\\' => State::LitEsc, - _ => { - buffer.push(chr); - State::Lit - } - }, - State::LitEsc => { - buffer.push(chr); - State::Lit - } - State::Re(brace_nesting) => { - match chr { - '{' => { - buffer.push(chr); - State::Re(brace_nesting + 1) - } - '}' => { - match brace_nesting { - 1 => { - // default regex for empty placeholder {} - if buffer.is_empty() { - buffer = ".*?".to_string(); - } - parsed.push_str(&buffer); - buffer.clear(); - State::Lit - } - _ => { - buffer.push(chr); - State::Re(brace_nesting - 1) - } - } - } - '\\' => { - buffer.push(chr); - State::ReEsc(brace_nesting) - } - _ => { - buffer.push(chr); - State::Re(brace_nesting) - } - } - } - State::ReEsc(brace_nesting) => { - buffer.push(chr); - State::Re(brace_nesting) - } - } - } - match state { - State::Re(_) | State::ReEsc(_) => { - return Err(format!( - "escape or balance opening brace on l. {}", - open_brace_line - )); - } - State::LitEsc => return Err(format!("incomplete escape sequence on l. {}", linum)), - _ => (), - } - parsed.push_str(®ex::escape(&buffer)); - - Ok(parsed) -} - #[cfg(test)] mod test { - use super::{parse_license_template, Config}; + use super::Config; #[test] fn test_config_set() { @@ -349,43 +214,6 @@ mod test { assert_eq!(config.was_set().verbose(), false); } - #[test] - fn test_parse_license_template() { - assert_eq!( - parse_license_template("literal (.*)").unwrap(), - r"^literal \(\.\*\)" - ); - assert_eq!( - parse_license_template(r"escaping \}").unwrap(), - r"^escaping \}" - ); - assert!(parse_license_template("unbalanced } without escape").is_err()); - assert_eq!( - parse_license_template(r"{\d+} place{-?}holder{s?}").unwrap(), - r"^\d+ place-?holders?" - ); - assert_eq!( - parse_license_template("default {}").unwrap(), - "^default .*?" 
- ); - assert_eq!( - parse_license_template(r"unbalanced nested braces {\{{3}}").unwrap(), - r"^unbalanced nested braces \{{3}" - ); - assert_eq!( - parse_license_template("parsing error }").unwrap_err(), - "escape or balance closing brace on l. 1" - ); - assert_eq!( - parse_license_template("parsing error {\nsecond line").unwrap_err(), - "escape or balance opening brace on l. 1" - ); - assert_eq!( - parse_license_template(r"parsing error \").unwrap_err(), - "incomplete escape sequence on l. 1" - ); - } - // FIXME(#2183) these tests cannot be run in parallel because they use env vars // #[test] // fn test_as_not_nightly_channel() { From bbd6d9cd555d9b35677086bc93e66212ea173cc5 Mon Sep 17 00:00:00 2001 From: David Lukes Date: Mon, 26 Feb 2018 15:41:38 +0100 Subject: [PATCH 07/13] Refactor parsing code into struct This also splits the giant state machine match expression into separate methods. --- rustfmt-config/src/license.rs | 174 --------------------------- src/config/config_type.rs | 2 +- src/config/license.rs | 213 ++++++++++++++++++++++++++++++++++ src/config/mod.rs | 1 + 4 files changed, 215 insertions(+), 175 deletions(-) delete mode 100644 rustfmt-config/src/license.rs create mode 100644 src/config/license.rs diff --git a/rustfmt-config/src/license.rs b/rustfmt-config/src/license.rs deleted file mode 100644 index 4563f8a7809f..000000000000 --- a/rustfmt-config/src/license.rs +++ /dev/null @@ -1,174 +0,0 @@ -use regex; - -/// Convert the license template into a string which can be turned into a regex. -/// -/// The license template could use regex syntax directly, but that would require a lot of manual -/// escaping, which is inconvenient. It is therefore literal by default, with optional regex -/// subparts delimited by `{` and `}`. 
Additionally: -/// -/// - to insert literal `{`, `}` or `\`, escape it with `\` -/// - an empty regex placeholder (`{}`) is shorthand for `{.*?}` -/// -/// This function parses this input format and builds a properly escaped *string* representation of -/// the equivalent regular expression. It **does not** however guarantee that the returned string is -/// a syntactically valid regular expression. -/// -/// # Examples -/// -/// ``` -/// # use rustfmt_config::license; -/// assert_eq!( -/// license::parse_template( -/// r" -/// // Copyright {\d+} The \} Rust \\ Project \{ Developers. See the {([A-Z]+)} -/// // file at the top-level directory of this distribution and at -/// // {}. -/// // -/// // Licensed under the Apache License, Version 2.0 or the MIT license -/// // , at your -/// // option. This file may not be copied, modified, or distributed -/// // except according to those terms. -/// " -/// ).unwrap(), -/// r"^ -/// // Copyright \d+ The \} Rust \\ Project \{ Developers\. See the ([A-Z]+) -/// // file at the top\-level directory of this distribution and at -/// // .*?\. -/// // -/// // Licensed under the Apache License, Version 2\.0 or the MIT license -/// // , at your -/// // option\. This file may not be copied, modified, or distributed -/// // except according to those terms\. 
-/// " -/// ); -/// ``` -pub fn parse_template(template: &str) -> Result { - // the template is parsed using a state machine - enum State { - Lit, - LitEsc, - // the u32 keeps track of brace nesting - Re(u32), - ReEsc(u32), - } - - let mut parsed = String::from("^"); - let mut buffer = String::new(); - let mut state = State::Lit; - let mut linum = 1; - // keeps track of last line on which a regex placeholder was started - let mut open_brace_line = 0; - for chr in template.chars() { - if chr == '\n' { - linum += 1; - } - state = match state { - State::Lit => match chr { - '{' => { - parsed.push_str(®ex::escape(&buffer)); - buffer.clear(); - open_brace_line = linum; - State::Re(1) - } - '}' => return Err(format!("escape or balance closing brace on l. {}", linum)), - '\\' => State::LitEsc, - _ => { - buffer.push(chr); - State::Lit - } - }, - State::LitEsc => { - buffer.push(chr); - State::Lit - } - State::Re(brace_nesting) => { - match chr { - '{' => { - buffer.push(chr); - State::Re(brace_nesting + 1) - } - '}' => { - match brace_nesting { - 1 => { - // default regex for empty placeholder {} - if buffer.is_empty() { - buffer = ".*?".to_string(); - } - parsed.push_str(&buffer); - buffer.clear(); - State::Lit - } - _ => { - buffer.push(chr); - State::Re(brace_nesting - 1) - } - } - } - '\\' => { - buffer.push(chr); - State::ReEsc(brace_nesting) - } - _ => { - buffer.push(chr); - State::Re(brace_nesting) - } - } - } - State::ReEsc(brace_nesting) => { - buffer.push(chr); - State::Re(brace_nesting) - } - } - } - match state { - State::Re(_) | State::ReEsc(_) => { - return Err(format!( - "escape or balance opening brace on l. {}", - open_brace_line - )); - } - State::LitEsc => return Err(format!("incomplete escape sequence on l. 
{}", linum)), - _ => (), - } - parsed.push_str(®ex::escape(&buffer)); - - Ok(parsed) -} - -#[cfg(test)] -mod test { - use super::parse_template; - - #[test] - fn test_parse_license_template() { - assert_eq!( - parse_template("literal (.*)").unwrap(), - r"^literal \(\.\*\)" - ); - assert_eq!(parse_template(r"escaping \}").unwrap(), r"^escaping \}"); - assert!(parse_template("unbalanced } without escape").is_err()); - assert_eq!( - parse_template(r"{\d+} place{-?}holder{s?}").unwrap(), - r"^\d+ place-?holders?" - ); - assert_eq!(parse_template("default {}").unwrap(), "^default .*?"); - assert_eq!( - parse_template(r"unbalanced nested braces {\{{3}}").unwrap(), - r"^unbalanced nested braces \{{3}" - ); - assert_eq!( - parse_template("parsing error }").unwrap_err(), - "escape or balance closing brace on l. 1" - ); - assert_eq!( - parse_template("parsing error {\nsecond line").unwrap_err(), - "escape or balance opening brace on l. 1" - ); - assert_eq!( - parse_template(r"parsing error \").unwrap_err(), - "incomplete escape sequence on l. 1" - ); - } -} diff --git a/src/config/config_type.rs b/src/config/config_type.rs index fe0e4c309e25..8b9a6b2d84dc 100644 --- a/src/config/config_type.rs +++ b/src/config/config_type.rs @@ -408,7 +408,7 @@ macro_rules! 
create_config { license_template_path, e); return; }; - let license_template_parsed = match license::parse_template(&license_template_str) { + let license_template_parsed = match TemplateParser::parse(&license_template_str) { Ok(string) => string, Err(e) => { eprintln!("Warning: unable to parse license template file {:?}: {}", diff --git a/src/config/license.rs b/src/config/license.rs new file mode 100644 index 000000000000..3de045991292 --- /dev/null +++ b/src/config/license.rs @@ -0,0 +1,213 @@ +use regex; + +// the template is parsed using a state machine +enum ParsingState { + Lit, + LitEsc, + // the u32 keeps track of brace nesting + Re(u32), + ReEsc(u32), + Abort(String), +} + +use self::ParsingState::*; + +pub struct TemplateParser { + parsed: String, + buffer: String, + state: ParsingState, + linum: u32, + open_brace_line: u32, +} + +impl TemplateParser { + fn new() -> Self { + Self { + parsed: "^".to_owned(), + buffer: String::new(), + state: Lit, + linum: 1, + // keeps track of last line on which a regex placeholder was started + open_brace_line: 0, + } + } + + /// Convert a license template into a string which can be turned into a regex. + /// + /// The license template could use regex syntax directly, but that would require a lot of manual + /// escaping, which is inconvenient. It is therefore literal by default, with optional regex + /// subparts delimited by `{` and `}`. Additionally: + /// + /// - to insert literal `{`, `}` or `\`, escape it with `\` + /// - an empty regex placeholder (`{}`) is shorthand for `{.*?}` + /// + /// This function parses this input format and builds a properly escaped *string* representation + /// of the equivalent regular expression. It **does not** however guarantee that the returned + /// string is a syntactically valid regular expression. 
+ /// + /// # Examples + /// + /// ``` + /// # use rustfmt_config::license::TemplateParser; + /// assert_eq!( + /// TemplateParser::parse( + /// r" + /// // Copyright {\d+} The \} Rust \\ Project \{ Developers. See the {([A-Z]+)} + /// // file at the top-level directory of this distribution and at + /// // {}. + /// // + /// // Licensed under the Apache License, Version 2.0 or the MIT license + /// // , at your + /// // option. This file may not be copied, modified, or distributed + /// // except according to those terms. + /// " + /// ).unwrap(), + /// r"^ + /// // Copyright \d+ The \} Rust \\ Project \{ Developers\. See the ([A-Z]+) + /// // file at the top\-level directory of this distribution and at + /// // .*?\. + /// // + /// // Licensed under the Apache License, Version 2\.0 or the MIT license + /// // , at your + /// // option\. This file may not be copied, modified, or distributed + /// // except according to those terms\. + /// " + /// ); + /// ``` + pub fn parse(template: &str) -> Result { + let mut parser = Self::new(); + for chr in template.chars() { + if chr == '\n' { + parser.linum += 1; + } + parser.state = match parser.state { + Lit => parser.trans_from_lit(chr), + LitEsc => parser.trans_from_litesc(chr), + Re(brace_nesting) => parser.trans_from_re(chr, brace_nesting), + ReEsc(brace_nesting) => parser.trans_from_reesc(chr, brace_nesting), + Abort(msg) => return Err(msg), + }; + } + // check if we've ended parsing in a valid state + match parser.state { + Abort(msg) => return Err(msg), + Re(_) | ReEsc(_) => { + return Err(format!( + "escape or balance opening brace on l. {}", + parser.open_brace_line + )); + } + LitEsc => return Err(format!("incomplete escape sequence on l. 
{}", parser.linum)), + _ => (), + } + parser.parsed.push_str(®ex::escape(&parser.buffer)); + + Ok(parser.parsed) + } + + fn trans_from_lit(&mut self, chr: char) -> ParsingState { + match chr { + '{' => { + self.parsed.push_str(®ex::escape(&self.buffer)); + self.buffer.clear(); + self.open_brace_line = self.linum; + Re(1) + } + '}' => Abort(format!( + "escape or balance closing brace on l. {}", + self.linum + )), + '\\' => LitEsc, + _ => { + self.buffer.push(chr); + Lit + } + } + } + + fn trans_from_litesc(&mut self, chr: char) -> ParsingState { + self.buffer.push(chr); + Lit + } + + fn trans_from_re(&mut self, chr: char, brace_nesting: u32) -> ParsingState { + match chr { + '{' => { + self.buffer.push(chr); + Re(brace_nesting + 1) + } + '}' => { + match brace_nesting { + 1 => { + // default regex for empty placeholder {} + if self.buffer.is_empty() { + self.parsed.push_str(".*?"); + } else { + self.parsed.push_str(&self.buffer); + } + self.buffer.clear(); + Lit + } + _ => { + self.buffer.push(chr); + Re(brace_nesting - 1) + } + } + } + '\\' => { + self.buffer.push(chr); + ReEsc(brace_nesting) + } + _ => { + self.buffer.push(chr); + Re(brace_nesting) + } + } + } + + fn trans_from_reesc(&mut self, chr: char, brace_nesting: u32) -> ParsingState { + self.buffer.push(chr); + Re(brace_nesting) + } +} + +#[cfg(test)] +mod test { + use super::TemplateParser; + + #[test] + fn test_parse_license_template() { + assert_eq!( + TemplateParser::parse("literal (.*)").unwrap(), + r"^literal \(\.\*\)" + ); + assert_eq!( + TemplateParser::parse(r"escaping \}").unwrap(), + r"^escaping \}" + ); + assert!(TemplateParser::parse("unbalanced } without escape").is_err()); + assert_eq!( + TemplateParser::parse(r"{\d+} place{-?}holder{s?}").unwrap(), + r"^\d+ place-?holders?" 
+ ); + assert_eq!(TemplateParser::parse("default {}").unwrap(), "^default .*?"); + assert_eq!( + TemplateParser::parse(r"unbalanced nested braces {\{{3}}").unwrap(), + r"^unbalanced nested braces \{{3}" + ); + assert_eq!( + TemplateParser::parse("parsing error }").unwrap_err(), + "escape or balance closing brace on l. 1" + ); + assert_eq!( + TemplateParser::parse("parsing error {\nsecond line").unwrap_err(), + "escape or balance opening brace on l. 1" + ); + assert_eq!( + TemplateParser::parse(r"parsing error \").unwrap_err(), + "incomplete escape sequence on l. 1" + ); + } +} diff --git a/src/config/mod.rs b/src/config/mod.rs index 0d4ec8557d38..8b93743ec4d8 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -29,6 +29,7 @@ pub mod license; use config::config_type::ConfigType; use config::file_lines::FileLines; +use config::license::TemplateParser; pub use config::lists::*; pub use config::options::*; use config::summary::Summary; From b33451b4ede7f1114641928a06bb908551613637 Mon Sep 17 00:00:00 2001 From: David Lukes Date: Mon, 26 Feb 2018 15:50:33 +0100 Subject: [PATCH 08/13] Fix indentation in create_config macro definition --- src/config/config_type.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/config/config_type.rs b/src/config/config_type.rs index 8b9a6b2d84dc..4c2e4c3d5bb6 100644 --- a/src/config/config_type.rs +++ b/src/config/config_type.rs @@ -398,21 +398,21 @@ macro_rules! 
create_config { Ok(file) => file, Err(e) => { eprintln!("Warning: unable to open license template file {:?}: {}", - license_template_path, e); + license_template_path, e); return; } }; let mut license_template_str = String::new(); if let Err(e) = license_template_file.read_to_string(&mut license_template_str) { eprintln!("Warning: unable to read from license template file {:?}: {}", - license_template_path, e); + license_template_path, e); return; }; let license_template_parsed = match TemplateParser::parse(&license_template_str) { Ok(string) => string, Err(e) => { eprintln!("Warning: unable to parse license template file {:?}: {}", - license_template_path, e); + license_template_path, e); return; } }; @@ -420,7 +420,7 @@ macro_rules! create_config { Ok(re) => Some(re), Err(e) => { eprintln!("Warning: regex syntax error in placeholder, unable to compile \ - license template from file {:?}: {}", license_template_path, e); + license template from file {:?}: {}", license_template_path, e); return; } } From 533d185f49af8434e44fd0f24bf4f1f1be8fb8d9 Mon Sep 17 00:00:00 2001 From: David Lukes Date: Mon, 26 Feb 2018 15:57:31 +0100 Subject: [PATCH 09/13] Shorten var names to comply with line len reqs --- src/config/config_type.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/config/config_type.rs b/src/config/config_type.rs index 4c2e4c3d5bb6..0dcd377bffd3 100644 --- a/src/config/config_type.rs +++ b/src/config/config_type.rs @@ -393,34 +393,34 @@ macro_rules! 
create_config { fn set_license_template(&mut self) { if self.was_set().license_template_path() { - let license_template_path = self.license_template_path(); - let mut license_template_file = match File::open(&license_template_path) { + let lt_path = self.license_template_path(); + let mut lt_file = match File::open(&lt_path) { Ok(file) => file, Err(e) => { eprintln!("Warning: unable to open license template file {:?}: {}", - license_template_path, e); + lt_path, e); return; } }; - let mut license_template_str = String::new(); - if let Err(e) = license_template_file.read_to_string(&mut license_template_str) { + let mut lt_str = String::new(); + if let Err(e) = lt_file.read_to_string(&mut lt_str) { eprintln!("Warning: unable to read from license template file {:?}: {}", - license_template_path, e); + lt_path, e); return; }; - let license_template_parsed = match TemplateParser::parse(&license_template_str) { + let lt_parsed = match TemplateParser::parse(&lt_str) { Ok(string) => string, Err(e) => { eprintln!("Warning: unable to parse license template file {:?}: {}", - license_template_path, e); + lt_path, e); return; } }; - self.license_template = match Regex::new(&license_template_parsed) { + self.license_template = match Regex::new(&lt_parsed) { Ok(re) => Some(re), Err(e) => { eprintln!("Warning: regex syntax error in placeholder, unable to compile \ - license template from file {:?}: {}", license_template_path, e); + license template from file {:?}: {}", lt_path, e); return; } } From 53347bc22620c226ec5d761376e8c7056c093acf Mon Sep 17 00:00:00 2001 From: David Lukes Date: Mon, 26 Feb 2018 17:01:40 +0100 Subject: [PATCH 10/13] Add license_template_path configuration snippet --- Configurations.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Configurations.md b/Configurations.md index 324546f5a0d5..8540bc36383f 100644 --- a/Configurations.md +++ b/Configurations.md @@ -2115,3 +2115,23 @@ Enable unstable featuers on stable channel.
- **Default value**: `false` - **Possible values**: `true`, `false` - **Stable**: Yes + +## `license_template_path` + +Check whether beginnings of files match a license template. + +- **Default value**: `""` +- **Possible values**: path to a license template file +- **Stable**: No + +A license template is a plain text file which is matched literally against the +beginning of each source file, except for `{}`-delimited blocks, which are +matched as regular expressions. The following license template therefore +matches strings like `// Copyright 2017 The Rust Project Developers.`, `// +Copyright 2018 The Rust Project Developers.`, etc.: + +``` +// Copyright {\d+} The Rust Project Developers. +``` + +`\{`, `\}` and `\\` match literal braces / backslashes. From 1db84a3ec5ba40792f549bd815559f7d0c1ba234 Mon Sep 17 00:00:00 2001 From: David Lukes Date: Mon, 26 Feb 2018 18:53:46 +0100 Subject: [PATCH 11/13] Wrap license-related errors in enum --- src/config/config_type.rs | 47 +++++++++------------------ src/config/license.rs | 67 ++++++++++++++++++++++++++++++++------- src/config/mod.rs | 2 +- 3 files changed, 71 insertions(+), 45 deletions(-) diff --git a/src/config/config_type.rs b/src/config/config_type.rs index 0dcd377bffd3..314a1a26b4e6 100644 --- a/src/config/config_type.rs +++ b/src/config/config_type.rs @@ -392,39 +392,22 @@ macro_rules!
create_config { } fn set_license_template(&mut self) { - if self.was_set().license_template_path() { - let lt_path = self.license_template_path(); - let mut lt_file = match File::open(&lt_path) { - Ok(file) => file, - Err(e) => { - eprintln!("Warning: unable to open license template file {:?}: {}", - lt_path, e); - return; - } - }; - let mut lt_str = String::new(); - if let Err(e) = lt_file.read_to_string(&mut lt_str) { - eprintln!("Warning: unable to read from license template file {:?}: {}", - lt_path, e); - return; - }; - let lt_parsed = match TemplateParser::parse(&lt_str) { - Ok(string) => string, - Err(e) => { - eprintln!("Warning: unable to parse license template file {:?}: {}", - lt_path, e); - return; - } - }; - self.license_template = match Regex::new(&lt_parsed) { - Ok(re) => Some(re), - Err(e) => { - eprintln!("Warning: regex syntax error in placeholder, unable to compile \ - license template from file {:?}: {}", lt_path, e); - return; - } - } + if !self.was_set().license_template_path() { + return; } + let lt_path = self.license_template_path(); + let try = || -> Result<Regex, LicenseError> { + let mut lt_file = File::open(&lt_path)?; + let mut lt_str = String::new(); + lt_file.read_to_string(&mut lt_str)?; + let lt_parsed = TemplateParser::parse(&lt_str)?; + Ok(Regex::new(&lt_parsed)?)
+ }; + match try() { + Ok(re) => self.license_template = Some(re), + Err(msg) => eprintln!("Warning for license template file {:?}: {}", + lt_path, msg), + }; } } diff --git a/src/config/license.rs b/src/config/license.rs index 3de045991292..ce05634b876d 100644 --- a/src/config/license.rs +++ b/src/config/license.rs @@ -1,5 +1,37 @@ +use std::io; +use std::fmt; + use regex; +#[derive(Debug)] +pub enum LicenseError { + IO(io::Error), + Regex(regex::Error), + Parse(String), +} + +impl fmt::Display for LicenseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + LicenseError::IO(ref err) => err.fmt(f), + LicenseError::Regex(ref err) => err.fmt(f), + LicenseError::Parse(ref err) => write!(f, "parsing failed, {}", err), + } + } +} + +impl From<io::Error> for LicenseError { + fn from(err: io::Error) -> LicenseError { + LicenseError::IO(err) + } +} + +impl From<regex::Error> for LicenseError { + fn from(err: regex::Error) -> LicenseError { + LicenseError::Regex(err) + } +} + // the template is parsed using a state machine enum ParsingState { Lit, @@ -76,7 +108,7 @@ impl TemplateParser { /// " /// ); /// ``` - pub fn parse(template: &str) -> Result<String, String> { + pub fn parse(template: &str) -> Result<String, LicenseError> { let mut parser = Self::new(); for chr in template.chars() { if chr == '\n' { @@ -87,19 +119,24 @@ impl TemplateParser { LitEsc => parser.trans_from_litesc(chr), Re(brace_nesting) => parser.trans_from_re(chr, brace_nesting), ReEsc(brace_nesting) => parser.trans_from_reesc(chr, brace_nesting), - Abort(msg) => return Err(msg), + Abort(msg) => return Err(LicenseError::Parse(msg)), }; } // check if we've ended parsing in a valid state match parser.state { - Abort(msg) => return Err(msg), + Abort(msg) => return Err(LicenseError::Parse(msg)), Re(_) | ReEsc(_) => { - return Err(format!( + return Err(LicenseError::Parse(format!( "escape or balance opening brace on l.
{}", parser.open_brace_line - )); + ))); + } + LitEsc => { + return Err(LicenseError::Parse(format!( + "incomplete escape sequence on l. {}", + parser.linum + ))) } - LitEsc => return Err(format!("incomplete escape sequence on l. {}", parser.linum)), _ => (), } parser.parsed.push_str(&regex::escape(&parser.buffer)); @@ -198,16 +235,22 @@ mod test { r"^unbalanced nested braces \{{3}" ); assert_eq!( - TemplateParser::parse("parsing error }").unwrap_err(), - "escape or balance closing brace on l. 1" + &TemplateParser::parse("parsing error }") + .unwrap_err() + .to_string(), + "parsing failed, escape or balance closing brace on l. 1" ); assert_eq!( - TemplateParser::parse("parsing error {\nsecond line").unwrap_err(), - "escape or balance opening brace on l. 1" + &TemplateParser::parse("parsing error {\nsecond line") + .unwrap_err() + .to_string(), + "parsing failed, escape or balance opening brace on l. 1" ); assert_eq!( - TemplateParser::parse(r"parsing error \").unwrap_err(), - "incomplete escape sequence on l. 1" + &TemplateParser::parse(r"parsing error \") + .unwrap_err() + .to_string(), + "parsing failed, incomplete escape sequence on l. 1" ); } } diff --git a/src/config/mod.rs b/src/config/mod.rs index 8b93743ec4d8..8142b5034fb0 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -29,7 +29,7 @@ pub mod license; use config::config_type::ConfigType; use config::file_lines::FileLines; -use config::license::TemplateParser; +use config::license::{LicenseError, TemplateParser}; pub use config::lists::*; pub use config::options::*; use config::summary::Summary; From 085cc90599bea1c31f565f1495c4ce79d177ce49 Mon Sep 17 00:00:00 2001 From: David Lukes Date: Tue, 27 Feb 2018 15:00:29 +0100 Subject: [PATCH 12/13] Load and compile template in proper function Get rid of the unnecessary closure.
--- src/config/config_type.rs | 22 +++++++--------------- src/config/license.rs | 11 +++++++++++ src/config/mod.rs | 1 - 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/config/config_type.rs b/src/config/config_type.rs index 314a1a26b4e6..dc768490fbba 100644 --- a/src/config/config_type.rs +++ b/src/config/config_type.rs @@ -392,22 +392,14 @@ macro_rules! create_config { } fn set_license_template(&mut self) { - if !self.was_set().license_template_path() { - return; + if self.was_set().license_template_path() { + let lt_path = self.license_template_path(); + match license::load_and_compile_template(&lt_path) { + Ok(re) => self.license_template = Some(re), + Err(msg) => eprintln!("Warning for license template file {:?}: {}", + lt_path, msg), + } } - let lt_path = self.license_template_path(); - let try = || -> Result<Regex, LicenseError> { - let mut lt_file = File::open(&lt_path)?; - let mut lt_str = String::new(); - lt_file.read_to_string(&mut lt_str)?; - let lt_parsed = TemplateParser::parse(&lt_str)?; - Ok(Regex::new(&lt_parsed)?) - }; - match try() { - Ok(re) => self.license_template = Some(re), - Err(msg) => eprintln!("Warning for license template file {:?}: {}", - lt_path, msg), - }; } } diff --git a/src/config/license.rs b/src/config/license.rs index ce05634b876d..1830fcb3cf26 100644 --- a/src/config/license.rs +++ b/src/config/license.rs @@ -1,7 +1,10 @@ use std::io; use std::fmt; +use std::fs::File; +use std::io::Read; use regex; +use regex::Regex; #[derive(Debug)] pub enum LicenseError { @@ -210,6 +213,14 @@ impl TemplateParser { } } +pub fn load_and_compile_template(path: &str) -> Result<Regex, LicenseError> { + let mut lt_file = File::open(&path)?; + let mut lt_str = String::new(); + lt_file.read_to_string(&mut lt_str)?; + let lt_parsed = TemplateParser::parse(&lt_str)?; + Ok(Regex::new(&lt_parsed)?)
+} + #[cfg(test)] mod test { use super::TemplateParser; diff --git a/src/config/mod.rs b/src/config/mod.rs index 8142b5034fb0..0d4ec8557d38 100644 --- a/src/config/mod.rs +++ b/src/config/mod.rs @@ -29,7 +29,6 @@ pub mod license; use config::config_type::ConfigType; use config::file_lines::FileLines; -use config::license::{LicenseError, TemplateParser}; pub use config::lists::*; pub use config::options::*; use config::summary::Summary; From 01f652799d94140493e97a7af9ad3b696fdba5e1 Mon Sep 17 00:00:00 2001 From: David Lukes Date: Mon, 5 Mar 2018 13:39:30 +0100 Subject: [PATCH 13/13] Make license doctest pass again --- src/config/license.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config/license.rs b/src/config/license.rs index 1830fcb3cf26..b2babd5ac191 100644 --- a/src/config/license.rs +++ b/src/config/license.rs @@ -83,7 +83,7 @@ impl TemplateParser { /// # Examples /// /// ``` - /// # use rustfmt_config::license::TemplateParser; + /// # use rustfmt_nightly::config::license::TemplateParser; /// assert_eq!( /// TemplateParser::parse( /// r"