diff --git a/src/string.rs b/src/string.rs index 39eaf57d72e9..387bee3cec90 100644 --- a/src/string.rs +++ b/src/string.rs @@ -41,21 +41,43 @@ impl<'a> StringFormat<'a> { config, } } + + /// Returns the maximum number of graphemes that is possible on a line while taking the + /// indentation into account. + /// + /// If we cannot put at least a single character per line, the rewrite won't succeed. + fn max_chars_with_indent(&self) -> Option { + Some( + self.shape + .width + .checked_sub(self.opener.len() + self.line_end.len() + 1)? + + 1; + ) + } + + /// Like max_chars_with_indent but the indentation is not substracted. + /// This allows to fit more graphemes from the string on a line when + /// SnippetState::Overflow. + fn max_chars_without_indent(&self) -> Option { + Some(self.config.max_width().checked_sub(self.line_end.len())?) + } } -// FIXME: simplify this! pub fn rewrite_string<'a>( orig: &str, fmt: &StringFormat<'a>, max_width: Option, ) -> Option { + let max_chars_with_indent = fmt.max_chars_with_indent()?; + let max_chars_without_indent = fmt.max_chars_without_indent()?; + let indent = fmt.shape.indent.to_string_with_newline(fmt.config); + // Strip line breaks. - let re = Regex::new(r"([^\\](\\\\)*)\\[\n\r][[:space:]]*").unwrap(); - let stripped_str = re.replace_all(orig, "$1"); + // With this regex applied, all remaining whitespaces are significant + let strip_line_breaks_re = Regex::new(r"([^\\](\\\\)*)\\[\n\r][[:space:]]*").unwrap(); + let stripped_str = strip_line_breaks_re.replace_all(orig, "$1"); let graphemes = UnicodeSegmentation::graphemes(&*stripped_str, false).collect::>(); - let shape = fmt.shape; - let indent = shape.indent.to_string_with_newline(fmt.config); // `cur_start` is the position in `orig` of the start of the current line. let mut cur_start = 0; @@ -67,87 +89,131 @@ pub fn rewrite_string<'a>( ); result.push_str(fmt.opener); - let ender_length = fmt.line_end.len(); - // If we cannot put at least a single character per line, the rewrite won't - // succeed. - let mut max_chars = shape - .width - .checked_sub(fmt.opener.len() + ender_length + 1)? - + 1; - - // Snip a line at a time from `orig` until it is used up. Push the snippet + // Snip a line at a time from `stripped_str` until it is used up. Push the snippet // onto result. - 'outer: loop { - // `cur_end` will be where we break the line, as an offset into `orig`. - // Initialised to the maximum it could be (which may be beyond `orig`). - let mut cur_end = cur_start + max_chars; - - // We can fit the rest of the string on this line, so we're done. - if cur_end >= graphemes.len() { - let line = &graphemes[cur_start..].join(""); - result.push_str(line); - break 'outer; + let mut cur_max_chars = max_chars_with_indent; + loop { + // All the input starting at cur_start fits on the current line + if graphemes.len() - cur_start <= cur_max_chars { + result.push_str(&graphemes[cur_start..].join("")); + break; } - // Push cur_end left until we reach whitespace (or the line is too small). - while !is_whitespace(graphemes[cur_end - 1]) { - cur_end -= 1; - if cur_end < cur_start + MIN_STRING { - // We couldn't find whitespace before the string got too small. - // So start again at the max length and look for punctuation. - cur_end = cur_start + max_chars; - while !is_punctuation(graphemes[cur_end - 1]) { - cur_end -= 1; - - // If we can't break at whitespace or punctuation, grow the string instead. - if cur_end < cur_start + MIN_STRING { - cur_end = cur_start + max_chars; - while !(is_punctuation(graphemes[cur_end - 1]) - || is_whitespace(graphemes[cur_end - 1])) - { - cur_end += 1; - if cur_end == graphemes.len() { - let line = &graphemes[cur_start..].join(""); - result.push_str(line); - break 'outer; - } - } - break; - } - } + // The input starting at cur_start needs to be broken + match break_string(cur_max_chars, fmt.trim_end, &graphemes[cur_start..]) { + SnippetState::LineEnd(line, len) => { + result.push_str(&line); + result.push_str(fmt.line_end); + result.push_str(&indent); + result.push_str(fmt.line_start); + cur_max_chars = max_chars_with_indent; + cur_start += len; + } + SnippetState::Overflow(line, len) => { + result.push_str(&line); + cur_max_chars = max_chars_without_indent; + cur_start += len; + } + SnippetState::EndOfInput(line) => { + result.push_str(&line); break; } } - // Make sure there is no whitespace to the right of the break. - while cur_end < stripped_str.len() && is_whitespace(graphemes[cur_end]) { - cur_end += 1; - } - - // Make the current line and add it on to result. - let raw_line = graphemes[cur_start..cur_end].join(""); - let line = if fmt.trim_end { - raw_line.trim() - } else { - raw_line.as_str() - }; - - result.push_str(line); - result.push_str(fmt.line_end); - result.push_str(&indent); - result.push_str(fmt.line_start); - - // The next line starts where the current line ends. - cur_start = cur_end; - - if let Some(new_max_chars) = max_width { - max_chars = new_max_chars.checked_sub(fmt.opener.len() + ender_length + 1)? + 1; - } } result.push_str(fmt.closer); wrap_str(result, fmt.config.max_width(), fmt.shape) } +/// Result of breaking a string so it fits in a line and the state it ended in. +/// The state informs about what to do with the snippet and how to continue the breaking process. +#[derive(Debug, PartialEq)] +enum SnippetState { + /// The input could not be broken and so rewriting the string is finished. + EndOfInput(String), + /// The input could be broken and the returned snippet should be ended with a + /// `[StringFormat::line_end]`. The next snippet needs to be indented. + LineEnd(String, usize), + /// The input could be broken but the returned snippet should not be ended with a + /// `[StringFormat::line_end]` because the whitespace is significant. Therefore, the next + /// snippet should not be indented. + Overflow(String, usize), +} + +/// Break the input string at a boundary character around the offset `max_chars`. A boundary +/// character is either a punctuation or a whitespace. +fn break_string(max_chars: usize, trim_end: bool, input: &[&str]) -> SnippetState { + let break_at = |index /* grapheme at index is included */| { + // Take in any whitespaces to the left/right of `input[index]` and + // check if there is a line feed, in which case whitespaces needs to be kept. + let mut index_minus_ws = index; + for (i, grapheme) in input[0..=index].iter().enumerate().rev() { + if !trim_end && is_line_feed(grapheme) { + return SnippetState::Overflow(input[0..=i].join("").to_string(), i + 1); + } else if !is_whitespace(grapheme) { + index_minus_ws = i; + break; + } + } + let mut index_plus_ws = index; + for (i, grapheme) in input[index + 1..].iter().enumerate() { + if !trim_end && is_line_feed(grapheme) { + return SnippetState::Overflow( + input[0..=index + 1 + i].join("").to_string(), + index + 2 + i, + ); + } else if !is_whitespace(grapheme) { + index_plus_ws = index + i; + break; + } + } + + if trim_end { + SnippetState::LineEnd( + input[0..=index_minus_ws].join("").to_string(), + index_plus_ws + 1, + ) + } else { + SnippetState::LineEnd( + input[0..=index_plus_ws].join("").to_string(), + index_plus_ws + 1, + ) + } + }; + + // Find the position in input for breaking the string + match input[0..max_chars] + .iter() + .rposition(|grapheme| is_whitespace(grapheme)) + { + // Found a whitespace and what is on its left side is big enough. + Some(index) if index >= MIN_STRING => break_at(index), + // No whitespace found, try looking for a punctuation instead + _ => match input[0..max_chars] + .iter() + .rposition(|grapheme| is_punctuation(grapheme)) + { + // Found a punctuation and what is on its left side is big enough. + Some(index) if index >= MIN_STRING => break_at(index), + // Either no boundary character was found to the left of `input[max_chars]`, or the line + // got too small. We try searching for a boundary character to the right. + _ => match input[max_chars..] + .iter() + .position(|grapheme| is_whitespace(grapheme) || is_punctuation(grapheme)) + { + // A boundary was found after the line limit + Some(index) => break_at(max_chars + index), + // No boundary to the right, the input cannot be broken + None => SnippetState::EndOfInput(input.join("").to_string()), + }, + }, + } +} + +fn is_line_feed(grapheme: &str) -> bool { + grapheme.as_bytes()[0] == b'\n' +} + fn is_whitespace(grapheme: &str) -> bool { grapheme.chars().all(|c| c.is_whitespace()) } @@ -161,8 +227,9 @@ fn is_punctuation(grapheme: &str) -> bool { #[cfg(test)] mod test { - use super::{rewrite_string, StringFormat}; + use super::{break_string, rewrite_string, SnippetState, StringFormat}; use shape::{Indent, Shape}; + use unicode_segmentation::UnicodeSegmentation; #[test] fn issue343() { @@ -170,4 +237,89 @@ mod test { let fmt = StringFormat::new(Shape::legacy(2, Indent::empty()), &config); rewrite_string("eq_", &fmt, None); } + + #[test] + fn should_break_on_whitespace() { + let string = "Placerat felis. Mauris porta ante sagittis purus."; + let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); + assert_eq!( + break_string(20, false, &graphemes[..]), + SnippetState::LineEnd("Placerat felis. ".to_string(), 16) + ); + assert_eq!( + break_string(20, true, &graphemes[..]), + SnippetState::LineEnd("Placerat felis.".to_string(), 16) + ); + } + + #[test] + fn should_break_on_punctuation() { + let string = "Placerat_felis._Mauris_porta_ante_sagittis_purus."; + let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); + assert_eq!( + break_string(20, false, &graphemes[..]), + SnippetState::LineEnd("Placerat_felis.".to_string(), 15) + ); + } + + #[test] + fn should_break_forward() { + let string = "Venenatis_tellus_vel_tellus. Aliquam aliquam dolor at justo."; + let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); + assert_eq!( + break_string(20, false, &graphemes[..]), + SnippetState::LineEnd("Venenatis_tellus_vel_tellus. ".to_string(), 29) + ); + assert_eq!( + break_string(20, true, &graphemes[..]), + SnippetState::LineEnd("Venenatis_tellus_vel_tellus.".to_string(), 29) + ); + } + + #[test] + fn nothing_to_break() { + let string = "Venenatis_tellus_vel_tellus"; + let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); + assert_eq!( + break_string(20, false, &graphemes[..]), + SnippetState::EndOfInput("Venenatis_tellus_vel_tellus".to_string()) + ); + } + + #[test] + fn significant_whitespaces() { + let string = "Neque in sem. \n Pellentesque tellus augue."; + let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); + assert_eq!( + break_string(15, false, &graphemes[..]), + SnippetState::Overflow("Neque in sem. \n".to_string(), 20) + ); + assert_eq!( + break_string(25, false, &graphemes[..]), + SnippetState::Overflow("Neque in sem. \n".to_string(), 20) + ); + // if `StringFormat::line_end` is true, then the line feed does not matter anymore + assert_eq!( + break_string(15, true, &graphemes[..]), + SnippetState::LineEnd("Neque in sem.".to_string(), 26) + ); + assert_eq!( + break_string(25, true, &graphemes[..]), + SnippetState::LineEnd("Neque in sem.".to_string(), 26) + ); + } + + #[test] + fn big_whitespace() { + let string = "Neque in sem. Pellentesque tellus augue."; + let graphemes = UnicodeSegmentation::graphemes(&*string, false).collect::>(); + assert_eq!( + break_string(20, false, &graphemes[..]), + SnippetState::LineEnd("Neque in sem. ".to_string(), 25) + ); + assert_eq!( + break_string(20, true, &graphemes[..]), + SnippetState::LineEnd("Neque in sem.".to_string(), 25) + ); + } } diff --git a/src/utils.rs b/src/utils.rs index 48b71d9a84fa..99275b52dc12 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -320,7 +320,7 @@ macro_rules! skip_out_of_file_lines_range_visitor { } // Wraps String in an Option. Returns Some when the string adheres to the -// Rewrite constraints defined for the Rewrite trait and else otherwise. +// Rewrite constraints defined for the Rewrite trait and None otherwise. pub fn wrap_str(s: String, max_width: usize, shape: Shape) -> Option { if is_valid_str(&s, max_width, shape) { Some(s) diff --git a/tests/source/issue-1210/a.rs b/tests/source/issue-1210/a.rs new file mode 100644 index 000000000000..6bb9964b4e35 --- /dev/null +++ b/tests/source/issue-1210/a.rs @@ -0,0 +1,12 @@ +// rustfmt-format_strings: true +// rustfmt-max_width: 50 + +impl Foo { + fn cxx(&self, target: &str) -> &Path { + match self.cxx.get(target) { + Some(p) => p.path(), + None => panic!("\n\ntarget `{}` is not configured as a host, + only as a target\n\n", target), + } + } +} diff --git a/tests/source/issue-1210/b.rs b/tests/source/issue-1210/b.rs new file mode 100644 index 000000000000..8c71ef98b7fc --- /dev/null +++ b/tests/source/issue-1210/b.rs @@ -0,0 +1,12 @@ +// rustfmt-format_strings: true +// rustfmt-max_width: 50 + +impl Foo { + fn cxx(&self, target: &str) -> &Path { + match self.cxx.get(target) { + Some(p) => p.path(), + None => panic!("\ntarget `{}`: is not, configured as a host, + only as a target\n\n", target), + } + } +} diff --git a/tests/source/issue-1210/c.rs b/tests/source/issue-1210/c.rs new file mode 100644 index 000000000000..c080cef950b3 --- /dev/null +++ b/tests/source/issue-1210/c.rs @@ -0,0 +1,5 @@ +// rustfmt-format_strings: true +// rustfmt-max_width: 50 + +const foo: String = "trailing_spaces!! + keep them! Amet neque. Praesent rhoncus eros non velit."; diff --git a/tests/source/issue-1210/d.rs b/tests/source/issue-1210/d.rs new file mode 100644 index 000000000000..783736bc3b2c --- /dev/null +++ b/tests/source/issue-1210/d.rs @@ -0,0 +1,4 @@ +// rustfmt-wrap_comments: true + +// trailing_spaces_in_comment!! +// remove those from above diff --git a/tests/source/issue-1210/e.rs b/tests/source/issue-1210/e.rs new file mode 100644 index 000000000000..9abada1d6d86 --- /dev/null +++ b/tests/source/issue-1210/e.rs @@ -0,0 +1,8 @@ +// rustfmt-format_strings: true +// rustfmt-max_width: 50 + +// explicit line breaks should be kept in order to preserve the layout + +const foo: String = "Suspendisse vel augue at felis tincidunt sollicitudin. Fusce arcu. + Duis et odio et leo + sollicitudin consequat. Aliquam lobortis. Phasellus condimentum."; diff --git a/tests/target/issue-1210/a.rs b/tests/target/issue-1210/a.rs new file mode 100644 index 000000000000..94c1b44e5e56 --- /dev/null +++ b/tests/target/issue-1210/a.rs @@ -0,0 +1,16 @@ +// rustfmt-format_strings: true +// rustfmt-max_width: 50 + +impl Foo { + fn cxx(&self, target: &str) -> &Path { + match self.cxx.get(target) { + Some(p) => p.path(), + None => panic!( + "\n\ntarget `{}` is not \ + configured as a host, + only as a target\n\n", + target + ), + } + } +} diff --git a/tests/target/issue-1210/b.rs b/tests/target/issue-1210/b.rs new file mode 100644 index 000000000000..a7b1e3bcdc89 --- /dev/null +++ b/tests/target/issue-1210/b.rs @@ -0,0 +1,16 @@ +// rustfmt-format_strings: true +// rustfmt-max_width: 50 + +impl Foo { + fn cxx(&self, target: &str) -> &Path { + match self.cxx.get(target) { + Some(p) => p.path(), + None => panic!( + "\ntarget `{}`: is not, \ + configured as a host, + only as a target\n\n", + target + ), + } + } +} diff --git a/tests/target/issue-1210/c.rs b/tests/target/issue-1210/c.rs new file mode 100644 index 000000000000..183d79f92586 --- /dev/null +++ b/tests/target/issue-1210/c.rs @@ -0,0 +1,7 @@ +// rustfmt-format_strings: true +// rustfmt-max_width: 50 + +const foo: String = + "trailing_spaces!! + keep them! Amet neque. Praesent \ + rhoncus eros non velit."; diff --git a/tests/target/issue-1210/d.rs b/tests/target/issue-1210/d.rs new file mode 100644 index 000000000000..9279e6fc9990 --- /dev/null +++ b/tests/target/issue-1210/d.rs @@ -0,0 +1,4 @@ +// rustfmt-wrap_comments: true + +// trailing_spaces_in_comment!! +// remove those from above diff --git a/tests/target/issue-1210/e.rs b/tests/target/issue-1210/e.rs new file mode 100644 index 000000000000..55f80c6c3cc2 --- /dev/null +++ b/tests/target/issue-1210/e.rs @@ -0,0 +1,11 @@ +// rustfmt-format_strings: true +// rustfmt-max_width: 50 + +// explicit line breaks should be kept in order to preserve the layout + +const foo: String = + "Suspendisse vel augue at felis tincidunt \ + sollicitudin. Fusce arcu. + Duis et odio et leo + sollicitudin consequat. Aliquam \ + lobortis. Phasellus condimentum.";