From 9f98f725cb9ff3a31c330a4117ea1962d91d18f6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20Cassiers?= <gaetan.cassiers@gmail.com>
Date: Sun, 10 Jan 2016 15:12:15 +0100
Subject: [PATCH 1/2] Detect when comments disappear

When the reformatted code doesn't contain the same quantity of comments
as the original code, use the original code instead of the reformatted
code.
This is done for all expressions and `let` statements.

This should be used at the finest grained level possible, to avoid that
a small disappearing comment prevents a big chunk of code to be
reformatted.

Kind of fixes (avoid disappearing comments, but prevents a good
formatting is such case) #285 #225 #563 #743
---
 src/comment.rs                        | 262 ++++++++++++++++++++++----
 src/expr.rs                           |  12 +-
 tests/source/paths.rs                 |   3 +-
 tests/target/comment-not-disappear.rs |  42 +++++
 tests/target/paths.rs                 |   1 +
 5 files changed, 277 insertions(+), 43 deletions(-)
 create mode 100644 tests/target/comment-not-disappear.rs
diff --git a/src/comment.rs b/src/comment.rs
index c111259979cb..3f716d9039ca 100644
--- a/src/comment.rs
+++ b/src/comment.rs
@@ -8,13 +8,17 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-// Format comments.
+// Formatting and tools for comments.
 
-use std::iter;
+use std::{self, iter};
+
+use syntax::codemap::Span;
 
 use Indent;
 use config::Config;
+use rewrite::RewriteContext;
 use string::{StringFormat, rewrite_string};
+use utils::wrap_str;
 
 pub fn rewrite_comment(orig: &str,
                        block_style: bool,
@@ -150,7 +154,7 @@ impl FindUncommented for str {
                 }
                 Some(c) => {
                     match kind {
-                        CodeCharKind::Normal if b == c => {}
+                        FullCodeCharKind::Normal if b == c => {}
                         _ => {
                             needle_iter = pat.chars();
                         }
@@ -174,7 +178,7 @@ impl FindUncommented for str {
 pub fn find_comment_end(s: &str) -> Option<usize> {
     let mut iter = CharClasses::new(s.char_indices());
     for (kind, (i, _c)) in &mut iter {
-        if kind == CodeCharKind::Normal {
+        if kind == FullCodeCharKind::Normal {
             return Some(i);
         }
     }
@@ -189,7 +193,7 @@ pub fn find_comment_end(s: &str) -> Option<usize> {
 
 /// Returns true if text contains any comment.
 pub fn contains_comment(text: &str) -> bool {
-    CharClasses::new(text.chars()).any(|(kind, _)| kind == CodeCharKind::Comment)
+    CharClasses::new(text.chars()).any(|(kind, _)| kind.is_comment())
 }
 
 struct CharClasses<T>
@@ -240,6 +244,33 @@ pub enum CodeCharKind {
     Comment,
 }
 
+#[derive(PartialEq, Eq, Debug, Clone, Copy)]
+enum FullCodeCharKind {
+    Normal,
+    StartComment,
+    InComment,
+    EndComment,
+}
+
+impl FullCodeCharKind {
+    fn is_comment(&self) -> bool {
+        match *self {
+            FullCodeCharKind::Normal => false,
+            FullCodeCharKind::StartComment |
+            FullCodeCharKind::InComment |
+            FullCodeCharKind::EndComment => true,
+        }
+    }
+
+    fn to_codecharkind(&self) -> CodeCharKind {
+        if self.is_comment() {
+            CodeCharKind::Comment
+        } else {
+            CodeCharKind::Normal
+        }
+    }
+}
+
 impl<T> CharClasses<T>
     where T: Iterator,
           T::Item: RichChar
@@ -256,9 +287,9 @@ impl<T> Iterator for CharClasses<T>
     where T: Iterator,
           T::Item: RichChar
 {
-    type Item = (CodeCharKind, T::Item);
+    type Item = (FullCodeCharKind, T::Item);
 
-    fn next(&mut self) -> Option<(CodeCharKind, T::Item)> {
+    fn next(&mut self) -> Option<(FullCodeCharKind, T::Item)> {
         let item = try_opt!(self.base.next());
         let chr = item.get_char();
         self.status = match self.status {
@@ -286,11 +317,11 @@ impl<T> Iterator for CharClasses<T>
                         match self.base.peek() {
                             Some(next) if next.get_char() == '*' => {
                                 self.status = CharClassesStatus::BlockCommentOpening(1);
-                                return Some((CodeCharKind::Comment, item));
+                                return Some((FullCodeCharKind::StartComment, item));
                             }
                             Some(next) if next.get_char() == '/' => {
                                 self.status = CharClassesStatus::LineComment;
-                                return Some((CodeCharKind::Comment, item));
+                                return Some((FullCodeCharKind::StartComment, item));
                             }
                             _ => CharClassesStatus::Normal,
                         }
@@ -299,12 +330,7 @@ impl<T> Iterator for CharClasses<T>
                 }
             }
             CharClassesStatus::BlockComment(deepness) => {
-                if deepness == 0 {
-                    // This is the closing '/'
-                    assert_eq!(chr, '/');
-                    self.status = CharClassesStatus::Normal;
-                    return Some((CodeCharKind::Comment, item));
-                }
+                assert!(deepness != 0);
                 self.status = match self.base.peek() {
                     Some(next) if next.get_char() == '/' && chr == '*' => {
                         CharClassesStatus::BlockCommentClosing(deepness - 1)
@@ -314,34 +340,92 @@ impl<T> Iterator for CharClasses<T>
                     }
                     _ => CharClassesStatus::BlockComment(deepness),
                 };
-                return Some((CodeCharKind::Comment, item));
+                return Some((FullCodeCharKind::InComment, item));
             }
             CharClassesStatus::BlockCommentOpening(deepness) => {
                 assert_eq!(chr, '*');
                 self.status = CharClassesStatus::BlockComment(deepness);
-                return Some((CodeCharKind::Comment, item));
+                return Some((FullCodeCharKind::InComment, item));
             }
             CharClassesStatus::BlockCommentClosing(deepness) => {
                 assert_eq!(chr, '/');
-                self.status = if deepness == 0 {
-                    CharClassesStatus::Normal
+                if deepness == 0 {
+                    self.status = CharClassesStatus::Normal;
+                    return Some((FullCodeCharKind::EndComment, item));
                 } else {
-                    CharClassesStatus::BlockComment(deepness)
-                };
-                return Some((CodeCharKind::Comment, item));
+                    self.status = CharClassesStatus::BlockComment(deepness);
+                    return Some((FullCodeCharKind::InComment, item));
+                }
             }
             CharClassesStatus::LineComment => {
-                self.status = match chr {
-                    '\n' => CharClassesStatus::Normal,
-                    _ => CharClassesStatus::LineComment,
-                };
-                return Some((CodeCharKind::Comment, item));
+                match chr {
+                    '\n' => {
+                        self.status = CharClassesStatus::Normal;
+                        return Some((FullCodeCharKind::EndComment, item));
+                    }
+                    _ => {
+                        self.status = CharClassesStatus::LineComment;
+                        return Some((FullCodeCharKind::InComment, item));
+                    }
+                }
             }
         };
-        Some((CodeCharKind::Normal, item))
+        Some((FullCodeCharKind::Normal, item))
     }
 }
 
+/// Iterator over functional and commented parts of a string. Any part of a string is either
+/// functional code, either *one* block comment, either *one* line comment. Whitespace between
+/// comments is functional code. Line comments contain their ending newlines.
+struct UngroupedCommentCodeSlices<'a> {
+    slice: &'a str,
+    iter: iter::Peekable<CharClasses<std::str::CharIndices<'a>>>,
+}
+
+impl<'a> UngroupedCommentCodeSlices<'a> {
+    fn new(code: &'a str) -> UngroupedCommentCodeSlices<'a> {
+        UngroupedCommentCodeSlices {
+            slice: code,
+            iter: CharClasses::new(code.char_indices()).peekable(),
+        }
+    }
+}
+
+impl<'a> Iterator for UngroupedCommentCodeSlices<'a> {
+    type Item = (CodeCharKind, usize, &'a str);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let (kind, (start_idx, _)) = try_opt!(self.iter.next());
+        match kind {
+            FullCodeCharKind::Normal => {
+                // Consume all the Normal code
+                while let Some(&(FullCodeCharKind::Normal, (_, _))) = self.iter.peek() {
+                    let _ = self.iter.next();
+                }
+            }
+            FullCodeCharKind::StartComment => {
+                // Consume the whole comment
+                while let Some((FullCodeCharKind::InComment, (_, _))) = self.iter.next() {}
+            }
+            _ => panic!(),
+        }
+        let slice = match self.iter.peek() {
+            Some(&(_, (end_idx, _))) => &self.slice[start_idx..end_idx],
+            None => &self.slice[start_idx..],
+        };
+        Some((if kind.is_comment() {
+            CodeCharKind::Comment
+        } else {
+            CodeCharKind::Normal
+        },
+              start_idx,
+              slice))
+    }
+}
+
+
+
+
 /// Iterator over an alternating sequence of functional and commented parts of
 /// a string. The first item is always a, possibly zero length, subslice of
 /// functional text. Line style comments contain their ending newlines.
@@ -383,7 +467,7 @@ impl<'a> Iterator for CommentCodeSlices<'a> {
                 first_whitespace = Some(i);
             }
 
-            if kind == self.last_slice_kind && !is_comment_connector {
+            if kind.to_codecharkind() == self.last_slice_kind && !is_comment_connector {
                 let last_index = match first_whitespace {
                     Some(j) => j,
                     None => i,
@@ -419,20 +503,124 @@ impl<'a> Iterator for CommentCodeSlices<'a> {
     }
 }
 
+/// Checks is `new` didn't miss any comment from `span`, if it removed any, return previous text
+/// (if it fits in the width/offset, else return None), else return `new`
+pub fn recover_comment_removed(new: String,
+                               span: Span,
+                               context: &RewriteContext,
+                               width: usize,
+                               offset: Indent)
+                               -> Option<String> {
+    let snippet = context.snippet(span);
+    if changed_comment_content(&snippet, &new) {
+        // We missed some comments
+        // Keep previous formatting if it satisfies the constrains
+        return wrap_str(snippet, context.config.max_width, width, offset);
+    } else {
+        Some(new)
+    }
+}
+
+/// Return true if the two strings of code have the same payload of comments.
+/// The payload of comments is everything in the string except:
+/// 	- actual code (not comments)
+/// 	- comment start/end marks
+///		- whitespace
+///		- '*' at the beginning of lines in block comments
+fn changed_comment_content(orig: &str, new: &str) -> bool {
+    // Cannot write this as a fn since we cannot return types containing closures
+    let code_comment_content = |code| {
+        let slices = UngroupedCommentCodeSlices::new(code);
+        slices.filter(|&(ref kind, _, _)| *kind == CodeCharKind::Comment)
+              .flat_map(|(_, _, s)| CommentReducer::new(s))
+    };
+    let res = code_comment_content(orig).ne(code_comment_content(new));
+    debug!("comment::changed_comment_content: {}\norig: '{}'\nnew: '{}'\nraw_old: {}\nraw_new: {}",
+           res,
+           orig,
+           new,
+           code_comment_content(orig).collect::<String>(),
+           code_comment_content(new).collect::<String>());
+    res
+}
+
+
+/// Iterator over the 'payload' characters of a comment.
+/// It skips whitespace, comment start/end marks, and '*' at the beginning of lines.
+/// The comment must be one comment, ie not more than one start mark (no multiple line comments,
+/// for example).
+struct CommentReducer<'a> {
+    is_block: bool,
+    at_start_line: bool,
+    iter: std::str::Chars<'a>,
+}
+
+impl<'a> CommentReducer<'a> {
+    fn new(comment: &'a str) -> CommentReducer<'a> {
+        let is_block = comment.starts_with("/*");
+        let comment = remove_comment_header(comment);
+        CommentReducer {
+            is_block: is_block,
+            at_start_line: false, // There are no supplementary '*' on the first line
+            iter: comment.chars(),
+        }
+    }
+}
+
+impl<'a> Iterator for CommentReducer<'a> {
+    type Item = char;
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            let mut c = try_opt!(self.iter.next());
+            if self.is_block && self.at_start_line {
+                while c.is_whitespace() {
+                    c = try_opt!(self.iter.next());
+                }
+                // Ignore leading '*'
+                if c == '*' {
+                    c = try_opt!(self.iter.next());
+                }
+            } else {
+                if c == '\n' {
+                    self.at_start_line = true;
+                }
+            }
+            if !c.is_whitespace() {
+                return Some(c);
+            }
+        }
+    }
+}
+
+
+fn remove_comment_header(comment: &str) -> &str {
+    if comment.starts_with("///") || comment.starts_with("//!") {
+        &comment[3..]
+    } else if comment.starts_with("//") {
+        &comment[2..]
+    } else if comment.starts_with("/**") || comment.starts_with("/*!") {
+        &comment[3..comment.len() - 2]
+    } else {
+        assert!(comment.starts_with("/*"),
+                format!("string '{}' is not a comment", comment));
+        &comment[2..comment.len() - 2]
+    }
+}
+
 #[cfg(test)]
 mod test {
-    use super::{CharClasses, CodeCharKind, contains_comment, rewrite_comment, FindUncommented,
-                CommentCodeSlices};
+    use super::{CharClasses, CodeCharKind, FullCodeCharKind, contains_comment, rewrite_comment,
+                FindUncommented, CommentCodeSlices};
     use Indent;
 
     #[test]
     fn char_classes() {
         let mut iter = CharClasses::new("//\n\n".chars());
 
-        assert_eq!((CodeCharKind::Comment, '/'), iter.next().unwrap());
-        assert_eq!((CodeCharKind::Comment, '/'), iter.next().unwrap());
-        assert_eq!((CodeCharKind::Comment, '\n'), iter.next().unwrap());
-        assert_eq!((CodeCharKind::Normal, '\n'), iter.next().unwrap());
+        assert_eq!((FullCodeCharKind::StartComment, '/'), iter.next().unwrap());
+        assert_eq!((FullCodeCharKind::InComment, '/'), iter.next().unwrap());
+        assert_eq!((FullCodeCharKind::EndComment, '\n'), iter.next().unwrap());
+        assert_eq!((FullCodeCharKind::Normal, '\n'), iter.next().unwrap());
         assert_eq!(None, iter.next());
     }
 
@@ -507,8 +695,8 @@ mod test {
         CharClasses::new(text.chars())
             .filter_map(|(s, c)| {
                 match s {
-                    CodeCharKind::Normal => Some(c),
-                    CodeCharKind::Comment => None,
+                    FullCodeCharKind::Normal => Some(c),
+                    _ => None,
                 }
             })
             .collect()
diff --git a/src/expr.rs b/src/expr.rs
index c9f684c6c101..269e45f9e9fb 100644
--- a/src/expr.rs
+++ b/src/expr.rs
@@ -23,7 +23,7 @@ use utils::{span_after, extra_offset, last_line_width, wrap_str, binary_search,
             semicolon_for_stmt};
 use visitor::FmtVisitor;
 use config::{Config, StructLitStyle, MultilineStyle};
-use comment::{FindUncommented, rewrite_comment, contains_comment};
+use comment::{FindUncommented, rewrite_comment, contains_comment, recover_comment_removed};
 use types::rewrite_path;
 use items::{span_lo_for_arg, span_hi_for_arg};
 use chains::rewrite_chain;
@@ -35,7 +35,7 @@ use syntax::visit::Visitor;
 
 impl Rewrite for ast::Expr {
     fn rewrite(&self, context: &RewriteContext, width: usize, offset: Indent) -> Option<String> {
-        match self.node {
+        let result = match self.node {
             ast::Expr_::ExprVec(ref expr_vec) => {
                 rewrite_array(expr_vec.iter().map(|e| &**e),
                               mk_sp(span_after(self.span, "[", context.codemap), self.span.hi),
@@ -207,7 +207,8 @@ impl Rewrite for ast::Expr {
                          width,
                          offset)
             }
-        }
+        };
+        result.and_then(|res| recover_comment_removed(res, self.span, context, width, offset))
     }
 }
 
@@ -478,7 +479,7 @@ impl Rewrite for ast::Block {
 
 impl Rewrite for ast::Stmt {
     fn rewrite(&self, context: &RewriteContext, _width: usize, offset: Indent) -> Option<String> {
-        match self.node {
+        let result = match self.node {
             ast::Stmt_::StmtDecl(ref decl, _) => {
                 if let ast::Decl_::DeclLocal(ref local) = decl.node {
                     local.rewrite(context, context.config.max_width, offset)
@@ -499,7 +500,8 @@ impl Rewrite for ast::Stmt {
                   .map(|s| s + suffix)
             }
             ast::Stmt_::StmtMac(..) => None,
-        }
+        };
+        result.and_then(|res| recover_comment_removed(res, self.span, context, _width, offset))
     }
 }
 
diff --git a/tests/source/paths.rs b/tests/source/paths.rs
index 8a4ab769ba3a..af61ba89b1cb 100644
--- a/tests/source/paths.rs
+++ b/tests/source/paths.rs
@@ -17,7 +17,8 @@ fn main() {
 
     < *mut JSObject >:: relocate(entry);
 
-    let x: Foo/*::*/<A   >;
+    let x: Foo<A   >;
+    let x: Foo/*::*/<A>;
 }
 
 fn op(foo: Bar, key : &[u8], upd : Fn(Option<&memcache::Item> , Baz  ) -> Result) -> MapResult {}
diff --git a/tests/target/comment-not-disappear.rs b/tests/target/comment-not-disappear.rs
new file mode 100644
index 000000000000..23c00c418be4
--- /dev/null
+++ b/tests/target/comment-not-disappear.rs
@@ -0,0 +1,42 @@
+// All the comments here should not disappear.
+
+fn a() {
+    match x {
+        X |
+        // A comment
+        Y => {}
+    };
+}
+
+fn b() {
+    match x {
+        X =>
+            // A comment
+            y
+    }
+}
+
+fn c() {
+    a() /* ... */;
+}
+
+fn foo() -> Vec<i32> {
+    (0..11)
+        .map(|x|
+        // This comment disappears.
+        if x % 2 == 0 { x } else { x * 2 })
+        .collect()
+}
+
+fn d() {
+    if true /* and ... */ {
+        a();
+    }
+}
+
+fn calc_page_len(prefix_len: usize, sofar: usize) -> usize {
+    2 // page type and flags
+    + 1 // stored depth
+    + 2 // stored count
+    + prefix_len + sofar // sum of size of all the actual items
+}
diff --git a/tests/target/paths.rs b/tests/target/paths.rs
index f1b142b3a5c4..adae879e647c 100644
--- a/tests/target/paths.rs
+++ b/tests/target/paths.rs
@@ -17,6 +17,7 @@ fn main() {
     <*mut JSObject>::relocate(entry);
 
     let x: Foo<A>;
+    let x: Foo/*::*/<A>;
 }
 
 fn op(foo: Bar, key: &[u8], upd: Fn(Option<&memcache::Item>, Baz) -> Result) -> MapResult {}

From b117d7b2b845fad5d5bb30a620a13b4810e7ffe3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABtan=20Cassiers?= <gaetan.cassiers@gmail.com>
Date: Sun, 10 Jan 2016 22:04:30 +0100
Subject: [PATCH 2/2] Document comment::*CodeCharKind

---
 src/comment.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/comment.rs b/src/comment.rs
index 3f716d9039ca..3382ab53276c 100644
--- a/src/comment.rs
+++ b/src/comment.rs
@@ -238,17 +238,25 @@ enum CharClassesStatus {
     LineComment,
 }
 
+/// Distinguish between functionnal part of code and comments
 #[derive(PartialEq, Eq, Debug, Clone, Copy)]
 pub enum CodeCharKind {
     Normal,
     Comment,
 }
 
+/// Distinguish between functionnal part of code and comments,
+/// describing opening and closing of comments for ease when chunking
+/// code from tagged characters
 #[derive(PartialEq, Eq, Debug, Clone, Copy)]
 enum FullCodeCharKind {
     Normal,
+    /// The first character of a comment, there is only one for a comment (always '/')
     StartComment,
+    /// Any character inside a comment including the second character of comment
+    /// marks ("//", "/*")
     InComment,
+    /// Last character of a comment, '\n' for a line comment, '/' for a block comment.
     EndComment,
 }