syntax: methodify the lexer
This commit is contained in:
parent
5343eb7e0c
commit
46d1af28b5
8 changed files with 1195 additions and 1187 deletions
424
src/libsyntax/parse/lexer/comments.rs
Normal file
424
src/libsyntax/parse/lexer/comments.rs
Normal file
|
|
@ -0,0 +1,424 @@
|
|||
// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
|
||||
// file at the top-level directory of this distribution and at
|
||||
// http://rust-lang.org/COPYRIGHT.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
use ast;
|
||||
use codemap::{BytePos, CharPos, CodeMap, Pos};
|
||||
use diagnostic;
|
||||
use parse::lexer::{is_whitespace, Reader};
|
||||
use parse::lexer::{StringReader, TokenAndSpan};
|
||||
use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
|
||||
use parse::lexer;
|
||||
use parse::token;
|
||||
|
||||
use std::io;
|
||||
use std::str;
|
||||
use std::string::String;
|
||||
use std::uint;
|
||||
|
||||
#[deriving(Clone, PartialEq)]
|
||||
pub enum CommentStyle {
|
||||
Isolated, // No code on either side of each line of the comment
|
||||
Trailing, // Code exists to the left of the comment
|
||||
Mixed, // Code before /* foo */ and after the comment
|
||||
BlankLine, // Just a manual blank line "\n\n", for layout
|
||||
}
|
||||
|
||||
#[deriving(Clone)]
|
||||
pub struct Comment {
|
||||
pub style: CommentStyle,
|
||||
pub lines: Vec<String>,
|
||||
pub pos: BytePos,
|
||||
}
|
||||
|
||||
pub fn is_doc_comment(s: &str) -> bool {
|
||||
(s.starts_with("///") && !is_line_non_doc_comment(s)) ||
|
||||
s.starts_with("//!") ||
|
||||
(s.starts_with("/**") && !is_block_non_doc_comment(s)) ||
|
||||
s.starts_with("/*!")
|
||||
}
|
||||
|
||||
pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
|
||||
assert!(is_doc_comment(comment));
|
||||
if comment.starts_with("//!") || comment.starts_with("/*!") {
|
||||
ast::AttrInner
|
||||
} else {
|
||||
ast::AttrOuter
|
||||
}
|
||||
}
|
||||
|
||||
pub fn strip_doc_comment_decoration(comment: &str) -> String {
|
||||
/// remove whitespace-only lines from the start/end of lines
|
||||
fn vertical_trim(lines: Vec<String> ) -> Vec<String> {
|
||||
let mut i = 0u;
|
||||
let mut j = lines.len();
|
||||
// first line of all-stars should be omitted
|
||||
if lines.len() > 0 &&
|
||||
lines.get(0).as_slice().chars().all(|c| c == '*') {
|
||||
i += 1;
|
||||
}
|
||||
while i < j && lines.get(i).as_slice().trim().is_empty() {
|
||||
i += 1;
|
||||
}
|
||||
// like the first, a last line of all stars should be omitted
|
||||
if j > i && lines.get(j - 1)
|
||||
.as_slice()
|
||||
.chars()
|
||||
.skip(1)
|
||||
.all(|c| c == '*') {
|
||||
j -= 1;
|
||||
}
|
||||
while j > i && lines.get(j - 1).as_slice().trim().is_empty() {
|
||||
j -= 1;
|
||||
}
|
||||
return lines.slice(i, j).iter().map(|x| (*x).clone()).collect();
|
||||
}
|
||||
|
||||
/// remove a "[ \t]*\*" block from each line, if possible
|
||||
fn horizontal_trim(lines: Vec<String> ) -> Vec<String> {
|
||||
let mut i = uint::MAX;
|
||||
let mut can_trim = true;
|
||||
let mut first = true;
|
||||
for line in lines.iter() {
|
||||
for (j, c) in line.as_slice().chars().enumerate() {
|
||||
if j > i || !"* \t".contains_char(c) {
|
||||
can_trim = false;
|
||||
break;
|
||||
}
|
||||
if c == '*' {
|
||||
if first {
|
||||
i = j;
|
||||
first = false;
|
||||
} else if i != j {
|
||||
can_trim = false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if i > line.len() {
|
||||
can_trim = false;
|
||||
}
|
||||
if !can_trim {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if can_trim {
|
||||
lines.iter().map(|line| {
|
||||
line.as_slice().slice(i + 1, line.len()).to_string()
|
||||
}).collect()
|
||||
} else {
|
||||
lines
|
||||
}
|
||||
}
|
||||
|
||||
// one-line comments lose their prefix
|
||||
static ONLINERS: &'static [&'static str] = &["///!", "///", "//!", "//"];
|
||||
for prefix in ONLINERS.iter() {
|
||||
if comment.starts_with(*prefix) {
|
||||
return comment.slice_from(prefix.len()).to_string();
|
||||
}
|
||||
}
|
||||
|
||||
if comment.starts_with("/*") {
|
||||
let lines = comment.slice(3u, comment.len() - 2u)
|
||||
.lines_any()
|
||||
.map(|s| s.to_string())
|
||||
.collect::<Vec<String> >();
|
||||
|
||||
let lines = vertical_trim(lines);
|
||||
let lines = horizontal_trim(lines);
|
||||
|
||||
return lines.connect("\n").to_string();
|
||||
}
|
||||
|
||||
fail!("not a doc-comment: {}", comment);
|
||||
}
|
||||
|
||||
fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
|
||||
debug!(">>> blank-line comment");
|
||||
comments.push(Comment {
|
||||
style: BlankLine,
|
||||
lines: Vec::new(),
|
||||
pos: rdr.last_pos,
|
||||
});
|
||||
}
|
||||
|
||||
fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
|
||||
comments: &mut Vec<Comment>) {
|
||||
while is_whitespace(rdr.curr) && !rdr.is_eof() {
|
||||
if rdr.col == CharPos(0u) && rdr.curr_is('\n') {
|
||||
push_blank_line_comment(rdr, &mut *comments);
|
||||
}
|
||||
rdr.bump();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
|
||||
comments: &mut Vec<Comment>) {
|
||||
debug!(">>> shebang comment");
|
||||
let p = rdr.last_pos;
|
||||
debug!("<<< shebang comment");
|
||||
comments.push(Comment {
|
||||
style: if code_to_the_left { Trailing } else { Isolated },
|
||||
lines: vec!(rdr.read_one_line_comment()),
|
||||
pos: p
|
||||
});
|
||||
}
|
||||
|
||||
fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
|
||||
comments: &mut Vec<Comment>) {
|
||||
debug!(">>> line comments");
|
||||
let p = rdr.last_pos;
|
||||
let mut lines: Vec<String> = Vec::new();
|
||||
while rdr.curr_is('/') && rdr.nextch_is('/') {
|
||||
let line = rdr.read_one_line_comment();
|
||||
debug!("{}", line);
|
||||
// Doc comments are not put in comments.
|
||||
if is_doc_comment(line.as_slice()) {
|
||||
break;
|
||||
}
|
||||
lines.push(line);
|
||||
rdr.consume_non_eol_whitespace();
|
||||
}
|
||||
debug!("<<< line comments");
|
||||
if !lines.is_empty() {
|
||||
comments.push(Comment {
|
||||
style: if code_to_the_left { Trailing } else { Isolated },
|
||||
lines: lines,
|
||||
pos: p
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Returns None if the first col chars of s contain a non-whitespace char.
|
||||
// Otherwise returns Some(k) where k is first char offset after that leading
|
||||
// whitespace. Note k may be outside bounds of s.
|
||||
fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
|
||||
let len = s.len();
|
||||
let mut col = col.to_uint();
|
||||
let mut cursor: uint = 0;
|
||||
while col > 0 && cursor < len {
|
||||
let r: str::CharRange = s.char_range_at(cursor);
|
||||
if !r.ch.is_whitespace() {
|
||||
return None;
|
||||
}
|
||||
cursor = r.next;
|
||||
col -= 1;
|
||||
}
|
||||
return Some(cursor);
|
||||
}
|
||||
|
||||
fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String> ,
|
||||
s: String, col: CharPos) {
|
||||
let len = s.len();
|
||||
let s1 = match all_whitespace(s.as_slice(), col) {
|
||||
Some(col) => {
|
||||
if col < len {
|
||||
s.as_slice().slice(col, len).to_string()
|
||||
} else {
|
||||
"".to_string()
|
||||
}
|
||||
}
|
||||
None => s,
|
||||
};
|
||||
debug!("pushing line: {}", s1);
|
||||
lines.push(s1);
|
||||
}
|
||||
|
||||
fn read_block_comment(rdr: &mut StringReader,
|
||||
code_to_the_left: bool,
|
||||
comments: &mut Vec<Comment> ) {
|
||||
debug!(">>> block comment");
|
||||
let p = rdr.last_pos;
|
||||
let mut lines: Vec<String> = Vec::new();
|
||||
let col = rdr.col;
|
||||
rdr.bump();
|
||||
rdr.bump();
|
||||
|
||||
let mut curr_line = String::from_str("/*");
|
||||
|
||||
// doc-comments are not really comments, they are attributes
|
||||
if (rdr.curr_is('*') && !rdr.nextch_is('*')) || rdr.curr_is('!') {
|
||||
while !(rdr.curr_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() {
|
||||
curr_line.push_char(rdr.curr.unwrap());
|
||||
rdr.bump();
|
||||
}
|
||||
if !rdr.is_eof() {
|
||||
curr_line.push_str("*/");
|
||||
rdr.bump();
|
||||
rdr.bump();
|
||||
}
|
||||
if !is_block_non_doc_comment(curr_line.as_slice()) {
|
||||
return
|
||||
}
|
||||
assert!(!curr_line.as_slice().contains_char('\n'));
|
||||
lines.push(curr_line);
|
||||
} else {
|
||||
let mut level: int = 1;
|
||||
while level > 0 {
|
||||
debug!("=== block comment level {}", level);
|
||||
if rdr.is_eof() {
|
||||
rdr.fatal("unterminated block comment");
|
||||
}
|
||||
if rdr.curr_is('\n') {
|
||||
trim_whitespace_prefix_and_push_line(&mut lines,
|
||||
curr_line,
|
||||
col);
|
||||
curr_line = String::new();
|
||||
rdr.bump();
|
||||
} else {
|
||||
curr_line.push_char(rdr.curr.unwrap());
|
||||
if rdr.curr_is('/') && rdr.nextch_is('*') {
|
||||
rdr.bump();
|
||||
rdr.bump();
|
||||
curr_line.push_char('*');
|
||||
level += 1;
|
||||
} else {
|
||||
if rdr.curr_is('*') && rdr.nextch_is('/') {
|
||||
rdr.bump();
|
||||
rdr.bump();
|
||||
curr_line.push_char('/');
|
||||
level -= 1;
|
||||
} else { rdr.bump(); }
|
||||
}
|
||||
}
|
||||
}
|
||||
if curr_line.len() != 0 {
|
||||
trim_whitespace_prefix_and_push_line(&mut lines,
|
||||
curr_line,
|
||||
col);
|
||||
}
|
||||
}
|
||||
|
||||
let mut style = if code_to_the_left { Trailing } else { Isolated };
|
||||
rdr.consume_non_eol_whitespace();
|
||||
if !rdr.is_eof() && !rdr.curr_is('\n') && lines.len() == 1u {
|
||||
style = Mixed;
|
||||
}
|
||||
debug!("<<< block comment");
|
||||
comments.push(Comment {style: style, lines: lines, pos: p});
|
||||
}
|
||||
|
||||
|
||||
fn consume_comment(rdr: &mut StringReader,
|
||||
code_to_the_left: bool,
|
||||
comments: &mut Vec<Comment> ) {
|
||||
debug!(">>> consume comment");
|
||||
if rdr.curr_is('/') && rdr.nextch_is('/') {
|
||||
read_line_comments(rdr, code_to_the_left, comments);
|
||||
} else if rdr.curr_is('/') && rdr.nextch_is('*') {
|
||||
read_block_comment(rdr, code_to_the_left, comments);
|
||||
} else if rdr.curr_is('#') && rdr.nextch_is('!') {
|
||||
read_shebang_comment(rdr, code_to_the_left, comments);
|
||||
} else { fail!(); }
|
||||
debug!("<<< consume comment");
|
||||
}
|
||||
|
||||
#[deriving(Clone)]
|
||||
pub struct Literal {
|
||||
pub lit: String,
|
||||
pub pos: BytePos,
|
||||
}
|
||||
|
||||
// it appears this function is called only from pprust... that's
|
||||
// probably not a good thing.
|
||||
pub fn gather_comments_and_literals(span_diagnostic: &diagnostic::SpanHandler,
|
||||
path: String,
|
||||
srdr: &mut io::Reader)
|
||||
-> (Vec<Comment>, Vec<Literal>) {
|
||||
let src = srdr.read_to_end().unwrap();
|
||||
let src = str::from_utf8(src.as_slice()).unwrap().to_string();
|
||||
let cm = CodeMap::new();
|
||||
let filemap = cm.new_filemap(path, src);
|
||||
let mut rdr = lexer::StringReader::new_raw(span_diagnostic, filemap);
|
||||
|
||||
let mut comments: Vec<Comment> = Vec::new();
|
||||
let mut literals: Vec<Literal> = Vec::new();
|
||||
let mut first_read: bool = true;
|
||||
while !rdr.is_eof() {
|
||||
loop {
|
||||
let mut code_to_the_left = !first_read;
|
||||
rdr.consume_non_eol_whitespace();
|
||||
if rdr.curr_is('\n') {
|
||||
code_to_the_left = false;
|
||||
consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
|
||||
}
|
||||
while rdr.peeking_at_comment() {
|
||||
consume_comment(&mut rdr, code_to_the_left, &mut comments);
|
||||
consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
let bstart = rdr.last_pos;
|
||||
rdr.next_token();
|
||||
//discard, and look ahead; we're working with internal state
|
||||
let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
|
||||
if token::is_lit(&tok) {
|
||||
rdr.with_str_from(bstart, |s| {
|
||||
debug!("tok lit: {}", s);
|
||||
literals.push(Literal {lit: s.to_string(), pos: sp.lo});
|
||||
})
|
||||
} else {
|
||||
debug!("tok: {}", token::to_str(&tok));
|
||||
}
|
||||
first_read = false;
|
||||
}
|
||||
|
||||
(comments, literals)
|
||||
}
|
||||
|
||||
// Unit tests for `strip_doc_comment_decoration`.
#[cfg(test)]
mod test {
    use super::*;

    // A common leading " *" column is removed; extra stars are kept.
    #[test] fn test_block_doc_comment_1() {
        let comment = "/**\n * Test \n ** Test\n * Test\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " Test \n* Test\n Test".to_string());
    }

    #[test] fn test_block_doc_comment_2() {
        let comment = "/**\n * Test\n * Test\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " Test\n Test".to_string());
    }

    // Without a star column on every line nothing is trimmed horizontally.
    #[test] fn test_block_doc_comment_3() {
        let comment = "/**\n let a: *int;\n *a = 5;\n*/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " let a: *int;\n *a = 5;".to_string());
    }

    // All-star first and last lines are dropped.
    #[test] fn test_block_doc_comment_4() {
        let comment = "/*******************\n test\n *********************/";
        let stripped = strip_doc_comment_decoration(comment);
        assert_eq!(stripped, " test".to_string());
    }

    // Every one-line prefix is covered, with and without a following space.
    #[test] fn test_line_doc_comment() {
        let stripped = strip_doc_comment_decoration("/// test");
        assert_eq!(stripped, " test".to_string());
        let stripped = strip_doc_comment_decoration("///! test");
        assert_eq!(stripped, " test".to_string());
        let stripped = strip_doc_comment_decoration("//! test");
        assert_eq!(stripped, " test".to_string());
        let stripped = strip_doc_comment_decoration("// test");
        assert_eq!(stripped, " test".to_string());
        let stripped = strip_doc_comment_decoration("///test");
        assert_eq!(stripped, "test".to_string());
        let stripped = strip_doc_comment_decoration("///!test");
        assert_eq!(stripped, "test".to_string());
        let stripped = strip_doc_comment_decoration("//!test");
        assert_eq!(stripped, "test".to_string());
        let stripped = strip_doc_comment_decoration("//test");
        assert_eq!(stripped, "test".to_string());
    }
}
|
||||
1153
src/libsyntax/parse/lexer/mod.rs
Normal file
1153
src/libsyntax/parse/lexer/mod.rs
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue