From 007246c17f1891cabb84c8a82250703f542cd58e Mon Sep 17 00:00:00 2001 From: Nick Cameron Date: Thu, 2 Jul 2015 15:37:52 +1200 Subject: [PATCH 1/4] Allow for space between each filemap in the codemap So if a filemap's last byte is at position n in the codemap, then n+1 will not refer to any filemap, and the next filemap will begin at n+2. This is useful for empty files; it means that every file (even empty ones) has a byte in the codemap. Closes #23301, #26504 --- src/libsyntax/codemap.rs | 164 ++++++++++++++++++++---------------- src/libsyntax/diagnostic.rs | 13 +-- 2 files changed, 98 insertions(+), 79 deletions(-) diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs index 5ddcfaef9ea2..2f109b589f14 100644 --- a/src/libsyntax/codemap.rs +++ b/src/libsyntax/codemap.rs @@ -115,6 +115,10 @@ impl Sub for CharPos { /// are *absolute* positions from the beginning of the codemap, not positions /// relative to FileMaps. Methods on the CodeMap can be used to relate spans back /// to the original source. +/// You must be careful if the span crosses more than one file - you will not be +/// able to use many of the functions on spans in codemap and you cannot assume +/// that the length of the span = hi - lo; there may be space in the BytePos +/// range between files. #[derive(Clone, Copy, Hash)] pub struct Span { pub lo: BytePos, @@ -339,7 +343,7 @@ pub struct MultiByteChar { pub bytes: usize, } -/// A single source in the CodeMap +/// A single source in the CodeMap. pub struct FileMap { /// The name of the file that the source came from, source that doesn't /// originate from files has names between angle brackets by convention, @@ -508,6 +512,9 @@ impl FileMap { lines.get(line_number).map(|&line| { let begin: BytePos = line - self.start_pos; let begin = begin.to_usize(); + // We can't use `lines.get(line_number+1)` because we might + // be parsing when we call this function and thus the current + // line is the last one we have line info for. 
let slice = &src[begin..]; match slice.find('\n') { Some(e) => &slice[..e], @@ -598,27 +605,27 @@ impl CodeMap { Ok(self.new_filemap(path.to_str().unwrap().to_string(), src)) } - pub fn new_filemap(&self, filename: FileName, mut src: String) -> Rc { - let mut files = self.files.borrow_mut(); - let start_pos = match files.last() { + fn next_start_pos(&self) -> usize { + let files = self.files.borrow(); + match files.last() { None => 0, - Some(last) => last.end_pos.to_usize(), - }; + // Add one so there is some space between files. This lets us distinguish + // positions in the codemap, even in the presence of zero-length files. + Some(last) => last.end_pos.to_usize() + 1, + } + } + + /// Creates a new filemap without setting its line information. If you don't + /// intend to set the line information yourself, you should use new_filemap_and_lines. + pub fn new_filemap(&self, filename: FileName, mut src: String) -> Rc { + let start_pos = self.next_start_pos(); + let mut files = self.files.borrow_mut(); // Remove utf-8 BOM if any. if src.starts_with("\u{feff}") { src.drain(..3); } - // Append '\n' in case it's not already there. - // This is a workaround to prevent CodeMap.lookup_filemap_idx from - // accidentally overflowing into the next filemap in case the last byte - // of span is also the last byte of filemap, which leads to incorrect - // results from CodeMap.span_to_*. 
- if !src.is_empty() && !src.ends_with("\n") { - src.push('\n'); - } - let end_pos = start_pos + src.len(); let filemap = Rc::new(FileMap { @@ -645,11 +652,8 @@ impl CodeMap { mut file_local_lines: Vec, mut file_local_multibyte_chars: Vec) -> Rc { + let start_pos = self.next_start_pos(); let mut files = self.files.borrow_mut(); - let start_pos = match files.last() { - None => 0, - Some(last) => last.end_pos.to_usize(), - }; let end_pos = Pos::from_usize(start_pos + source_len); let start_pos = Pos::from_usize(start_pos); @@ -686,39 +690,61 @@ impl CodeMap { /// Lookup source information about a BytePos pub fn lookup_char_pos(&self, pos: BytePos) -> Loc { - let FileMapAndLine {fm: f, line: a} = self.lookup_line(pos); - let line = a + 1; // Line numbers start at 1 let chpos = self.bytepos_to_file_charpos(pos); - let linebpos = (*f.lines.borrow())[a]; - let linechpos = self.bytepos_to_file_charpos(linebpos); - debug!("byte pos {:?} is on the line at byte pos {:?}", - pos, linebpos); - debug!("char pos {:?} is on the line at char pos {:?}", - chpos, linechpos); - debug!("byte is on line: {}", line); - assert!(chpos >= linechpos); - Loc { - file: f, - line: line, - col: chpos - linechpos + match self.lookup_line(pos) { + Ok(FileMapAndLine { fm: f, line: a }) => { + let line = a + 1; // Line numbers start at 1 + let linebpos = (*f.lines.borrow())[a]; + let linechpos = self.bytepos_to_file_charpos(linebpos); + debug!("byte pos {:?} is on the line at byte pos {:?}", + pos, linebpos); + debug!("char pos {:?} is on the line at char pos {:?}", + chpos, linechpos); + debug!("byte is on line: {}", line); + assert!(chpos >= linechpos); + Loc { + file: f, + line: line, + col: chpos - linechpos, + } + } + Err(f) => { + Loc { + file: f, + line: 0, + col: chpos, + } + } } } - fn lookup_line(&self, pos: BytePos) -> FileMapAndLine { + // If the relevant filemap is empty, we don't return a line number. 
+ fn lookup_line(&self, pos: BytePos) -> Result> { let idx = self.lookup_filemap_idx(pos); let files = self.files.borrow(); let f = (*files)[idx].clone(); + + let len = f.lines.borrow().len(); + if len == 0 { + return Err(f); + } + let mut a = 0; { let lines = f.lines.borrow(); let mut b = lines.len(); while b - a > 1 { let m = (a + b) / 2; - if (*lines)[m] > pos { b = m; } else { a = m; } + if (*lines)[m] > pos { + b = m; + } else { + a = m; + } } + assert!(a <= lines.len()); } - FileMapAndLine {fm: f, line: a} + Ok(FileMapAndLine { fm: f, line: a }) } pub fn lookup_char_pos_adj(&self, pos: BytePos) -> LocWithOpt { @@ -880,12 +906,15 @@ impl CodeMap { CharPos(bpos.to_usize() - map.start_pos.to_usize() - total_extra_bytes) } + // Return the index of the filemap (in self.files) which contains pos. fn lookup_filemap_idx(&self, pos: BytePos) -> usize { let files = self.files.borrow(); let files = &*files; - let len = files.len(); + let count = files.len(); + + // Binary search for the filemap. let mut a = 0; - let mut b = len; + let mut b = count; while b - a > 1 { let m = (a + b) / 2; if files[m].start_pos > pos { @@ -894,26 +923,8 @@ impl CodeMap { a = m; } } - // There can be filemaps with length 0. These have the same start_pos as - // the previous filemap, but are not the filemaps we want (because they - // are length 0, they cannot contain what we are looking for). So, - // rewind until we find a useful filemap. 
- loop { - let lines = files[a].lines.borrow(); - let lines = lines; - if !lines.is_empty() { - break; - } - if a == 0 { - panic!("position {} does not resolve to a source location", - pos.to_usize()); - } - a -= 1; - } - if a >= len { - panic!("position {} does not resolve to a source location", - pos.to_usize()) - } + + assert!(a < count, "position {} does not resolve to a source location", pos.to_usize()); return a; } @@ -1027,10 +1038,13 @@ mod tests { let fm = cm.new_filemap("blork.rs".to_string(), "first line.\nsecond line".to_string()); fm.next_line(BytePos(0)); + // Test we can get lines with partial line info. assert_eq!(fm.get_line(0), Some("first line.")); - // TESTING BROKEN BEHAVIOR: + // TESTING BROKEN BEHAVIOR: line break declared before actual line break. fm.next_line(BytePos(10)); assert_eq!(fm.get_line(1), Some(".")); + fm.next_line(BytePos(12)); + assert_eq!(fm.get_line(2), Some("second line")); } #[test] @@ -1056,9 +1070,9 @@ mod tests { fm1.next_line(BytePos(0)); fm1.next_line(BytePos(12)); - fm2.next_line(BytePos(24)); - fm3.next_line(BytePos(24)); - fm3.next_line(BytePos(34)); + fm2.next_line(fm2.start_pos); + fm3.next_line(fm3.start_pos); + fm3.next_line(fm3.start_pos + BytePos(12)); cm } @@ -1068,11 +1082,15 @@ mod tests { // Test lookup_byte_offset let cm = init_code_map(); - let fmabp1 = cm.lookup_byte_offset(BytePos(22)); + let fmabp1 = cm.lookup_byte_offset(BytePos(23)); assert_eq!(fmabp1.fm.name, "blork.rs"); - assert_eq!(fmabp1.pos, BytePos(22)); + assert_eq!(fmabp1.pos, BytePos(23)); - let fmabp2 = cm.lookup_byte_offset(BytePos(24)); + let fmabp1 = cm.lookup_byte_offset(BytePos(24)); + assert_eq!(fmabp1.fm.name, "empty.rs"); + assert_eq!(fmabp1.pos, BytePos(0)); + + let fmabp2 = cm.lookup_byte_offset(BytePos(25)); assert_eq!(fmabp2.fm.name, "blork2.rs"); assert_eq!(fmabp2.pos, BytePos(0)); } @@ -1085,7 +1103,7 @@ mod tests { let cp1 = cm.bytepos_to_file_charpos(BytePos(22)); assert_eq!(cp1, CharPos(22)); - let cp2 = 
cm.bytepos_to_file_charpos(BytePos(24)); + let cp2 = cm.bytepos_to_file_charpos(BytePos(25)); assert_eq!(cp2, CharPos(0)); } @@ -1099,7 +1117,7 @@ mod tests { assert_eq!(loc1.line, 2); assert_eq!(loc1.col, CharPos(10)); - let loc2 = cm.lookup_char_pos(BytePos(24)); + let loc2 = cm.lookup_char_pos(BytePos(25)); assert_eq!(loc2.file.name, "blork2.rs"); assert_eq!(loc2.line, 1); assert_eq!(loc2.col, CharPos(0)); @@ -1115,18 +1133,18 @@ mod tests { "first line€€.\n€ second line".to_string()); fm1.next_line(BytePos(0)); - fm1.next_line(BytePos(22)); - fm2.next_line(BytePos(40)); - fm2.next_line(BytePos(58)); + fm1.next_line(BytePos(28)); + fm2.next_line(fm2.start_pos); + fm2.next_line(fm2.start_pos + BytePos(20)); fm1.record_multibyte_char(BytePos(3), 3); fm1.record_multibyte_char(BytePos(9), 3); fm1.record_multibyte_char(BytePos(12), 3); fm1.record_multibyte_char(BytePos(15), 3); fm1.record_multibyte_char(BytePos(18), 3); - fm2.record_multibyte_char(BytePos(50), 3); - fm2.record_multibyte_char(BytePos(53), 3); - fm2.record_multibyte_char(BytePos(58), 3); + fm2.record_multibyte_char(fm2.start_pos + BytePos(10), 3); + fm2.record_multibyte_char(fm2.start_pos + BytePos(13), 3); + fm2.record_multibyte_char(fm2.start_pos + BytePos(18), 3); cm } diff --git a/src/libsyntax/diagnostic.rs b/src/libsyntax/diagnostic.rs index fbf015169f85..60f713b52892 100644 --- a/src/libsyntax/diagnostic.rs +++ b/src/libsyntax/diagnostic.rs @@ -854,11 +854,12 @@ mod test { println!("done"); let vec = data.lock().unwrap().clone(); let vec: &[u8] = &vec; - println!("{}", from_utf8(vec).unwrap()); - assert_eq!(vec, "dummy.txt: 8 \n\ - dummy.txt: 9 \n\ - dummy.txt:10 \n\ - dummy.txt:11 \n\ - dummy.txt:12 \n".as_bytes()); + let str = from_utf8(vec).unwrap(); + println!("{}", str); + assert_eq!(str, "dummy.txt: 8 line8\n\ + dummy.txt: 9 line9\n\ + dummy.txt:10 line10\n\ + dummy.txt:11 e-lä-vän\n\ + dummy.txt:12 tolv\n"); } } From 0e907fa542d7bfa08ca1f55512ffa4a5ff70ed15 Mon Sep 17 00:00:00 2001 From: 
Nick Cameron Date: Thu, 2 Jul 2015 17:14:14 +1200 Subject: [PATCH 2/4] Provide a filemap ctor with line info --- src/libsyntax/codemap.rs | 30 ++++++++++++++++-------------- src/libsyntax/diagnostic.rs | 7 +------ src/libsyntax/ext/source_util.rs | 4 ++-- 3 files changed, 19 insertions(+), 22 deletions(-) diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs index 2f109b589f14..3e5c10702b64 100644 --- a/src/libsyntax/codemap.rs +++ b/src/libsyntax/codemap.rs @@ -642,6 +642,21 @@ impl CodeMap { filemap } + /// Creates a new filemap and sets its line information. + pub fn new_filemap_and_lines(&self, filename: &str, src: &str) -> Rc { + let fm = self.new_filemap(filename.to_string(), src.to_owned()); + let mut byte_pos: u32 = 0; + for line in src.lines() { + // register the start of this line + fm.next_line(BytePos(byte_pos)); + + // update byte_pos to include this line and the \n at the end + byte_pos += line.len() as u32 + 1; + } + fm + } + + /// Allocates a new FileMap representing a source file from an external /// crate. The source code of such an "imported filemap" is not available, /// but we still know enough to generate accurate debuginfo location @@ -1190,19 +1205,6 @@ mod tests { Span { lo: BytePos(left_index), hi: BytePos(right_index + 1), expn_id: NO_EXPANSION } } - fn new_filemap_and_lines(cm: &CodeMap, filename: &str, input: &str) -> Rc { - let fm = cm.new_filemap(filename.to_string(), input.to_string()); - let mut byte_pos: u32 = 0; - for line in input.lines() { - // register the start of this line - fm.next_line(BytePos(byte_pos)); - - // update byte_pos to include this line and the \n at the end - byte_pos += line.len() as u32 + 1; - } - fm - } - /// Test span_to_snippet and span_to_lines for a span coverting 3 /// lines in the middle of a file. 
#[test] @@ -1210,7 +1212,7 @@ mod tests { let cm = CodeMap::new(); let inputtext = "aaaaa\nbbbbBB\nCCC\nDDDDDddddd\neee\n"; let selection = " \n ^~\n~~~\n~~~~~ \n \n"; - new_filemap_and_lines(&cm, "blork.rs", inputtext); + cm.new_filemap_and_lines("blork.rs", inputtext); let span = span_from_selection(inputtext, selection); // check that we are extracting the text we thought we were extracting diff --git a/src/libsyntax/diagnostic.rs b/src/libsyntax/diagnostic.rs index 60f713b52892..22aea1ce079e 100644 --- a/src/libsyntax/diagnostic.rs +++ b/src/libsyntax/diagnostic.rs @@ -837,12 +837,7 @@ mod test { tolv dreizehn "; - let file = cm.new_filemap("dummy.txt".to_string(), content.to_string()); - for (i, b) in content.bytes().enumerate() { - if b == b'\n' { - file.next_line(BytePos(i as u32)); - } - } + let file = cm.new_filemap_and_lines("dummy.txt", content); let start = file.lines.borrow()[7]; let end = file.lines.borrow()[11]; let sp = mk_sp(start, end); diff --git a/src/libsyntax/ext/source_util.rs b/src/libsyntax/ext/source_util.rs index 5418b1f43e4a..22517dc5f1bb 100644 --- a/src/libsyntax/ext/source_util.rs +++ b/src/libsyntax/ext/source_util.rs @@ -156,7 +156,7 @@ pub fn expand_include_str(cx: &mut ExtCtxt, sp: Span, tts: &[ast::TokenTree]) // dependency information let filename = format!("{}", file.display()); let interned = token::intern_and_get_ident(&src[..]); - cx.codemap().new_filemap(filename, src); + cx.codemap().new_filemap_and_lines(&filename, &src); base::MacEager::expr(cx.expr_str(sp, interned)) } @@ -187,7 +187,7 @@ pub fn expand_include_bytes(cx: &mut ExtCtxt, sp: Span, tts: &[ast::TokenTree]) // Add this input file to the code map to make it available as // dependency information, but don't enter it's contents let filename = format!("{}", file.display()); - cx.codemap().new_filemap(filename, "".to_string()); + cx.codemap().new_filemap_and_lines(&filename, ""); base::MacEager::expr(cx.expr_lit(sp, ast::LitBinary(Rc::new(bytes)))) } From 
bf34187a2f874690798b06d9350c1f6c4755243a Mon Sep 17 00:00:00 2001 From: Nick Cameron Date: Fri, 3 Jul 2015 12:54:05 +1200 Subject: [PATCH 3/4] Test --- src/test/compile-fail/cfg-empty-codemap.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 src/test/compile-fail/cfg-empty-codemap.rs diff --git a/src/test/compile-fail/cfg-empty-codemap.rs b/src/test/compile-fail/cfg-empty-codemap.rs new file mode 100644 index 000000000000..4c27d57008d0 --- /dev/null +++ b/src/test/compile-fail/cfg-empty-codemap.rs @@ -0,0 +1,18 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// Tests that empty codemaps don't ICE (#23301) + +// compile-flags: --cfg "" + +// error-pattern: expected ident, found + +pub fn main() { +} From f47d20aecdcd7db34d41ad1666fd3eee095cc943 Mon Sep 17 00:00:00 2001 From: Nick Cameron Date: Mon, 6 Jul 2015 14:13:19 +1200 Subject: [PATCH 4/4] Use a span from the correct file for the inner span of a module This basically only affects modules which are empty (or only contain comments). 
Closes #26755 --- src/librustdoc/clean/mod.rs | 4 ++++ src/libsyntax/codemap.rs | 2 +- src/libsyntax/parse/lexer/mod.rs | 1 + src/libsyntax/parse/mod.rs | 11 +++++++++-- src/libsyntax/parse/parser.rs | 24 ++++++++++++++---------- 5 files changed, 29 insertions(+), 13 deletions(-) diff --git a/src/librustdoc/clean/mod.rs b/src/librustdoc/clean/mod.rs index 3cc24550297c..d4eeaa1de109 100644 --- a/src/librustdoc/clean/mod.rs +++ b/src/librustdoc/clean/mod.rs @@ -1947,6 +1947,10 @@ impl Span { impl Clean for syntax::codemap::Span { fn clean(&self, cx: &DocContext) -> Span { + if *self == DUMMY_SP { + return Span::empty(); + } + let cm = cx.sess().codemap(); let filename = cm.span_to_filename(*self); let lo = cm.lookup_char_pos(self.lo); diff --git a/src/libsyntax/codemap.rs b/src/libsyntax/codemap.rs index 3e5c10702b64..e6bc3218897d 100644 --- a/src/libsyntax/codemap.rs +++ b/src/libsyntax/codemap.rs @@ -894,7 +894,7 @@ impl CodeMap { FileMapAndBytePos {fm: fm, pos: offset} } - /// Converts an absolute BytePos to a CharPos relative to the filemap and above. + /// Converts an absolute BytePos to a CharPos relative to the filemap. pub fn bytepos_to_file_charpos(&self, bpos: BytePos) -> CharPos { let idx = self.lookup_filemap_idx(bpos); let files = self.files.borrow(); diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index b6a3788dacc3..621335ecd979 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -231,6 +231,7 @@ impl<'a> StringReader<'a> { None => { if self.is_eof() { self.peek_tok = token::Eof; + self.peek_span = codemap::mk_sp(self.filemap.end_pos, self.filemap.end_pos); } else { let start_bytepos = self.last_pos; self.peek_tok = self.next_token_inner(); diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index d6c28d414479..34a63fc92fea 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -11,7 +11,7 @@ //! 
The main parser interface use ast; -use codemap::{Span, CodeMap, FileMap}; +use codemap::{self, Span, CodeMap, FileMap}; use diagnostic::{SpanHandler, Handler, Auto, FatalError}; use parse::attr::ParserAttr; use parse::parser::Parser; @@ -203,7 +203,14 @@ pub fn new_sub_parser_from_file<'a>(sess: &'a ParseSess, pub fn filemap_to_parser<'a>(sess: &'a ParseSess, filemap: Rc, cfg: ast::CrateConfig) -> Parser<'a> { - tts_to_parser(sess, filemap_to_tts(sess, filemap), cfg) + let end_pos = filemap.end_pos; + let mut parser = tts_to_parser(sess, filemap_to_tts(sess, filemap), cfg); + + if parser.token == token::Eof && parser.span == codemap::DUMMY_SP { + parser.span = codemap::mk_sp(end_pos, end_pos); + } + + parser } // must preserve old name for now, because quote! from the *existing* diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 28802d323c69..db1b2489f1d9 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -4824,8 +4824,14 @@ impl<'a> Parser<'a> { return Err(self.fatal(&format!("expected item, found `{}`", token_str))); } + let hi = if self.span == codemap::DUMMY_SP { + inner_lo + } else { + self.span.lo + }; + Ok(ast::Mod { - inner: mk_sp(inner_lo, self.span.lo), + inner: mk_sp(inner_lo, hi), items: items }) } @@ -4869,8 +4875,7 @@ impl<'a> Parser<'a> { fn push_mod_path(&mut self, id: Ident, attrs: &[Attribute]) { let default_path = self.id_to_interned_str(id); - let file_path = match ::attr::first_attr_value_str_by_name(attrs, - "path") { + let file_path = match ::attr::first_attr_value_str_by_name(attrs, "path") { Some(d) => d, None => default_path, }; @@ -5003,13 +5008,12 @@ impl<'a> Parser<'a> { included_mod_stack.push(path.clone()); drop(included_mod_stack); - let mut p0 = - new_sub_parser_from_file(self.sess, - self.cfg.clone(), - &path, - owns_directory, - Some(name), - id_sp); + let mut p0 = new_sub_parser_from_file(self.sess, + self.cfg.clone(), + &path, + owns_directory, + Some(name), + id_sp); 
let mod_inner_lo = p0.span.lo; let mod_attrs = p0.parse_inner_attributes(); let m0 = try!(p0.parse_mod_items(&token::Eof, mod_inner_lo));