Make lexer buffer the whole file

This way, it won't have to go through a bunch of calls for each
byte fetched.
This commit is contained in:
Marijn Haverbeke 2011-04-08 16:48:17 +02:00
parent cae703c0b1
commit 094d31f5e4
2 changed files with 82 additions and 67 deletions

View file

@ -11,24 +11,26 @@ import util.common;
import util.common.new_str_hash;
state type reader = state obj {
fn is_eof() -> bool;
fn curr() -> char;
fn next() -> char;
impure fn bump();
fn mark();
fn get_filename() -> str;
fn get_mark_pos() -> common.pos;
fn get_curr_pos() -> common.pos;
fn get_keywords() -> hashmap[str,token.token];
fn get_reserved() -> hashmap[str,()];
fn is_eof() -> bool;
fn curr() -> char;
fn next() -> char;
impure fn init();
impure fn bump();
fn mark();
fn get_filename() -> str;
fn get_mark_pos() -> common.pos;
fn get_curr_pos() -> common.pos;
fn get_keywords() -> hashmap[str,token.token];
fn get_reserved() -> hashmap[str,()];
};
impure fn new_reader(io.reader rdr, str filename) -> reader
{
state obj reader(io.reader rdr,
state obj reader(str file,
str filename,
mutable char c,
mutable char n,
uint len,
mutable uint pos,
mutable char ch,
mutable uint mark_line,
mutable uint mark_col,
mutable uint line,
@ -36,64 +38,69 @@ impure fn new_reader(io.reader rdr, str filename) -> reader
hashmap[str,token.token] keywords,
hashmap[str,()] reserved) {
fn is_eof() -> bool {
ret c == (-1) as char;
fn is_eof() -> bool {
ret ch == -1 as char;
}
fn get_curr_pos() -> common.pos {
ret rec(line=line, col=col);
}
fn get_mark_pos() -> common.pos {
ret rec(line=mark_line, col=mark_col);
}
fn get_filename() -> str {
ret filename;
}
fn curr() -> char {
ret ch;
}
fn next() -> char {
if (pos < len) {ret _str.char_at(file, pos);}
else {ret -1 as char;}
}
impure fn init() {
if (pos < len) {
auto next = _str.char_range_at(file, pos);
pos = next._1;
ch = next._0;
}
}
fn get_curr_pos() -> common.pos {
ret rec(line=line, col=col);
}
fn get_mark_pos() -> common.pos {
ret rec(line=mark_line, col=mark_col);
}
fn get_filename() -> str {
ret filename;
}
fn curr() -> char {
ret c;
}
fn next() -> char {
ret n;
}
impure fn bump() {
let char prev = c;
c = n;
if (c == (-1) as char) {
ret;
}
if (prev == '\n') {
impure fn bump() {
if (pos < len) {
if (ch == '\n') {
line += 1u;
col = 0u;
} else {
col += 1u;
}
n = rdr.read_char();
}
fn mark() {
mark_line = line;
mark_col = col;
}
fn get_keywords() -> hashmap[str,token.token] {
ret keywords;
}
fn get_reserved() -> hashmap[str,()] {
ret reserved;
auto next = _str.char_range_at(file, pos);
pos = next._1;
ch = next._0;
} else {
ch = -1 as char;
}
}
fn mark() {
mark_line = line;
mark_col = col;
}
fn get_keywords() -> hashmap[str,token.token] {
ret keywords;
}
fn get_reserved() -> hashmap[str,()] {
ret reserved;
}
}
auto keywords = new_str_hash[token.token]();
keywords.insert("mod", token.MOD);
@ -208,13 +215,14 @@ impure fn new_reader(io.reader rdr, str filename) -> reader
reserved.insert("m128", ()); // IEEE 754-2008 'decimal128'
reserved.insert("dec", ()); // One of m32, m64, m128
ret reader(rdr, filename, rdr.read_char(),
rdr.read_char(), 1u, 0u, 1u, 0u, keywords, reserved);
auto file = _str.unsafe_from_bytes(rdr.read_whole_stream());
auto rd = reader(file, filename, _str.byte_len(file), 0u, -1 as char,
1u, 0u, 1u, 0u, keywords, reserved);
rd.init();
ret rd;
}
fn in_range(char c, char lo, char hi) -> bool {
ret lo <= c && c <= hi;
}
@ -689,7 +697,6 @@ impure fn next_token(reader rdr) -> token.token {
case ('"') {
rdr.bump();
// FIXME: general utf8-consumption support.
while (rdr.curr() != '"') {
alt (rdr.curr()) {
case ('\\') {
@ -850,7 +857,7 @@ impure fn read_block_comment(reader rdr) -> cmnt {
impure fn gather_comments(str path) -> vec[cmnt] {
auto srdr = io.file_reader(path);
auto rdr = lexer.new_reader(srdr, path);
auto rdr = new_reader(srdr, path);
let vec[cmnt] comments = vec();
while (!rdr.is_eof()) {
while (true) {

View file

@ -41,6 +41,7 @@ type reader =
impure fn read_le_uint(uint size) -> uint;
impure fn read_le_int(uint size) -> int;
impure fn read_be_uint(uint size) -> uint;
impure fn read_whole_stream() -> vec[u8];
impure fn seek(int offset, seek_style whence);
impure fn tell() -> uint; // FIXME: eventually u64
@ -170,6 +171,13 @@ state obj new_reader(buf_reader rdr) {
}
ret val;
}
impure fn read_whole_stream() -> vec[u8] {
let vec[u8] buf = vec();
while (!rdr.eof()) {
buf += rdr.read(2048u);
}
ret buf;
}
impure fn seek(int offset, seek_style whence) {
ret rdr.seek(offset, whence);
}