Make rustc_lexer::cursor::Cursor public.

`Cursor` is currently hidden, and the main tokenization path uses
`rustc_lexer::first_token` which involves constructing a new `Cursor`
for every single token, which is weird. Also, `first_token` also can't
handle empty input, so callers have to check for that first.

This commit makes `Cursor` public, so `StringReader` can contain a
`Cursor`, which results in a simpler structure. The commit also changes
`StringReader::advance_token` so it returns an `Option<Token>`,
simplifying the the empty input case.
This commit is contained in:
Nicholas Nethercote 2022-09-26 09:18:23 +10:00
parent 33516ac09a
commit aa6bfaf04b
4 changed files with 26 additions and 36 deletions

View file

@ -13,6 +13,7 @@ use std::collections::VecDeque;
use std::fmt::{Display, Write};
use rustc_data_structures::fx::FxHashMap;
use rustc_lexer::cursor::Cursor;
use rustc_lexer::{LiteralKind, TokenKind};
use rustc_span::edition::Edition;
use rustc_span::symbol::Symbol;
@ -408,15 +409,13 @@ enum Highlight<'a> {
struct TokenIter<'a> {
src: &'a str,
cursor: Cursor<'a>,
}
impl<'a> Iterator for TokenIter<'a> {
type Item = (TokenKind, &'a str);
fn next(&mut self) -> Option<(TokenKind, &'a str)> {
if self.src.is_empty() {
return None;
}
let token = rustc_lexer::first_token(self.src);
let token = self.cursor.advance_token()?;
let (text, rest) = self.src.split_at(token.len as usize);
self.src = rest;
Some((token.kind, text))
@ -525,7 +524,7 @@ impl<'a> Classifier<'a> {
/// Takes as argument the source code to HTML-ify, the rust edition to use and the source code
/// file span which will be used later on by the `span_correspondance_map`.
fn new(src: &str, file_span: Span, decoration_info: Option<DecorationInfo>) -> Classifier<'_> {
let tokens = PeekIter::new(TokenIter { src });
let tokens = PeekIter::new(TokenIter { src, cursor: Cursor::new(src) });
let decorations = decoration_info.map(Decorations::new);
Classifier {
tokens,