Remove rustc_lexer::cursor module.

It contains the `Cursor` type and an `impl Cursor` block with a few
methods. But there is a larger `impl Cursor` block in the crate root.
The only other thing in the `cursor` module is the simple
`FrontmatterAllowed` type.

So this commit just moves everything in the `cursor` module (which isn't
much) into the crate root.
This commit is contained in:
Nicholas Nethercote 2026-01-21 15:00:59 +11:00
parent b3cda168c8
commit 2135bc1f10
2 changed files with 125 additions and 130 deletions

View file

@ -1,125 +0,0 @@
use std::str::Chars;
pub enum FrontmatterAllowed {
Yes,
No,
}
/// Peekable iterator over a char sequence.
///
/// Next characters can be peeked via `first` method,
/// and position can be shifted forward via `bump` method.
pub struct Cursor<'a> {
len_remaining: usize,
/// Iterator over chars. Slightly faster than a &str.
chars: Chars<'a>,
pub(crate) frontmatter_allowed: FrontmatterAllowed,
#[cfg(debug_assertions)]
prev: char,
}
pub(crate) const EOF_CHAR: char = '\0';
impl<'a> Cursor<'a> {
pub fn new(input: &'a str, frontmatter_allowed: FrontmatterAllowed) -> Cursor<'a> {
Cursor {
len_remaining: input.len(),
chars: input.chars(),
frontmatter_allowed,
#[cfg(debug_assertions)]
prev: EOF_CHAR,
}
}
pub fn as_str(&self) -> &'a str {
self.chars.as_str()
}
/// Returns the last eaten symbol (or `'\0'` in release builds).
/// (For debug assertions only.)
pub(crate) fn prev(&self) -> char {
#[cfg(debug_assertions)]
{
self.prev
}
#[cfg(not(debug_assertions))]
{
EOF_CHAR
}
}
/// Peeks the next symbol from the input stream without consuming it.
/// If requested position doesn't exist, `EOF_CHAR` is returned.
/// However, getting `EOF_CHAR` doesn't always mean actual end of file,
/// it should be checked with `is_eof` method.
pub fn first(&self) -> char {
// `.next()` optimizes better than `.nth(0)`
self.chars.clone().next().unwrap_or(EOF_CHAR)
}
/// Peeks the second symbol from the input stream without consuming it.
pub(crate) fn second(&self) -> char {
// `.next()` optimizes better than `.nth(1)`
let mut iter = self.chars.clone();
iter.next();
iter.next().unwrap_or(EOF_CHAR)
}
/// Peeks the third symbol from the input stream without consuming it.
pub fn third(&self) -> char {
// `.next()` optimizes better than `.nth(2)`
let mut iter = self.chars.clone();
iter.next();
iter.next();
iter.next().unwrap_or(EOF_CHAR)
}
/// Checks if there is nothing more to consume.
pub(crate) fn is_eof(&self) -> bool {
self.chars.as_str().is_empty()
}
/// Returns amount of already consumed symbols.
pub(crate) fn pos_within_token(&self) -> u32 {
(self.len_remaining - self.chars.as_str().len()) as u32
}
/// Resets the number of bytes consumed to 0.
pub(crate) fn reset_pos_within_token(&mut self) {
self.len_remaining = self.chars.as_str().len();
}
/// Moves to the next character.
pub(crate) fn bump(&mut self) -> Option<char> {
let c = self.chars.next()?;
#[cfg(debug_assertions)]
{
self.prev = c;
}
Some(c)
}
/// Moves to a substring by a number of bytes.
pub(crate) fn bump_bytes(&mut self, n: usize) {
self.chars = self.as_str()[n..].chars();
}
/// Eats symbols while predicate returns true or until the end of file is reached.
pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
// It was tried making optimized version of this for eg. line comments, but
// LLVM can inline all of this and compile it down to fast iteration over bytes.
while predicate(self.first()) && !self.is_eof() {
self.bump();
}
}
pub(crate) fn eat_until(&mut self, byte: u8) {
self.chars = match memchr::memchr(byte, self.as_str().as_bytes()) {
Some(index) => self.as_str()[index..].chars(),
None => "".chars(),
}
}
}

View file

@ -25,15 +25,13 @@
#![deny(unstable_features)]
// tidy-alphabetical-end
mod cursor;
#[cfg(test)]
mod tests;
use std::str::Chars;
use LiteralKind::*;
use TokenKind::*;
use cursor::EOF_CHAR;
pub use cursor::{Cursor, FrontmatterAllowed};
pub use unicode_ident::UNICODE_VERSION;
use unicode_properties::UnicodeEmoji;
@ -407,7 +405,129 @@ pub fn is_ident(string: &str) -> bool {
}
}
impl Cursor<'_> {
pub enum FrontmatterAllowed {
Yes,
No,
}
/// Peekable iterator over a char sequence.
///
/// Next characters can be peeked via `first` method,
/// and position can be shifted forward via `bump` method.
pub struct Cursor<'a> {
len_remaining: usize,
/// Iterator over chars. Slightly faster than a &str.
chars: Chars<'a>,
pub(crate) frontmatter_allowed: FrontmatterAllowed,
#[cfg(debug_assertions)]
prev: char,
}
const EOF_CHAR: char = '\0';
impl<'a> Cursor<'a> {
pub fn new(input: &'a str, frontmatter_allowed: FrontmatterAllowed) -> Cursor<'a> {
Cursor {
len_remaining: input.len(),
chars: input.chars(),
frontmatter_allowed,
#[cfg(debug_assertions)]
prev: EOF_CHAR,
}
}
pub fn as_str(&self) -> &'a str {
self.chars.as_str()
}
/// Returns the last eaten symbol (or `'\0'` in release builds).
/// (For debug assertions only.)
pub(crate) fn prev(&self) -> char {
#[cfg(debug_assertions)]
{
self.prev
}
#[cfg(not(debug_assertions))]
{
EOF_CHAR
}
}
/// Peeks the next symbol from the input stream without consuming it.
/// If requested position doesn't exist, `EOF_CHAR` is returned.
/// However, getting `EOF_CHAR` doesn't always mean actual end of file,
/// it should be checked with `is_eof` method.
pub fn first(&self) -> char {
// `.next()` optimizes better than `.nth(0)`
self.chars.clone().next().unwrap_or(EOF_CHAR)
}
/// Peeks the second symbol from the input stream without consuming it.
pub(crate) fn second(&self) -> char {
// `.next()` optimizes better than `.nth(1)`
let mut iter = self.chars.clone();
iter.next();
iter.next().unwrap_or(EOF_CHAR)
}
/// Peeks the third symbol from the input stream without consuming it.
pub fn third(&self) -> char {
// `.next()` optimizes better than `.nth(2)`
let mut iter = self.chars.clone();
iter.next();
iter.next();
iter.next().unwrap_or(EOF_CHAR)
}
/// Checks if there is nothing more to consume.
pub(crate) fn is_eof(&self) -> bool {
self.chars.as_str().is_empty()
}
/// Returns amount of already consumed symbols.
pub(crate) fn pos_within_token(&self) -> u32 {
(self.len_remaining - self.chars.as_str().len()) as u32
}
/// Resets the number of bytes consumed to 0.
pub(crate) fn reset_pos_within_token(&mut self) {
self.len_remaining = self.chars.as_str().len();
}
/// Moves to the next character.
pub(crate) fn bump(&mut self) -> Option<char> {
let c = self.chars.next()?;
#[cfg(debug_assertions)]
{
self.prev = c;
}
Some(c)
}
/// Moves to a substring by a number of bytes.
pub(crate) fn bump_bytes(&mut self, n: usize) {
self.chars = self.as_str()[n..].chars();
}
/// Eats symbols while predicate returns true or until the end of file is reached.
pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
// It was tried making optimized version of this for eg. line comments, but
// LLVM can inline all of this and compile it down to fast iteration over bytes.
while predicate(self.first()) && !self.is_eof() {
self.bump();
}
}
pub(crate) fn eat_until(&mut self, byte: u8) {
self.chars = match memchr::memchr(byte, self.as_str().as_bytes()) {
Some(index) => self.as_str()[index..].chars(),
None => "".chars(),
}
}
/// Parses a token from the input string.
pub fn advance_token(&mut self) -> Token {
let Some(first_char) = self.bump() else {