feat(lexer): Allow including frontmatter with 'tokenize'

This commit is contained in:
Ed Page 2025-07-09 16:40:22 -05:00
parent 425cd0f571
commit 45a1e492b1
12 changed files with 47 additions and 38 deletions

View file

@ -273,14 +273,15 @@ pub fn strip_shebang(input: &str) -> Option<usize> {
if let Some(input_tail) = input.strip_prefix("#!") {
// Ok, this is a shebang but if the next non-whitespace token is `[`,
// then it may be valid Rust code, so consider it Rust code.
let next_non_whitespace_token = tokenize(input_tail).map(|tok| tok.kind).find(|tok| {
!matches!(
tok,
TokenKind::Whitespace
| TokenKind::LineComment { doc_style: None }
| TokenKind::BlockComment { doc_style: None, .. }
)
});
let next_non_whitespace_token =
tokenize(input_tail, FrontmatterAllowed::No).map(|tok| tok.kind).find(|tok| {
!matches!(
tok,
TokenKind::Whitespace
| TokenKind::LineComment { doc_style: None }
| TokenKind::BlockComment { doc_style: None, .. }
)
});
if next_non_whitespace_token != Some(TokenKind::OpenBracket) {
// No other choice than to consider this a shebang.
return Some(2 + input_tail.lines().next().unwrap_or_default().len());
@ -303,8 +304,16 @@ pub fn validate_raw_str(input: &str, prefix_len: u32) -> Result<(), RawStrError>
}
/// Creates an iterator that produces tokens from the input string.
pub fn tokenize(input: &str) -> impl Iterator<Item = Token> {
let mut cursor = Cursor::new(input, FrontmatterAllowed::No);
///
/// When parsing a full Rust document,
/// first call [`strip_shebang`] and then allow frontmatter with [`FrontmatterAllowed::Yes`].
///
/// When tokenizing a slice of a document, be sure to disallow frontmatter with [`FrontmatterAllowed::No`].
pub fn tokenize(
input: &str,
frontmatter_allowed: FrontmatterAllowed,
) -> impl Iterator<Item = Token> {
let mut cursor = Cursor::new(input, frontmatter_allowed);
std::iter::from_fn(move || {
let token = cursor.advance_token();
if token.kind != TokenKind::Eof { Some(token) } else { None }

View file

@ -125,7 +125,8 @@ fn test_valid_shebang() {
}
fn check_lexing(src: &str, expect: Expect) {
let actual: String = tokenize(src).map(|token| format!("{:?}\n", token)).collect();
let actual: String =
tokenize(src, FrontmatterAllowed::No).map(|token| format!("{:?}\n", token)).collect();
expect.assert_eq(&actual)
}