feat: added syntax highlighting for code blocks in rustc --explain

This commit adds a heuristics-based syntax highlighter for the `rustc
--explain` command. It uses `rustc_lexer`'s lexer to split the input into
tokens, and matches on them to determine their color.
This commit is contained in:
JayanAXHF 2026-01-10 01:37:51 +05:30
parent d9617c8d9a
commit 67c45b739a
10 changed files with 249 additions and 26 deletions

View file

@ -3809,6 +3809,7 @@ dependencies = [
name = "rustc_driver_impl"
version = "0.0.0"
dependencies = [
"anstyle",
"ctrlc",
"jiff",
"libc",
@ -3834,6 +3835,7 @@ dependencies = [
"rustc_index",
"rustc_infer",
"rustc_interface",
"rustc_lexer",
"rustc_lint",
"rustc_log",
"rustc_macros",

View file

@ -5,6 +5,7 @@ edition = "2024"
[dependencies]
# tidy-alphabetical-start
anstyle = "1.0.13"
jiff = { version = "0.2.5", default-features = false, features = ["std"] }
rustc_abi = { path = "../rustc_abi" }
rustc_ast = { path = "../rustc_ast" }
@ -28,6 +29,7 @@ rustc_incremental = { path = "../rustc_incremental" }
rustc_index = { path = "../rustc_index" }
rustc_infer = { path = "../rustc_infer" }
rustc_interface = { path = "../rustc_interface" }
rustc_lexer = { path = "../rustc_lexer" }
rustc_lint = { path = "../rustc_lint" }
rustc_log = { path = "../rustc_log" }
rustc_macros = { path = "../rustc_macros" }

View file

@ -0,0 +1,159 @@
//! This module provides a syntax highlighter for Rust code.
//! It is used by the `rustc --explain` command.
//!
//! The syntax highlighter uses `rustc_lexer`'s `tokenize`
//! function to split the Rust code into tokens. Each token
//! is then styled according to its kind and its text, and
//! the highlighted output is written to the buffer.
use std::io::{self, Write};
use anstyle::{AnsiColor, Color, Effects, Style};
use rustc_lexer::{LiteralKind, strip_shebang, tokenize};
/// Names of Rust's primitive types. Identifiers equal to one of these
/// are highlighted with the primitive-type color.
const PRIMITIVE_TYPES: &[&str] = &[
    "i8", "i16", "i32", "i64", "i128", "isize", // signed integers
    "u8", "u16", "u32", "u64", "u128", "usize", // unsigned integers
    "f32", "f64", // floating point
    "char", "bool", "str", // others
];
/// Well-known Rust keywords, in alphabetical order (ignoring case).
/// Identifiers equal to one of these are highlighted as keywords.
const KEYWORDS: &[&str] = &[
    "as", "async", "await", "break", "const", "continue", "crate", "dyn", "else", "enum", "extern",
    "false", "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub",
    "ref", "return", "self", "Self", "static", "struct", "super", "trait", "true", "type",
    "unsafe", "use", "where", "while",
];
// Colors for identifiers.

/// Color for identifiers that are well-known keywords.
const KEYWORD_COLOR: AnsiColor = AnsiColor::BrightMagenta;
/// Color for the `use` keyword.
const USE_COLOR: AnsiColor = AnsiColor::BrightMagenta;
/// Color for the `derive` identifier.
const DERIVE_COLOR: AnsiColor = AnsiColor::BrightRed;
/// Color for identifiers that look like a function name.
const FUNCTION_COLOR: AnsiColor = AnsiColor::Blue;
/// Color for identifiers that look like a type name.
const TYPE_COLOR: AnsiColor = AnsiColor::Yellow;
/// Color for identifiers that name a primitive type.
const PRIMITIVE_TYPE_COLOR: AnsiColor = AnsiColor::Cyan;

// Colors for literals.

/// Color for string-like literals.
const STR_LITERAL_COLOR: AnsiColor = AnsiColor::Green;
/// Color for every other kind of literal.
const OTHER_LITERAL_COLOR: AnsiColor = AnsiColor::BrightRed;
/// Convenience entry point: highlights `code` as Rust source and appends
/// the styled text to `buf`.
///
/// This is a thin wrapper that runs `Highlighter::highlight_rustc_lexer`
/// on a freshly created `Highlighter`. It is passed to
/// `write_anstream_buf` in the `lib.rs` file.
///
/// # Errors
///
/// Returns whatever error `Highlighter::highlight_rustc_lexer` reports.
pub fn highlight(code: &str, buf: &mut Vec<u8>) -> io::Result<()> {
    Highlighter::new().highlight_rustc_lexer(code, buf)
}
/// Heuristic syntax highlighter for Rust code, used by the
/// `rustc --explain` command.
#[derive(Default)]
pub struct Highlighter {
    /// Byte offset into the code string of the token currently being
    /// processed, i.e. the summed length of everything consumed so far.
    /// It is used to recover a token's original lexeme from the code
    /// string.
    len_accum: usize,
    /// Set when the previous token requires the next token to be
    /// colored differently.
    ///
    /// For example, after the keyword `fn` the next token (the
    /// function name) is colored differently.
    prev_was_special: bool,
}
impl Highlighter {
    /// Create a new highlighter in its default state.
    pub fn new() -> Self {
        Self::default()
    }

    /// Highlight a Rust code string and write the highlighted
    /// output to the buffer.
    ///
    /// The code is split into tokens with `rustc_lexer::tokenize`, and each
    /// token's original text is written to `buf` wrapped in a style chosen
    /// from the token kind and its lexeme. If `strip_shebang` reports a
    /// leading shebang, that prefix is skipped and not written.
    ///
    /// # Errors
    ///
    /// Returns any error produced while writing to `buf`.
    pub fn highlight_rustc_lexer(&mut self, code: &str, buf: &mut Vec<u8>) -> io::Result<()> {
        use rustc_lexer::TokenKind;

        // Remove shebang from code string
        let stripped_idx = strip_shebang(code).unwrap_or(0);
        self.len_accum = stripped_idx;

        for token in tokenize(&code[stripped_idx..], rustc_lexer::FrontmatterAllowed::No) {
            let start = self.len_accum;
            let end = start + token.len as usize;
            let token_str = &code[start..end];

            let style = if self.prev_was_special {
                // The first non-whitespace token after a special token (e.g.
                // the name following `fn`) is colored as a function name.
                // Test the token kind rather than the lexeme, so that runs of
                // several spaces or newlines do not clear the flag early.
                if !matches!(token.kind, TokenKind::Whitespace) {
                    self.prev_was_special = false;
                }
                Style::new().fg_color(Some(Color::Ansi(FUNCTION_COLOR)))
            } else {
                match token.kind {
                    TokenKind::Ident => {
                        // `fn` requires the next token to be colored as a
                        // function name.
                        if token_str == "fn" {
                            self.prev_was_special = true;
                        }
                        // The checks are ordered from most to least specific.
                        let color = if PRIMITIVE_TYPES.contains(&token_str) {
                            Some(PRIMITIVE_TYPE_COLOR)
                        } else if token_str.starts_with(char::is_uppercase) {
                            // Heuristic: an identifier starting with an
                            // uppercase letter is a type.
                            Some(TYPE_COLOR)
                        } else if token_str == "derive" {
                            Some(DERIVE_COLOR)
                        } else if KEYWORDS.contains(&token_str) {
                            // Keywords keep their color even when directly
                            // followed by `(`, as in `pub(crate)`.
                            Some(if token_str == "use" { USE_COLOR } else { KEYWORD_COLOR })
                        } else if code[end..].starts_with('(') {
                            // Heuristic: an identifier directly followed by
                            // `(` is a function call. Looking at the rest of
                            // the string cannot go out of bounds or split a
                            // multi-byte character.
                            Some(FUNCTION_COLOR)
                        } else {
                            None
                        };
                        match color {
                            Some(color) => Style::new().fg_color(Some(Color::Ansi(color))),
                            None => Style::new(),
                        }
                    }
                    // Color literals
                    TokenKind::Literal { kind, .. } => {
                        // Strings, chars, raw strings, C strings and byte
                        // strings -> Green
                        // Other literals -> Bright Red (Orange-esque)
                        let color = match kind {
                            LiteralKind::Str { .. }
                            | LiteralKind::Char { .. }
                            | LiteralKind::ByteStr { .. }
                            | LiteralKind::CStr { .. }
                            | LiteralKind::RawStr { .. }
                            | LiteralKind::RawByteStr { .. }
                            | LiteralKind::RawCStr { .. } => STR_LITERAL_COLOR,
                            _ => OTHER_LITERAL_COLOR,
                        };
                        Style::new().fg_color(Some(Color::Ansi(color)))
                    }
                    // All other tokens are dimmed
                    _ => Style::new()
                        .fg_color(Some(Color::Ansi(AnsiColor::BrightWhite)))
                        .effects(Effects::DIMMED),
                }
            };

            write!(buf, "{style}{token_str}{style:#}")?;
            self.len_accum = end;
        }
        Ok(())
    }
}

View file

@ -86,6 +86,7 @@ pub mod args;
pub mod pretty;
#[macro_use]
mod print;
pub mod highlighter;
mod session_diagnostics;
// Keep the OS parts of this `cfg` in sync with the `cfg` on the `libc`
@ -521,7 +522,11 @@ fn show_md_content_with_pager(content: &str, color: ColorConfig) {
let mdstream = markdown::MdStream::parse_str(content);
let bufwtr = markdown::create_stdout_bufwtr();
let mut mdbuf = Vec::new();
if mdstream.write_anstream_buf(&mut mdbuf).is_ok() { Some((bufwtr, mdbuf)) } else { None }
if mdstream.write_anstream_buf(&mut mdbuf, Some(&highlighter::highlight)).is_ok() {
Some((bufwtr, mdbuf))
} else {
None
}
};
// Try to print via the pager, pretty output if possible.

View file

@ -18,9 +18,14 @@ impl<'a> MdStream<'a> {
parse::entrypoint(s)
}
/// Write formatted output to an anstream buffer
pub fn write_anstream_buf(&self, buf: &mut Vec<u8>) -> io::Result<()> {
term::entrypoint(self, buf)
/// Write formatted output to a stdout buffer, optionally with
/// a formatter for code blocks
pub fn write_anstream_buf(
&self,
buf: &mut Vec<u8>,
formatter: Option<&(dyn Fn(&str, &mut Vec<u8>) -> io::Result<()> + 'static)>,
) -> io::Result<()> {
term::entrypoint(self, buf, formatter)
}
}

View file

@ -12,29 +12,33 @@ thread_local! {
static CURSOR: Cell<usize> = const { Cell::new(0) };
/// Width of the terminal
static WIDTH: Cell<usize> = const { Cell::new(DEFAULT_COLUMN_WIDTH) };
}
/// Print to terminal output to a buffer
pub(crate) fn entrypoint(stream: &MdStream<'_>, buf: &mut Vec<u8>) -> io::Result<()> {
#[cfg(not(test))]
if let Some((w, _)) = termize::dimensions() {
WIDTH.set(std::cmp::min(w, DEFAULT_COLUMN_WIDTH));
}
write_stream(stream, buf, None, 0)?;
/// Print to the terminal output to a buffer
/// optionally with a formatter for code blocks
pub(crate) fn entrypoint(
stream: &MdStream<'_>,
buf: &mut Vec<u8>,
formatter: Option<&(dyn Fn(&str, &mut Vec<u8>) -> io::Result<()> + 'static)>,
) -> io::Result<()> {
write_stream(stream, buf, None, 0, formatter)?;
buf.write_all(b"\n")
}
/// Write the buffer, reset to the default style after each
/// Write the buffer, reset to the default style after each,
/// optionally with a formatter for code blocks
fn write_stream(
MdStream(stream): &MdStream<'_>,
buf: &mut Vec<u8>,
default: Option<Style>,
indent: usize,
formatter: Option<&(dyn Fn(&str, &mut Vec<u8>) -> io::Result<()> + 'static)>,
) -> io::Result<()> {
for tt in stream {
write_tt(tt, buf, default, indent)?;
write_tt(tt, buf, default, indent, formatter)?;
}
reset_opt_style(buf, default)?;
Ok(())
}
@ -43,12 +47,17 @@ fn write_tt(
buf: &mut Vec<u8>,
default: Option<Style>,
indent: usize,
formatter: Option<&(dyn Fn(&str, &mut Vec<u8>) -> io::Result<()> + 'static)>,
) -> io::Result<()> {
match tt {
MdTree::CodeBlock { txt, lang: _ } => {
reset_opt_style(buf, default)?;
let style = Style::new().effects(Effects::DIMMED);
write!(buf, "{style}{txt}{style:#}")?;
if let Some(formatter) = formatter {
formatter(txt, buf)?;
} else {
let style = Style::new().effects(Effects::DIMMED);
write!(buf, "{style}{txt}{style:#}")?;
}
render_opt_style(buf, default)?;
}
MdTree::CodeInline(txt) => {
@ -105,7 +114,7 @@ fn write_tt(
};
reset_opt_style(buf, default)?;
write!(buf, "{cs}")?;
write_stream(stream, buf, Some(cs), 0)?;
write_stream(stream, buf, Some(cs), 0, None)?;
write!(buf, "{cs:#}")?;
render_opt_style(buf, default)?;
buf.write_all(b"\n")?;
@ -113,12 +122,12 @@ fn write_tt(
MdTree::OrderedListItem(n, stream) => {
let base = format!("{n}. ");
write_wrapping(buf, &format!("{base:<4}"), indent, None, None)?;
write_stream(stream, buf, None, indent + 4)?;
write_stream(stream, buf, None, indent + 4, None)?;
}
MdTree::UnorderedListItem(stream) => {
let base = "* ";
write_wrapping(buf, &format!("{base:<4}"), indent, None, None)?;
write_stream(stream, buf, None, indent + 4)?;
write_stream(stream, buf, None, indent + 4, None)?;
}
// Patterns popped in previous step
MdTree::Comment(_) | MdTree::LinkDef { .. } | MdTree::RefLink { .. } => unreachable!(),

View file

@ -1,13 +1,13 @@
H1 Heading ]8;;http://docs.rs\with a link]8;;\
H1 Heading ]8;;http://docs.rs\with a link]8;;\
H1 content: some words in bold and so does inline code
H2 Heading
H2 Heading
H2 content: some words in italic
H3 Heading
H3 Heading
H3 content: strikethrough text
H4 Heading
H4 Heading
H4 content: A ]8;;https://docs.rs\simple link]8;;\ and a ]8;;http://docs.rs\remote-link]8;;\.
--------------------------------------------------------------------------------------------------------------------------------------------
A section break was above. We can also do paragraph breaks:
@ -24,7 +24,7 @@ Or ordered:
elit quam, pulvinar ac risus in, dictum vehicula turpis. Vestibulum neque est, accumsan in cursus sit amet, dictum a nunc. Suspendisse
aliquet, lorem eu eleifend accumsan, magna neque sodales nisi, a aliquet lectus leo eu sem.
--------------------------------------------------------------------------------------------------------------------------------------------
Code
Code
Both inline code and code blocks are supported:
/// A rust enum

View file

@ -65,7 +65,7 @@ fn test_output() {
let bless = std::env::var_os("RUSTC_BLESS").is_some_and(|v| v != "0");
let ast = MdStream::parse_str(INPUT);
let mut buffer = Vec::new();
ast.write_anstream_buf(&mut buffer).unwrap();
ast.write_anstream_buf(&mut buffer, None).unwrap();
let mut blessed = PathBuf::new();
blessed.extend(OUTPUT_PATH);

View file

@ -0,0 +1,29 @@
//@ run-pass
//@ check-run-results
#![feature(rustc_private)]
use std::io::Write;
extern crate rustc_driver;
extern crate rustc_driver_impl;
use rustc_driver_impl::highlighter::highlight;
const TEST_INPUT: &str = "
struct Foo;
fn baz(x: i32) {
// A function
}
fn main() {
let foo = Foo;
foo.bar();
}
";
/// Highlights `TEST_INPUT` into an in-memory buffer and writes the
/// result to stdout in one go.
fn main() {
    let mut highlighted = Vec::new();
    highlight(TEST_INPUT, &mut highlighted).unwrap();
    std::io::stdout().write_all(&highlighted).unwrap();
}

View file

@ -0,0 +1,12 @@

struct Foo;
fn baz(x: i32) {
// A function
}
fn main() {
let foo = Foo;
foo.bar();
}