Rollup merge of #150895 - rustc_colored_explain, r=Kivooeo

rustc_errors: Add (heuristic) Syntax Highlighting for `rustc --explain`

This PR adds a feature that enables `rustc --explain <error>` to show syntax-highlighted code blocks. Due to performance, size and complexity constraints, the highlighter is heavily heuristic: it relies on conventions such as capitalization to infer what an identifier represents. The implementation details are specified below.
# Changes
1. Change `term::entrypoint` to `term::entrypoint_with_formatter`, which takes an optional third argument, which is a function pointer to a formatter. ([compiler/rustc_errors/src/markdown/mod.rs](https://github.com/rust-lang/rust/compare/main...JayanAXHF:rust:rustc_colored_explain?expand=1#diff-a6e139cadbc2e6922d816eb08f9e2c7b48304d09e6588227e2b70215c4f0725c))
2. Change `MdStream::write_anstream_buf` to be a wrapper around a new function, `MdStream::write_anstream_buf_with_formatter`, which takes a function pointer to a formatter. ([compiler/rustc_errors/src/markdown/mod.rs](https://github.com/rust-lang/rust/compare/main...JayanAXHF:rust:rustc_colored_explain?expand=1#diff-a6e139cadbc2e6922d816eb08f9e2c7b48304d09e6588227e2b70215c4f0725c))
3. Change [`compiler/rustc_driver_impl/src/lib.rs`](https://github.com/rust-lang/rust/compare/main...JayanAXHF:rust:rustc_colored_explain?expand=1#diff-39877a2556ea309c89384956740d5892a59cef024aa9473cce16bbdd99287937) to call `MdStream::write_anstream_buf_with_formatter` instead of `MdStream::write_anstream_buf`.
4. Add a `compiler/rustc_driver_impl/src/highlighter.rs` file, which contains the actual syntax highlighter.

# Implementation Details
1. The highlighter starts from the `highlight` function defined in `compiler/rustc_driver_impl/src/highlighter.rs`. It creates a new instance of the `Highlighter` struct, and calls its `highlight_rustc_lexer` function to start highlighting.
2. The `highlight_rustc_lexer` function uses `rustc_lexer` to lex the code into `Token`s. `rustc_lexer` was chosen since it preserves the newlines after scanning.
3. Based on the kind of token (`TokenKind`), we color the corresponding lexeme.
## Highlighter Implementation
### Identifiers
1. All identifiers that match a (non-exhaustive and minimal) list of keywords are coloured magenta.
2. An identifier that begins with a capital letter is assumed to be a type. There is no distinction between a `Trait` and a type, since that would involve name resolution, and the parts of `rustc` that perform name resolution on code do not preserve the original formatting. (An attempt was made to use `rustc_parse`'s lexer and `TokenStream` and print the result with the pretty printer, but this failed to preserve the formatting and was generally more complex to work with.)
3. An identifier that is immediately followed by a parenthesis is recognized as a function identifier, and coloured blue.
## Literals
1. A `String` literal (or its corresponding `Raw`, `C` and `Byte` versions) is colored green.
2. All other literals are colored bright red (orange-esque).
## Everything Else

Everything else is colored bright white and dimmed, to create a grayish colour.

---
# Demo
<img width="1864" height="2136" alt="image" src="https://github.com/user-attachments/assets/b17d3a71-e641-4457-be85-5e5b1cea2954" />

<caption> Command: <code>rustc --explain E0520</code> </caption>

---
This description was not generated by an LLM (:p)

cc: @bjorn3
This commit is contained in:
Jonathan Brouwer 2026-01-19 20:53:21 +01:00 committed by GitHub
commit 8a22babce9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 249 additions and 26 deletions

View file

@ -3767,6 +3767,7 @@ dependencies = [
name = "rustc_driver_impl"
version = "0.0.0"
dependencies = [
"anstyle",
"ctrlc",
"jiff",
"libc",
@ -3792,6 +3793,7 @@ dependencies = [
"rustc_index",
"rustc_infer",
"rustc_interface",
"rustc_lexer",
"rustc_lint",
"rustc_log",
"rustc_macros",

View file

@ -5,6 +5,7 @@ edition = "2024"
[dependencies]
# tidy-alphabetical-start
anstyle = "1.0.13"
jiff = { version = "0.2.5", default-features = false, features = ["std"] }
rustc_abi = { path = "../rustc_abi" }
rustc_ast = { path = "../rustc_ast" }
@ -28,6 +29,7 @@ rustc_incremental = { path = "../rustc_incremental" }
rustc_index = { path = "../rustc_index" }
rustc_infer = { path = "../rustc_infer" }
rustc_interface = { path = "../rustc_interface" }
rustc_lexer = { path = "../rustc_lexer" }
rustc_lint = { path = "../rustc_lint" }
rustc_log = { path = "../rustc_log" }
rustc_macros = { path = "../rustc_macros" }

View file

@ -0,0 +1,159 @@
//! This module provides a syntax highlighter for Rust code.
//! It is used by the `rustc --explain` command.
//!
//! The syntax highlighter uses `rustc_lexer`'s `tokenize`
//! function to lex the Rust code into a stream of tokens.
//! The highlighter then picks a style for each token in turn
//! and writes the highlighted output to the buffer.
use std::io::{self, Write};
use anstyle::{AnsiColor, Color, Effects, Style};
use rustc_lexer::{LiteralKind, strip_shebang, tokenize};
/// Names of primitive types that get their own colour.
///
/// Matched against the exact text of an identifier token.
const PRIMITIVE_TYPES: &[&str] = &[
    "i8", "i16", "i32", "i64", "i128", "isize", // signed integers
    "u8", "u16", "u32", "u64", "u128", "usize", // unsigned integers
    "f32", "f64", // floating point
    "char", "bool", // others
];

/// A deliberately minimal, non-exhaustive list of keywords.
///
/// Matched against the exact text of an identifier token; the order of the
/// entries is irrelevant, so they are kept alphabetical for readability.
const KEYWORDS: &[&str] = &[
    "as", "async", "await", "break", "const", "continue", "crate", "dyn", "else", "enum", "extern",
    "false", "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub",
    "ref", "static", "struct", "super", "trait", "true", "type", "unsafe", "use", "where", "while",
];
// Palette used by the highlighter: one constant per token category.

// Identifier categories.
/// Identifiers listed in `KEYWORDS`.
const KEYWORD_COLOR: AnsiColor = AnsiColor::BrightMagenta;
/// The `use` keyword.
const USE_COLOR: AnsiColor = AnsiColor::BrightMagenta;
/// The `derive` identifier.
const DERIVE_COLOR: AnsiColor = AnsiColor::BrightRed;
/// Identifiers immediately followed by `(`.
const FUNCTION_COLOR: AnsiColor = AnsiColor::Blue;
/// Identifiers starting with an uppercase letter.
const TYPE_COLOR: AnsiColor = AnsiColor::Yellow;
/// Identifiers listed in `PRIMITIVE_TYPES`.
const PRIMITIVE_TYPE_COLOR: AnsiColor = AnsiColor::Cyan;

// Literal categories.
/// String-like literals.
const STR_LITERAL_COLOR: AnsiColor = AnsiColor::Green;
/// Every other literal.
const OTHER_LITERAL_COLOR: AnsiColor = AnsiColor::BrightRed;
/// Syntax-highlight `code` and append the styled text to `buf`.
///
/// This is a convenience wrapper that runs
/// `Highlighter::highlight_rustc_lexer` on a fresh, default-initialised
/// `Highlighter`. It is the formatter handed to `write_anstream_buf`
/// in the `lib.rs` file.
pub fn highlight(code: &str, buf: &mut Vec<u8>) -> io::Result<()> {
    Highlighter::default().highlight_rustc_lexer(code, buf)
}
/// A heuristic syntax highlighter for Rust code.
///
/// It is used by the `rustc --explain` command.
#[derive(Default)]
pub struct Highlighter {
    /// Number of bytes of the code string that have been consumed so far.
    ///
    /// It is used to slice the original lexeme of each token out of the
    /// code string, since tokens only carry their length.
    len_accum: usize,
    /// Whether the previous token requires the next token to be coloured
    /// differently.
    ///
    /// For example, the keyword `fn` makes the token after it
    /// (the function name) get a different colour.
    prev_was_special: bool,
}
impl Highlighter {
/// Create a new highlighter
pub fn new() -> Self {
Self::default()
}
/// Highlight a Rust code string and write the highlighted
/// output to the buffer.
pub fn highlight_rustc_lexer(&mut self, code: &str, buf: &mut Vec<u8>) -> io::Result<()> {
use rustc_lexer::TokenKind;
// Remove shebang from code string
let stripped_idx = strip_shebang(code).unwrap_or(0);
let stripped_code = &code[stripped_idx..];
self.len_accum = stripped_idx;
let len_accum = &mut self.len_accum;
let tokens = tokenize(stripped_code, rustc_lexer::FrontmatterAllowed::No);
for token in tokens {
let len = token.len as usize;
// If the previous token was a special token, and this token is
// not a whitespace token, then it should be colored differently
let token_str = &code[*len_accum..*len_accum + len];
if self.prev_was_special {
if token_str != " " {
self.prev_was_special = false;
}
let style = Style::new().fg_color(Some(Color::Ansi(AnsiColor::Blue)));
write!(buf, "{style}{token_str}{style:#}")?;
*len_accum += len;
continue;
}
match token.kind {
TokenKind::Ident => {
let mut style = Style::new();
// Match if an identifier is a (well-known) keyword
if KEYWORDS.contains(&token_str) {
if token_str == "fn" {
self.prev_was_special = true;
}
style = style.fg_color(Some(Color::Ansi(KEYWORD_COLOR)));
}
// The `use` keyword is colored differently
if matches!(token_str, "use") {
style = style.fg_color(Some(Color::Ansi(USE_COLOR)));
}
// This heuristic test is to detect if the identifier is
// a function call. If it is, then the function identifier is
// colored differently.
if code[*len_accum..*len_accum + len + 1].ends_with('(') {
style = style.fg_color(Some(Color::Ansi(FUNCTION_COLOR)));
}
// The `derive` keyword is colored differently.
if token_str == "derive" {
style = style.fg_color(Some(Color::Ansi(DERIVE_COLOR)));
}
// This heuristic test is to detect if the identifier is
// a type. If it is, then the identifier is colored differently.
if matches!(token_str.chars().next().map(|c| c.is_uppercase()), Some(true)) {
style = style.fg_color(Some(Color::Ansi(TYPE_COLOR)));
}
// This if statement is to detect if the identifier is a primitive type.
if PRIMITIVE_TYPES.contains(&token_str) {
style = style.fg_color(Some(Color::Ansi(PRIMITIVE_TYPE_COLOR)));
}
write!(buf, "{style}{token_str}{style:#}")?;
}
// Color literals
TokenKind::Literal { kind, suffix_start: _ } => {
// Strings -> Green
// Chars -> Green
// Raw strings -> Green
// C strings -> Green
// Byte Strings -> Green
// Other literals -> Bright Red (Orage-esque)
let style = match kind {
LiteralKind::Str { terminated: _ }
| LiteralKind::Char { terminated: _ }
| LiteralKind::RawStr { n_hashes: _ }
| LiteralKind::CStr { terminated: _ } => {
Style::new().fg_color(Some(Color::Ansi(STR_LITERAL_COLOR)))
}
_ => Style::new().fg_color(Some(Color::Ansi(OTHER_LITERAL_COLOR))),
};
write!(buf, "{style}{token_str}{style:#}")?;
}
_ => {
// All other tokens are dimmed
let style = Style::new()
.fg_color(Some(Color::Ansi(AnsiColor::BrightWhite)))
.effects(Effects::DIMMED);
write!(buf, "{style}{token_str}{style:#}")?;
}
}
*len_accum += len;
}
Ok(())
}
}

View file

@ -85,6 +85,7 @@ pub mod args;
pub mod pretty;
#[macro_use]
mod print;
pub mod highlighter;
mod session_diagnostics;
// Keep the OS parts of this `cfg` in sync with the `cfg` on the `libc`
@ -526,7 +527,11 @@ fn show_md_content_with_pager(content: &str, color: ColorConfig) {
let mdstream = markdown::MdStream::parse_str(content);
let bufwtr = markdown::create_stdout_bufwtr();
let mut mdbuf = Vec::new();
if mdstream.write_anstream_buf(&mut mdbuf).is_ok() { Some((bufwtr, mdbuf)) } else { None }
if mdstream.write_anstream_buf(&mut mdbuf, Some(&highlighter::highlight)).is_ok() {
Some((bufwtr, mdbuf))
} else {
None
}
};
// Try to print via the pager, pretty output if possible.

View file

@ -18,9 +18,14 @@ impl<'a> MdStream<'a> {
parse::entrypoint(s)
}
/// Write formatted output to an anstream buffer
pub fn write_anstream_buf(&self, buf: &mut Vec<u8>) -> io::Result<()> {
term::entrypoint(self, buf)
/// Write formatted output to a stdout buffer, optionally with
/// a formatter for code blocks
pub fn write_anstream_buf(
&self,
buf: &mut Vec<u8>,
formatter: Option<&(dyn Fn(&str, &mut Vec<u8>) -> io::Result<()> + 'static)>,
) -> io::Result<()> {
term::entrypoint(self, buf, formatter)
}
}

View file

@ -12,29 +12,33 @@ thread_local! {
static CURSOR: Cell<usize> = const { Cell::new(0) };
/// Width of the terminal
static WIDTH: Cell<usize> = const { Cell::new(DEFAULT_COLUMN_WIDTH) };
}
/// Print to terminal output to a buffer
pub(crate) fn entrypoint(stream: &MdStream<'_>, buf: &mut Vec<u8>) -> io::Result<()> {
#[cfg(not(test))]
if let Some((w, _)) = termize::dimensions() {
WIDTH.set(std::cmp::min(w, DEFAULT_COLUMN_WIDTH));
}
write_stream(stream, buf, None, 0)?;
/// Print to the terminal output to a buffer
/// optionally with a formatter for code blocks
pub(crate) fn entrypoint(
stream: &MdStream<'_>,
buf: &mut Vec<u8>,
formatter: Option<&(dyn Fn(&str, &mut Vec<u8>) -> io::Result<()> + 'static)>,
) -> io::Result<()> {
write_stream(stream, buf, None, 0, formatter)?;
buf.write_all(b"\n")
}
/// Write the buffer, reset to the default style after each
/// Write the buffer, reset to the default style after each,
/// optionally with a formatter for code blocks
fn write_stream(
MdStream(stream): &MdStream<'_>,
buf: &mut Vec<u8>,
default: Option<Style>,
indent: usize,
formatter: Option<&(dyn Fn(&str, &mut Vec<u8>) -> io::Result<()> + 'static)>,
) -> io::Result<()> {
for tt in stream {
write_tt(tt, buf, default, indent)?;
write_tt(tt, buf, default, indent, formatter)?;
}
reset_opt_style(buf, default)?;
Ok(())
}
@ -43,12 +47,17 @@ fn write_tt(
buf: &mut Vec<u8>,
default: Option<Style>,
indent: usize,
formatter: Option<&(dyn Fn(&str, &mut Vec<u8>) -> io::Result<()> + 'static)>,
) -> io::Result<()> {
match tt {
MdTree::CodeBlock { txt, lang: _ } => {
reset_opt_style(buf, default)?;
let style = Style::new().effects(Effects::DIMMED);
write!(buf, "{style}{txt}{style:#}")?;
if let Some(formatter) = formatter {
formatter(txt, buf)?;
} else {
let style = Style::new().effects(Effects::DIMMED);
write!(buf, "{style}{txt}{style:#}")?;
}
render_opt_style(buf, default)?;
}
MdTree::CodeInline(txt) => {
@ -105,7 +114,7 @@ fn write_tt(
};
reset_opt_style(buf, default)?;
write!(buf, "{cs}")?;
write_stream(stream, buf, Some(cs), 0)?;
write_stream(stream, buf, Some(cs), 0, None)?;
write!(buf, "{cs:#}")?;
render_opt_style(buf, default)?;
buf.write_all(b"\n")?;
@ -113,12 +122,12 @@ fn write_tt(
MdTree::OrderedListItem(n, stream) => {
let base = format!("{n}. ");
write_wrapping(buf, &format!("{base:<4}"), indent, None, None)?;
write_stream(stream, buf, None, indent + 4)?;
write_stream(stream, buf, None, indent + 4, None)?;
}
MdTree::UnorderedListItem(stream) => {
let base = "* ";
write_wrapping(buf, &format!("{base:<4}"), indent, None, None)?;
write_stream(stream, buf, None, indent + 4)?;
write_stream(stream, buf, None, indent + 4, None)?;
}
// Patterns popped in previous step
MdTree::Comment(_) | MdTree::LinkDef { .. } | MdTree::RefLink { .. } => unreachable!(),

View file

@ -1,13 +1,13 @@
H1 Heading ]8;;http://docs.rs\with a link]8;;\
H1 Heading ]8;;http://docs.rs\with a link]8;;\
H1 content: some words in bold and so does inline code
H2 Heading
H2 Heading
H2 content: some words in italic
H3 Heading
H3 Heading
H3 content: strikethrough text
H4 Heading
H4 Heading
H4 content: A ]8;;https://docs.rs\simple link]8;;\ and a ]8;;http://docs.rs\remote-link]8;;\.
--------------------------------------------------------------------------------------------------------------------------------------------
A section break was above. We can also do paragraph breaks:
@ -24,7 +24,7 @@ Or ordered:
elit quam, pulvinar ac risus in, dictum vehicula turpis. Vestibulum neque est, accumsan in cursus sit amet, dictum a nunc. Suspendisse
aliquet, lorem eu eleifend accumsan, magna neque sodales nisi, a aliquet lectus leo eu sem.
--------------------------------------------------------------------------------------------------------------------------------------------
Code
Code
Both inline code and code blocks are supported:
/// A rust enum

View file

@ -65,7 +65,7 @@ fn test_output() {
let bless = std::env::var_os("RUSTC_BLESS").is_some_and(|v| v != "0");
let ast = MdStream::parse_str(INPUT);
let mut buffer = Vec::new();
ast.write_anstream_buf(&mut buffer).unwrap();
ast.write_anstream_buf(&mut buffer, None).unwrap();
let mut blessed = PathBuf::new();
blessed.extend(OUTPUT_PATH);

View file

@ -0,0 +1,29 @@
//@ run-pass
//@ check-run-results
#![feature(rustc_private)]
use std::io::Write;
extern crate rustc_driver;
extern crate rustc_driver_impl;
use rustc_driver_impl::highlighter::highlight;
// Sample Rust snippet that is passed to `highlight` in `main`.
const TEST_INPUT: &str = "
struct Foo;
fn baz(x: i32) {
// A function
}
fn main() {
let foo = Foo;
foo.bar();
}
";
// Highlight `TEST_INPUT` and print the styled bytes to stdout.
fn main() {
    let mut highlighted = Vec::new();
    highlight(TEST_INPUT, &mut highlighted).unwrap();
    std::io::stdout().write_all(&highlighted).unwrap();
}

View file

@ -0,0 +1,12 @@

struct Foo;
fn baz(x: i32) {
// A function
}
fn main() {
let foo = Foo;
foo.bar();
}