feat: added syntax highlighting for code blocks in rustc --explain
This commit adds a heuristics-based syntax highlighter for the `rustc --explain` command. It uses `rustc_lexer`'s lexer to split the input into tokens, and matches on them to determine their color.
This commit is contained in:
parent
d9617c8d9a
commit
67c45b739a
10 changed files with 249 additions and 26 deletions
|
|
@ -3809,6 +3809,7 @@ dependencies = [
|
|||
name = "rustc_driver_impl"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"ctrlc",
|
||||
"jiff",
|
||||
"libc",
|
||||
|
|
@ -3834,6 +3835,7 @@ dependencies = [
|
|||
"rustc_index",
|
||||
"rustc_infer",
|
||||
"rustc_interface",
|
||||
"rustc_lexer",
|
||||
"rustc_lint",
|
||||
"rustc_log",
|
||||
"rustc_macros",
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ edition = "2024"
|
|||
|
||||
[dependencies]
|
||||
# tidy-alphabetical-start
|
||||
anstyle = "1.0.13"
|
||||
jiff = { version = "0.2.5", default-features = false, features = ["std"] }
|
||||
rustc_abi = { path = "../rustc_abi" }
|
||||
rustc_ast = { path = "../rustc_ast" }
|
||||
|
|
@ -28,6 +29,7 @@ rustc_incremental = { path = "../rustc_incremental" }
|
|||
rustc_index = { path = "../rustc_index" }
|
||||
rustc_infer = { path = "../rustc_infer" }
|
||||
rustc_interface = { path = "../rustc_interface" }
|
||||
rustc_lexer = { path = "../rustc_lexer" }
|
||||
rustc_lint = { path = "../rustc_lint" }
|
||||
rustc_log = { path = "../rustc_log" }
|
||||
rustc_macros = { path = "../rustc_macros" }
|
||||
|
|
|
|||
159
compiler/rustc_driver_impl/src/highlighter.rs
Normal file
159
compiler/rustc_driver_impl/src/highlighter.rs
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
//! This module provides a syntax highlighter for Rust code.
|
||||
//! It is used by the `rustc --explain` command.
|
||||
//!
|
||||
//! The syntax highlighter uses `rustc_lexer`'s `tokenize`
//! function to split the Rust code into tokens. It then
//! picks a style for each token in turn and writes the
//! highlighted output to the buffer.
|
||||
use std::io::{self, Write};
|
||||
|
||||
use anstyle::{AnsiColor, Color, Effects, Style};
|
||||
use rustc_lexer::{LiteralKind, strip_shebang, tokenize};
|
||||
|
||||
/// Identifiers that are highlighted as primitive types.
const PRIMITIVE_TYPES: &[&str] = &[
    "i8", "i16", "i32", "i64", "i128", "isize", // signed integers
    "u8", "u16", "u32", "u64", "u128", "usize", // unsigned integers
    "f32", "f64", // floating point
    "char", "bool", // others
];

/// Identifiers that are highlighted as keywords.
///
/// The lexer reports keywords as plain identifiers, so they are recognised
/// by name. The list is kept in alphabetical order.
const KEYWORDS: &[&str] = &[
    "as", "async", "await", "break", "const", "continue", "crate", "dyn", "else", "enum", "extern",
    "false", "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub",
    "ref", "static", "struct", "super", "trait", "true", "type", "unsafe", "use", "where", "while",
];
|
||||
|
||||
const STR_LITERAL_COLOR: AnsiColor = AnsiColor::Green;
|
||||
const OTHER_LITERAL_COLOR: AnsiColor = AnsiColor::BrightRed;
|
||||
const DERIVE_COLOR: AnsiColor = AnsiColor::BrightRed;
|
||||
const KEYWORD_COLOR: AnsiColor = AnsiColor::BrightMagenta;
|
||||
const TYPE_COLOR: AnsiColor = AnsiColor::Yellow;
|
||||
const FUNCTION_COLOR: AnsiColor = AnsiColor::Blue;
|
||||
const USE_COLOR: AnsiColor = AnsiColor::BrightMagenta;
|
||||
const PRIMITIVE_TYPE_COLOR: AnsiColor = AnsiColor::Cyan;
|
||||
|
||||
/// Highlight a Rust code string and write the highlighted
|
||||
/// output to the buffer. It serves as a wrapper around
|
||||
/// `Highlighter::highlight_rustc_lexer`. It is passed to
|
||||
/// `write_anstream_buf` in the `lib.rs` file.
|
||||
pub fn highlight(code: &str, buf: &mut Vec<u8>) -> io::Result<()> {
|
||||
let mut highlighter = Highlighter::default();
|
||||
highlighter.highlight_rustc_lexer(code, buf)
|
||||
}
|
||||
|
||||
/// A syntax highlighter for Rust code
|
||||
/// It is used by the `rustc --explain` command.
|
||||
#[derive(Default)]
|
||||
pub struct Highlighter {
|
||||
/// Used to track if the previous token was a token
|
||||
/// that warrants the next token to be colored differently
|
||||
///
|
||||
/// For example, the keyword `fn` requires the next token
|
||||
/// (the function name) to be colored differently.
|
||||
prev_was_special: bool,
|
||||
/// Used to track the length of tokens that have been
|
||||
/// written so far. This is used to find the original
|
||||
/// lexeme for a token from the code string.
|
||||
len_accum: usize,
|
||||
}
|
||||
|
||||
impl Highlighter {
|
||||
/// Create a new highlighter
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Highlight a Rust code string and write the highlighted
|
||||
/// output to the buffer.
|
||||
pub fn highlight_rustc_lexer(&mut self, code: &str, buf: &mut Vec<u8>) -> io::Result<()> {
|
||||
use rustc_lexer::TokenKind;
|
||||
|
||||
// Remove shebang from code string
|
||||
let stripped_idx = strip_shebang(code).unwrap_or(0);
|
||||
let stripped_code = &code[stripped_idx..];
|
||||
self.len_accum = stripped_idx;
|
||||
let len_accum = &mut self.len_accum;
|
||||
let tokens = tokenize(stripped_code, rustc_lexer::FrontmatterAllowed::No);
|
||||
for token in tokens {
|
||||
let len = token.len as usize;
|
||||
// If the previous token was a special token, and this token is
|
||||
// not a whitespace token, then it should be colored differently
|
||||
let token_str = &code[*len_accum..*len_accum + len];
|
||||
if self.prev_was_special {
|
||||
if token_str != " " {
|
||||
self.prev_was_special = false;
|
||||
}
|
||||
let style = Style::new().fg_color(Some(Color::Ansi(AnsiColor::Blue)));
|
||||
write!(buf, "{style}{token_str}{style:#}")?;
|
||||
*len_accum += len;
|
||||
continue;
|
||||
}
|
||||
match token.kind {
|
||||
TokenKind::Ident => {
|
||||
let mut style = Style::new();
|
||||
// Match if an identifier is a (well-known) keyword
|
||||
if KEYWORDS.contains(&token_str) {
|
||||
if token_str == "fn" {
|
||||
self.prev_was_special = true;
|
||||
}
|
||||
style = style.fg_color(Some(Color::Ansi(KEYWORD_COLOR)));
|
||||
}
|
||||
// The `use` keyword is colored differently
|
||||
if matches!(token_str, "use") {
|
||||
style = style.fg_color(Some(Color::Ansi(USE_COLOR)));
|
||||
}
|
||||
// This heuristic test is to detect if the identifier is
|
||||
// a function call. If it is, then the function identifier is
|
||||
// colored differently.
|
||||
if code[*len_accum..*len_accum + len + 1].ends_with('(') {
|
||||
style = style.fg_color(Some(Color::Ansi(FUNCTION_COLOR)));
|
||||
}
|
||||
// The `derive` keyword is colored differently.
|
||||
if token_str == "derive" {
|
||||
style = style.fg_color(Some(Color::Ansi(DERIVE_COLOR)));
|
||||
}
|
||||
// This heuristic test is to detect if the identifier is
|
||||
// a type. If it is, then the identifier is colored differently.
|
||||
if matches!(token_str.chars().next().map(|c| c.is_uppercase()), Some(true)) {
|
||||
style = style.fg_color(Some(Color::Ansi(TYPE_COLOR)));
|
||||
}
|
||||
// This if statement is to detect if the identifier is a primitive type.
|
||||
if PRIMITIVE_TYPES.contains(&token_str) {
|
||||
style = style.fg_color(Some(Color::Ansi(PRIMITIVE_TYPE_COLOR)));
|
||||
}
|
||||
write!(buf, "{style}{token_str}{style:#}")?;
|
||||
}
|
||||
|
||||
// Color literals
|
||||
TokenKind::Literal { kind, suffix_start: _ } => {
|
||||
// Strings -> Green
|
||||
// Chars -> Green
|
||||
// Raw strings -> Green
|
||||
// C strings -> Green
|
||||
// Byte Strings -> Green
|
||||
// Other literals -> Bright Red (Orage-esque)
|
||||
let style = match kind {
|
||||
LiteralKind::Str { terminated: _ }
|
||||
| LiteralKind::Char { terminated: _ }
|
||||
| LiteralKind::RawStr { n_hashes: _ }
|
||||
| LiteralKind::CStr { terminated: _ } => {
|
||||
Style::new().fg_color(Some(Color::Ansi(STR_LITERAL_COLOR)))
|
||||
}
|
||||
_ => Style::new().fg_color(Some(Color::Ansi(OTHER_LITERAL_COLOR))),
|
||||
};
|
||||
write!(buf, "{style}{token_str}{style:#}")?;
|
||||
}
|
||||
_ => {
|
||||
// All other tokens are dimmed
|
||||
let style = Style::new()
|
||||
.fg_color(Some(Color::Ansi(AnsiColor::BrightWhite)))
|
||||
.effects(Effects::DIMMED);
|
||||
write!(buf, "{style}{token_str}{style:#}")?;
|
||||
}
|
||||
}
|
||||
*len_accum += len;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
@ -86,6 +86,7 @@ pub mod args;
|
|||
pub mod pretty;
|
||||
#[macro_use]
|
||||
mod print;
|
||||
pub mod highlighter;
|
||||
mod session_diagnostics;
|
||||
|
||||
// Keep the OS parts of this `cfg` in sync with the `cfg` on the `libc`
|
||||
|
|
@ -521,7 +522,11 @@ fn show_md_content_with_pager(content: &str, color: ColorConfig) {
|
|||
let mdstream = markdown::MdStream::parse_str(content);
|
||||
let bufwtr = markdown::create_stdout_bufwtr();
|
||||
let mut mdbuf = Vec::new();
|
||||
if mdstream.write_anstream_buf(&mut mdbuf).is_ok() { Some((bufwtr, mdbuf)) } else { None }
|
||||
if mdstream.write_anstream_buf(&mut mdbuf, Some(&highlighter::highlight)).is_ok() {
|
||||
Some((bufwtr, mdbuf))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
// Try to print via the pager, pretty output if possible.
|
||||
|
|
|
|||
|
|
@ -18,9 +18,14 @@ impl<'a> MdStream<'a> {
|
|||
parse::entrypoint(s)
|
||||
}
|
||||
|
||||
/// Write formatted output to an anstream buffer
|
||||
pub fn write_anstream_buf(&self, buf: &mut Vec<u8>) -> io::Result<()> {
|
||||
term::entrypoint(self, buf)
|
||||
/// Write formatted output to a stdout buffer, optionally with
|
||||
/// a formatter for code blocks
|
||||
pub fn write_anstream_buf(
|
||||
&self,
|
||||
buf: &mut Vec<u8>,
|
||||
formatter: Option<&(dyn Fn(&str, &mut Vec<u8>) -> io::Result<()> + 'static)>,
|
||||
) -> io::Result<()> {
|
||||
term::entrypoint(self, buf, formatter)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -12,29 +12,33 @@ thread_local! {
|
|||
static CURSOR: Cell<usize> = const { Cell::new(0) };
|
||||
/// Width of the terminal
|
||||
static WIDTH: Cell<usize> = const { Cell::new(DEFAULT_COLUMN_WIDTH) };
|
||||
|
||||
}
|
||||
|
||||
/// Print to terminal output to a buffer
|
||||
pub(crate) fn entrypoint(stream: &MdStream<'_>, buf: &mut Vec<u8>) -> io::Result<()> {
|
||||
#[cfg(not(test))]
|
||||
if let Some((w, _)) = termize::dimensions() {
|
||||
WIDTH.set(std::cmp::min(w, DEFAULT_COLUMN_WIDTH));
|
||||
}
|
||||
write_stream(stream, buf, None, 0)?;
|
||||
/// Print to the terminal output to a buffer
|
||||
/// optionally with a formatter for code blocks
|
||||
pub(crate) fn entrypoint(
|
||||
stream: &MdStream<'_>,
|
||||
buf: &mut Vec<u8>,
|
||||
formatter: Option<&(dyn Fn(&str, &mut Vec<u8>) -> io::Result<()> + 'static)>,
|
||||
) -> io::Result<()> {
|
||||
write_stream(stream, buf, None, 0, formatter)?;
|
||||
buf.write_all(b"\n")
|
||||
}
|
||||
/// Write the buffer, reset to the default style after each
|
||||
|
||||
/// Write the buffer, reset to the default style after each,
|
||||
/// optionally with a formatter for code blocks
|
||||
fn write_stream(
|
||||
MdStream(stream): &MdStream<'_>,
|
||||
buf: &mut Vec<u8>,
|
||||
|
||||
default: Option<Style>,
|
||||
indent: usize,
|
||||
formatter: Option<&(dyn Fn(&str, &mut Vec<u8>) -> io::Result<()> + 'static)>,
|
||||
) -> io::Result<()> {
|
||||
for tt in stream {
|
||||
write_tt(tt, buf, default, indent)?;
|
||||
write_tt(tt, buf, default, indent, formatter)?;
|
||||
}
|
||||
reset_opt_style(buf, default)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -43,12 +47,17 @@ fn write_tt(
|
|||
buf: &mut Vec<u8>,
|
||||
default: Option<Style>,
|
||||
indent: usize,
|
||||
formatter: Option<&(dyn Fn(&str, &mut Vec<u8>) -> io::Result<()> + 'static)>,
|
||||
) -> io::Result<()> {
|
||||
match tt {
|
||||
MdTree::CodeBlock { txt, lang: _ } => {
|
||||
reset_opt_style(buf, default)?;
|
||||
let style = Style::new().effects(Effects::DIMMED);
|
||||
write!(buf, "{style}{txt}{style:#}")?;
|
||||
if let Some(formatter) = formatter {
|
||||
formatter(txt, buf)?;
|
||||
} else {
|
||||
let style = Style::new().effects(Effects::DIMMED);
|
||||
write!(buf, "{style}{txt}{style:#}")?;
|
||||
}
|
||||
render_opt_style(buf, default)?;
|
||||
}
|
||||
MdTree::CodeInline(txt) => {
|
||||
|
|
@ -105,7 +114,7 @@ fn write_tt(
|
|||
};
|
||||
reset_opt_style(buf, default)?;
|
||||
write!(buf, "{cs}")?;
|
||||
write_stream(stream, buf, Some(cs), 0)?;
|
||||
write_stream(stream, buf, Some(cs), 0, None)?;
|
||||
write!(buf, "{cs:#}")?;
|
||||
render_opt_style(buf, default)?;
|
||||
buf.write_all(b"\n")?;
|
||||
|
|
@ -113,12 +122,12 @@ fn write_tt(
|
|||
MdTree::OrderedListItem(n, stream) => {
|
||||
let base = format!("{n}. ");
|
||||
write_wrapping(buf, &format!("{base:<4}"), indent, None, None)?;
|
||||
write_stream(stream, buf, None, indent + 4)?;
|
||||
write_stream(stream, buf, None, indent + 4, None)?;
|
||||
}
|
||||
MdTree::UnorderedListItem(stream) => {
|
||||
let base = "* ";
|
||||
write_wrapping(buf, &format!("{base:<4}"), indent, None, None)?;
|
||||
write_stream(stream, buf, None, indent + 4)?;
|
||||
write_stream(stream, buf, None, indent + 4, None)?;
|
||||
}
|
||||
// Patterns popped in previous step
|
||||
MdTree::Comment(_) | MdTree::LinkDef { .. } | MdTree::RefLink { .. } => unreachable!(),
|
||||
|
|
|
|||
|
|
@ -1,13 +1,13 @@
|
|||
[1m[4m[96mH1 Heading ]8;;http://docs.rs\with a link]8;;\[0m[0m
|
||||
[1m[4m[96mH1 Heading ]8;;http://docs.rs\with a link]8;;\[0m
|
||||
H1 content: [1msome words in bold[0m and [2mso does inline code[0m
|
||||
|
||||
[4m[96mH2 Heading[0m[0m
|
||||
[4m[96mH2 Heading[0m
|
||||
H2 content: [3msome words in italic[0m
|
||||
|
||||
[3m[96mH3 Heading[0m[0m
|
||||
[3m[96mH3 Heading[0m
|
||||
H3 content: [9mstrikethrough[0m text
|
||||
|
||||
[3m[4m[36mH4 Heading[0m[0m
|
||||
[3m[4m[36mH4 Heading[0m
|
||||
H4 content: A ]8;;https://docs.rs\simple link]8;;\ and a ]8;;http://docs.rs\remote-link]8;;\.
|
||||
--------------------------------------------------------------------------------------------------------------------------------------------
|
||||
A section break was above. We can also do paragraph breaks:
|
||||
|
|
@ -24,7 +24,7 @@ Or ordered:
|
|||
elit quam, pulvinar ac risus in, dictum vehicula turpis. Vestibulum neque est, accumsan in cursus sit amet, dictum a nunc. Suspendisse
|
||||
aliquet, lorem eu eleifend accumsan, magna neque sodales nisi, a aliquet lectus leo eu sem.
|
||||
--------------------------------------------------------------------------------------------------------------------------------------------
|
||||
[4m[96mCode[0m[0m
|
||||
[4m[96mCode[0m
|
||||
Both [2minline code[0m and code blocks are supported:
|
||||
|
||||
[2m/// A rust enum
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ fn test_output() {
|
|||
let bless = std::env::var_os("RUSTC_BLESS").is_some_and(|v| v != "0");
|
||||
let ast = MdStream::parse_str(INPUT);
|
||||
let mut buffer = Vec::new();
|
||||
ast.write_anstream_buf(&mut buffer).unwrap();
|
||||
ast.write_anstream_buf(&mut buffer, None).unwrap();
|
||||
|
||||
let mut blessed = PathBuf::new();
|
||||
blessed.extend(OUTPUT_PATH);
|
||||
|
|
|
|||
29
tests/ui-fulldeps/explain_highlighter.rs
Normal file
29
tests/ui-fulldeps/explain_highlighter.rs
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
//@ run-pass
|
||||
//@ check-run-results
|
||||
|
||||
#![feature(rustc_private)]
|
||||
use std::io::Write;
|
||||
extern crate rustc_driver;
|
||||
extern crate rustc_driver_impl;
|
||||
|
||||
use rustc_driver_impl::highlighter::highlight;
|
||||
|
||||
const TEST_INPUT: &str = "
|
||||
struct Foo;
|
||||
|
||||
fn baz(x: i32) {
|
||||
// A function
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let foo = Foo;
|
||||
foo.bar();
|
||||
}
|
||||
";
|
||||
|
||||
fn main() {
|
||||
let mut buf = Vec::new();
|
||||
highlight(TEST_INPUT, &mut buf).unwrap();
|
||||
let mut stdout = std::io::stdout();
|
||||
stdout.write_all(&buf).unwrap();
|
||||
}
|
||||
12
tests/ui-fulldeps/explain_highlighter.run.stdout
Normal file
12
tests/ui-fulldeps/explain_highlighter.run.stdout
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
[2m[97m
|
||||
[0m[95mstruct[0m[2m[97m [0m[33mFoo[0m[2m[97m;[0m[2m[97m
|
||||
|
||||
[0m[95mfn[0m[34m [0m[34mbaz[0m[2m[97m([0mx[2m[97m:[0m[2m[97m [0m[36mi32[0m[2m[97m)[0m[2m[97m [0m[2m[97m{[0m[2m[97m
|
||||
[0m[2m[97m// A function[0m[2m[97m
|
||||
[0m[2m[97m}[0m[2m[97m
|
||||
|
||||
[0m[95mfn[0m[34m [0m[34mmain[0m[2m[97m([0m[2m[97m)[0m[2m[97m [0m[2m[97m{[0m[2m[97m
|
||||
[0m[95mlet[0m[2m[97m [0mfoo[2m[97m [0m[2m[97m=[0m[2m[97m [0m[33mFoo[0m[2m[97m;[0m[2m[97m
|
||||
[0mfoo[2m[97m.[0m[34mbar[0m[2m[97m([0m[2m[97m)[0m[2m[97m;[0m[2m[97m
|
||||
[0m[2m[97m}[0m[2m[97m
|
||||
[0m
|
||||
Loading…
Add table
Add a link
Reference in a new issue