Introduce ByteSymbol.

It's like `Symbol` but for byte strings. The interner is now used for
both `Symbol` and `ByteSymbol`. E.g. if you intern `"dog"` and `b"dog"`,
you'll get a `Symbol` and a `ByteSymbol` with the same index, and the
characters will only be stored once.

The motivation for this is to eliminate the `Arc`s in `ast::LitKind`, to
make `ast::LitKind` impl `Copy`, and to avoid the need to arena-allocate
`ast::LitKind` in HIR. The last of these changes reduces peak memory by a
non-trivial amount on literal-heavy benchmarks such as `deep-vector` and
`tuple-stress`.

`Encoder`, `Decoder`, `SpanEncoder`, and `SpanDecoder` all get some
changes so that they can handle normal strings and byte strings.

This change does slow down compilation of programs that use
`include_bytes!` on large files, because the contents of those files are
now interned (hashed). This makes `include_bytes!` more similar to
`include_str!`, though `include_bytes!` contents still aren't escaped,
and hashing is still much cheaper than escaping.
This commit is contained in:
Nicholas Nethercote 2025-06-02 08:59:29 +10:00
parent ed2d759783
commit 478f8287c0
46 changed files with 449 additions and 269 deletions

View file

@ -74,7 +74,7 @@ impl ApproxConstant {
}
impl LateLintPass<'_> for ApproxConstant {
fn check_lit(&mut self, cx: &LateContext<'_>, _hir_id: HirId, lit: &Lit, _negated: bool) {
fn check_lit(&mut self, cx: &LateContext<'_>, _hir_id: HirId, lit: Lit, _negated: bool) {
match lit.node {
LitKind::Float(s, LitFloatType::Suffixed(fty)) => match fty {
FloatTy::F16 => self.check_known_consts(cx, lit.span, s, "f16"),

View file

@ -42,7 +42,7 @@ fn extract_bool_lit(e: &Expr<'_>) -> Option<bool> {
}) = e.kind
&& !e.span.from_expansion()
{
Some(*b)
Some(b)
} else {
None
}

View file

@ -46,7 +46,7 @@ pub(super) fn check(cx: &LateContext<'_>, expr: &Expr<'_>, from: &Expr<'_>, to:
fn is_expr_const_aligned(cx: &LateContext<'_>, expr: &Expr<'_>, to: &Ty<'_>) -> bool {
match expr.kind {
ExprKind::Call(fun, _) => is_align_of_call(cx, fun, to),
ExprKind::Lit(lit) => is_literal_aligned(cx, lit, to),
ExprKind::Lit(lit) => is_literal_aligned(cx, &lit, to),
_ => false,
}
}

View file

@ -243,7 +243,7 @@ fn lint_unnecessary_cast(
);
}
fn get_numeric_literal<'e>(expr: &'e Expr<'e>) -> Option<&'e Lit> {
fn get_numeric_literal<'e>(expr: &'e Expr<'e>) -> Option<Lit> {
match expr.kind {
ExprKind::Lit(lit) => Some(lit),
ExprKind::Unary(UnOp::Neg, e) => {

View file

@ -83,7 +83,7 @@ impl<'a, 'tcx> NumericFallbackVisitor<'a, 'tcx> {
}
/// Check whether a passed literal has potential to cause fallback or not.
fn check_lit(&self, lit: &Lit, lit_ty: Ty<'tcx>, emit_hir_id: HirId) {
fn check_lit(&self, lit: Lit, lit_ty: Ty<'tcx>, emit_hir_id: HirId) {
if !lit.span.in_external_macro(self.cx.sess().source_map())
&& matches!(self.ty_bounds.last(), Some(ExplicitTyBound(false)))
&& matches!(
@ -210,7 +210,7 @@ impl<'tcx> Visitor<'tcx> for NumericFallbackVisitor<'_, 'tcx> {
ExprKind::Lit(lit) => {
let ty = self.cx.typeck_results().expr_ty(expr);
self.check_lit(lit, ty, expr.hir_id);
self.check_lit(*lit, ty, expr.hir_id);
return;
},

View file

@ -57,7 +57,7 @@ impl LateLintPass<'_> for LargeIncludeFile {
if let ExprKind::Lit(lit) = &expr.kind
&& let len = match &lit.node {
// include_bytes
LitKind::ByteStr(bstr, _) => bstr.len(),
LitKind::ByteStr(bstr, _) => bstr.as_byte_str().len(),
// include_str
LitKind::Str(sym, _) => sym.as_str().len(),
_ => return,

View file

@ -41,12 +41,12 @@ declare_clippy_lint! {
declare_lint_pass!(ManualIgnoreCaseCmp => [MANUAL_IGNORE_CASE_CMP]);
enum MatchType<'a, 'b> {
enum MatchType<'a> {
ToAscii(bool, Ty<'a>),
Literal(&'b LitKind),
Literal(LitKind),
}
fn get_ascii_type<'a, 'b>(cx: &LateContext<'a>, kind: rustc_hir::ExprKind<'b>) -> Option<(Span, MatchType<'a, 'b>)> {
fn get_ascii_type<'a>(cx: &LateContext<'a>, kind: rustc_hir::ExprKind<'_>) -> Option<(Span, MatchType<'a>)> {
if let MethodCall(path, expr, _, _) = kind {
let is_lower = match path.ident.name {
sym::to_ascii_lowercase => true,
@ -63,7 +63,7 @@ fn get_ascii_type<'a, 'b>(cx: &LateContext<'a>, kind: rustc_hir::ExprKind<'b>) -
return Some((expr.span, ToAscii(is_lower, ty_raw)));
}
} else if let Lit(expr) = kind {
return Some((expr.span, Literal(&expr.node)));
return Some((expr.span, Literal(expr.node)));
}
None
}

View file

@ -184,7 +184,7 @@ fn eq_pattern_length<'tcx>(cx: &LateContext<'tcx>, pattern: &Expr<'_>, expr: &'t
..
}) = expr.kind
{
constant_length(cx, pattern).is_some_and(|length| *n == length)
constant_length(cx, pattern).is_some_and(|length| n == length)
} else {
len_arg(cx, expr).is_some_and(|arg| eq_expr_value(cx, pattern, arg))
}

View file

@ -159,7 +159,7 @@ fn find_bool_lit(ex: &ExprKind<'_>) -> Option<bool> {
node: LitKind::Bool(b), ..
}) = exp.kind
{
Some(*b)
Some(b)
} else {
None
}

View file

@ -12,7 +12,7 @@ use rustc_hir::{Arm, Expr, HirId, HirIdMap, HirIdMapEntry, HirIdSet, Pat, PatExp
use rustc_lint::builtin::NON_EXHAUSTIVE_OMITTED_PATTERNS;
use rustc_lint::{LateContext, LintContext};
use rustc_middle::ty;
use rustc_span::{ErrorGuaranteed, Span, Symbol};
use rustc_span::{ByteSymbol, ErrorGuaranteed, Span, Symbol};
use super::MATCH_SAME_ARMS;
@ -193,7 +193,7 @@ enum NormalizedPat<'a> {
Or(&'a [Self]),
Path(Option<DefId>),
LitStr(Symbol),
LitBytes(&'a [u8]),
LitBytes(ByteSymbol),
LitInt(u128),
LitBool(bool),
Range(PatRange),
@ -332,7 +332,9 @@ impl<'a> NormalizedPat<'a> {
// TODO: Handle negative integers. They're currently treated as a wild match.
PatExprKind::Lit { lit, negated: false } => match lit.node {
LitKind::Str(sym, _) => Self::LitStr(sym),
LitKind::ByteStr(ref bytes, _) | LitKind::CStr(ref bytes, _) => Self::LitBytes(bytes),
LitKind::ByteStr(byte_sym, _) | LitKind::CStr(byte_sym, _) => {
Self::LitBytes(byte_sym)
}
LitKind::Byte(val) => Self::LitInt(val.into()),
LitKind::Char(val) => Self::LitInt(val.into()),
LitKind::Int(val, _) => Self::LitInt(val.get()),

View file

@ -76,7 +76,7 @@ fn get_open_options(
..
} = span
{
Argument::Set(*lit)
Argument::Set(lit)
} else {
// The function is called with a literal which is not a boolean literal.
// This is theoretically possible, but not very likely.

View file

@ -104,7 +104,7 @@ fn len_comparison<'hir>(
) -> Option<(LengthComparison, usize, &'hir Expr<'hir>)> {
macro_rules! int_lit_pat {
($id:ident) => {
ExprKind::Lit(&Spanned {
ExprKind::Lit(Spanned {
node: LitKind::Int(Pu128($id), _),
..
})

View file

@ -324,7 +324,7 @@ impl<'a, 'tcx> PrintVisitor<'a, 'tcx> {
}
}
fn lit(&self, lit: &Binding<&Lit>) {
fn lit(&self, lit: &Binding<Lit>) {
let kind = |kind| chain!(self, "let LitKind::{kind} = {lit}.node");
macro_rules! kind {
($($t:tt)*) => (kind(format_args!($($t)*)));

View file

@ -4,8 +4,6 @@
//! executable MIR bodies, so we have to do this instead.
#![allow(clippy::float_cmp)]
use std::sync::Arc;
use crate::source::{SpanRangeExt, walk_span_to_context};
use crate::{clip, is_direct_expn_of, sext, unsext};
@ -38,7 +36,7 @@ pub enum Constant<'tcx> {
/// A `String` (e.g., "abc").
Str(String),
/// A binary string (e.g., `b"abc"`).
Binary(Arc<[u8]>),
Binary(Vec<u8>),
/// A single `char` (e.g., `'a'`).
Char(char),
/// An integer's bit representation.
@ -306,7 +304,9 @@ pub fn lit_to_mir_constant<'tcx>(lit: &LitKind, ty: Option<Ty<'tcx>>) -> Constan
match *lit {
LitKind::Str(ref is, _) => Constant::Str(is.to_string()),
LitKind::Byte(b) => Constant::Int(u128::from(b)),
LitKind::ByteStr(ref s, _) | LitKind::CStr(ref s, _) => Constant::Binary(Arc::clone(s)),
LitKind::ByteStr(ref s, _) | LitKind::CStr(ref s, _) => {
Constant::Binary(s.as_byte_str().to_vec())
}
LitKind::Char(c) => Constant::Char(c),
LitKind::Int(n, _) => Constant::Int(n.get()),
LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty {
@ -568,7 +568,9 @@ impl<'tcx> ConstEvalCtxt<'tcx> {
} else {
match &lit.node {
LitKind::Str(is, _) => Some(is.is_empty()),
LitKind::ByteStr(s, _) | LitKind::CStr(s, _) => Some(s.is_empty()),
LitKind::ByteStr(s, _) | LitKind::CStr(s, _) => {
Some(s.as_byte_str().is_empty())
}
_ => None,
}
}