Document fmt::Arguments internal representation.

This commit is contained in:
Mara Bos 2025-11-11 17:17:53 +01:00
parent 560b12d094
commit 04c5e7b54a
6 changed files with 150 additions and 46 deletions

View file

@ -314,6 +314,8 @@ fn expand_format_args<'hir>(
&fmt.template[..]
};
// See library/core/src/fmt/mod.rs for the format string encoding format.
for (i, piece) in template.iter().enumerate() {
match piece {
&FormatArgsPiece::Literal(sym) => {
@ -347,6 +349,7 @@ fn expand_format_args<'hir>(
return hir::ExprKind::Call(from_str, args);
}
// Encode the literal in chunks of up to 127 bytes, split at utf-8 boundaries.
while !s.is_empty() {
let len = s.floor_char_boundary(127);
bytecode.push(len as u8);
@ -357,6 +360,7 @@ fn expand_format_args<'hir>(
incomplete_lit.clear();
}
FormatArgsPiece::Placeholder(p) => {
// Push the start byte and remember its index so we can set the option bits later.
let i = bytecode.len();
bytecode.push(0x80);
@ -370,7 +374,7 @@ fn expand_format_args<'hir>(
)
.0 as u64;
// This needs to match the constants in library/core/src/fmt/rt.rs.
// This needs to match the constants in library/core/src/fmt/mod.rs.
let o = &p.format_options;
let align = match o.alignment {
Some(FormatAlignment::Left) => 0,
@ -378,6 +382,7 @@ fn expand_format_args<'hir>(
Some(FormatAlignment::Center) => 2,
None => 3,
};
let default_flags = 0x6000_0020;
let flags: u32 = o.fill.unwrap_or(' ') as u32
| ((o.sign == Some(FormatSign::Plus)) as u32) << 21
| ((o.sign == Some(FormatSign::Minus)) as u32) << 22
@ -388,7 +393,7 @@ fn expand_format_args<'hir>(
| (o.width.is_some() as u32) << 27
| (o.precision.is_some() as u32) << 28
| align << 29;
if flags != 0x6000_0020 {
if flags != default_flags {
bytecode[i] |= 1;
bytecode.extend_from_slice(&flags.to_le_bytes());
if let Some(val) = &o.width {

View file

@ -605,6 +605,101 @@ impl<'a> Formatter<'a> {
/// ```
///
/// [`format()`]: ../../std/fmt/fn.format.html
//
// Internal representation:
//
// fmt::Arguments is represented in one of two ways:
//
// 1) String literal representation (e.g. format_args!("hello"))
// ┌────────────────────────────────┐
// template: │ *const u8 │ ─▷ "hello"
// ├──────────────────────────────┬─┤
// args: │ len │1│ (lowest bit is 1; field contains `len << 1 | 1`)
// └──────────────────────────────┴─┘
// In this representation, there are no placeholders and `fmt::Arguments::as_str()` returns Some.
// The pointer points to the start of a static `str`. The length is given by `args as usize >> 1`.
// (The length of a `&str` is isize::MAX at most, so it always fits in a usize minus one bit.)
//
// `fmt::Arguments::from_str()` constructs this representation from a `&'static str`.
//
// 2) Placeholders representation (e.g. format_args!("hello {name}\n"))
// ┌────────────────────────────────┐
// template: │ *const u8 │ ─▷ b"\x06hello \x80\x01\n\x00"
// ├────────────────────────────────┤
// args: │ &'a [Argument<'a>; _] 0│ (lowest bit is 0 due to alignment of Argument type)
// └────────────────────────────────┘
// In this representation, the template is a byte sequence encoding both the literal string pieces
// and the placeholders (including their options/flags).
//
// The `args` pointer points to an array of `fmt::Argument<'a>` values, of sufficient length to
// match the placeholders in the template.
//
// `fmt::Arguments::new()` constructs this representation from a template byte slice and a slice
// of arguments. This function is unsafe, as the template is assumed to be valid and the args
// slice is assumed to have elements matching the template.
//
// The template byte sequence is the concatenation of parts of the following types:
//
// - Literal string piece (1-127 bytes):
// ┌───┬────────────────────────────┐
// │len│ `len` bytes (utf-8) │ (e.g. b"\x06hello ")
// └───┴────────────────────────────┘
// Pieces that must be formatted verbatim (e.g. "hello " and "\n" in "hello {name}\n")
// are represented as a single byte containing their length followed directly by the bytes
// of the string.
//
// Pieces can be 127 bytes at most. Longer pieces are split into multiple pieces (at utf-8
// boundaries).
//
// - Placeholder:
// ┌──────────┬┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┬┄┄┄┄┄┄┄┄┄┄┄┬┄┄┄┄┄┄┄┄┄┄┄┬┄┄┄┄┄┄┄┄┄┄┄┐
// │0b10______│ flags ┊ width ┊ precision ┊ arg_index ┊ (e.g. b"\x82\x05\0")
// └────││││││┴┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┴┄┄┄┄┄┄┄┄┄┄┄┴┄┄┄┄┄┄┄┄┄┄┄┴┄┄┄┄┄┄┄┄┄┄┄┘
// ││││││ 32 bit 16 bit 16 bit 16 bit
// │││││└─ flags present
// ││││└─ width present
// │││└─ precision present
// ││└─ arg_index present
// │└─ width indirect
// └─ precision indirect
//
// Fully default placeholder, without any options:
// ┌──────────┐
// │0b10000000│ (b"\x80")
// └──────────┘
//
// Placeholders (e.g. `{name}` in "hello {name}") are represented as a byte with the highest
// bit set, followed by zero or more fields depending on the low bits set in that first byte.
//
// The fields are stored as little endian.
//
// The `flags` field corresponds to the `flags` field of `FormattingOptions`.
// See doc comment of `FormattingOptions::flags` for details.
//
// The `width` and `precision` fields correspond to their respective fields in
// `FormattingOptions`. However, if their "indirect" flag is set, the field contains the
// index in the `args` array where the dynamic width or precision is stored, rather than the
// value directly.
//
// The `arg_index` field is the index into the `args` array for the argument to be
// formatted.
//
// If omitted, the flags, width and precision of the default FormattingOptions::new() are
// used.
//
// If the `arg_index` is omitted, the next argument in the `args` array is used (starting
// at 0).
//
// - End:
// ┌───┐
// │ 0 │ (b"\0")
// └───┘
// A single zero byte marks the end of the template.
//
// (Note that a zero byte may also occur naturally as part of the string pieces or flags,
// width, precision and arg_index fields above. That is, the template byte sequence ends
// with a 0 byte, but isn't terminated by the first 0 byte.)
//
#[lang = "format_arguments"]
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Copy, Clone)]
@ -613,6 +708,42 @@ pub struct Arguments<'a> {
args: NonNull<rt::Argument<'a>>,
}
/// Used by the format_args!() macro to create a fmt::Arguments object.
#[doc(hidden)]
#[rustc_diagnostic_item = "FmtArgumentsNew"]
#[unstable(feature = "fmt_internals", issue = "none")]
impl<'a> Arguments<'a> {
// Creates the "placeholders" representation from an encoded template byte array and
// the array of arguments its placeholders refer to.
//
// Both references are transmuted into the struct's pointer fields; the array lengths
// `N` and `M` are not stored anywhere, so the template itself must describe how many
// bytes and arguments are valid.
//
// SAFETY: The caller must ensure that the provided template and args encode a valid
// fmt::Arguments, as documented above.
#[inline]
pub unsafe fn new<const N: usize, const M: usize>(
template: &'a [u8; N],
args: &'a [rt::Argument<'a>; M],
) -> Arguments<'a> {
// SAFETY: Responsibility of the caller.
unsafe { Arguments { template: mem::transmute(template), args: mem::transmute(args) } }
}
// Creates the "string literal" representation: `template` holds the pointer to the
// start of `s`, and `args` holds the length of `s` shifted left by one with the
// lowest bit set as a tag.
#[inline]
pub const fn from_str(s: &'static str) -> Arguments<'a> {
// SAFETY: This is the "static str" representation of fmt::Arguments; see above.
unsafe {
Arguments {
template: mem::transmute(s.as_ptr()),
// `<<` binds tighter than `|`, so this is `(len << 1) | 1`: the tagged length.
args: mem::transmute(s.len() << 1 | 1),
}
}
}
// Same as `from_str`, but not const.
// Used by format_args!() expansion when arguments are inlined,
// e.g. format_args!("{}", 123), which is not allowed in const.
#[inline]
pub fn from_str_nonconst(s: &'static str) -> Arguments<'a> {
// Plain forwarding call; the only difference from `from_str` is the missing `const`.
Arguments::from_str(s)
}
}
#[doc(hidden)]
#[unstable(feature = "fmt_internals", issue = "none")]
impl<'a> Arguments<'a> {
@ -646,10 +777,10 @@ impl<'a> Arguments<'a> {
starts_with_placeholder = true;
}
// Skip remainder of placeholder:
let skip = (n & 1 == 1) as usize * 4
+ (n & 2 == 2) as usize * 2
+ (n & 4 == 4) as usize * 2
+ (n & 8 == 8) as usize * 2;
let skip = (n & 1 != 0) as usize * 4 // flags (32 bit)
+ (n & 2 != 0) as usize * 2 // width (16 bit)
+ (n & 4 != 0) as usize * 2 // precision (16 bit)
+ (n & 8 != 0) as usize * 2; // arg_index (16 bit)
template = template.add(1 + skip as usize);
}
}
@ -718,11 +849,13 @@ impl<'a> Arguments<'a> {
#[inline]
pub const fn as_str(&self) -> Option<&'static str> {
// SAFETY: During const eval, `self.args` must have come from a usize,
// not a pointer, because that's the only way to creat a fmt::Arguments in const.
// not a pointer, because that's the only way to create a fmt::Arguments in const.
// (I.e. only fmt::Arguments::from_str is const, fmt::Arguments::new is not.)
//
// Outside const eval, transmuting a pointer to a usize is fine.
let bits: usize = unsafe { mem::transmute(self.args) };
if bits & 1 == 1 {
// SAFETY: This fmt::Arguments stores a &'static str.
// SAFETY: This fmt::Arguments stores a &'static str. See encoding documentation above.
Some(unsafe {
str::from_utf8_unchecked(crate::slice::from_raw_parts(
self.template.as_ptr(),

View file

@ -8,7 +8,6 @@
use super::*;
use crate::hint::unreachable_unchecked;
use crate::mem;
use crate::ptr::NonNull;
#[derive(Copy, Clone)]
@ -35,7 +34,7 @@ enum ArgumentType<'a> {
/// precision and width.
#[lang = "format_argument"]
#[derive(Copy, Clone)]
#[repr(align(2))]
#[repr(align(2))] // To ensure pointers to this always have their lowest bit cleared.
pub struct Argument<'a> {
ty: ArgumentType<'a>,
}
@ -164,36 +163,3 @@ impl Argument<'_> {
}
}
}
/// Used by the format_args!() macro to create a fmt::Arguments object.
#[doc(hidden)]
#[rustc_diagnostic_item = "FmtArgumentsNew"]
impl<'a> Arguments<'a> {
// Builds an `Arguments` from an encoded template byte array and the array of
// arguments. Both references are transmuted into the struct's pointer fields; the
// array lengths `N` and `M` are not stored.
//
// SAFETY (caller contract): `template` and `args` must together form a valid
// encoding; nothing is checked here.
#[inline]
pub unsafe fn new<const N: usize, const M: usize>(
template: &'a [u8; N],
args: &'a [rt::Argument<'a>; M],
) -> Arguments<'a> {
// SAFETY: Validity of `template` and `args` is the responsibility of the caller.
unsafe { Arguments { template: mem::transmute(template), args: mem::transmute(args) } }
}
// Builds an `Arguments` that stores a plain `&'static str`: `template` holds the
// pointer to the start of `s`, and `args` holds the length of `s` shifted left by
// one with the lowest bit set as a tag.
#[inline]
pub const fn from_str(s: &'static str) -> Arguments<'a> {
// SAFETY: This is the "static str" representation of fmt::Arguments.
unsafe {
Arguments {
template: mem::transmute(s.as_ptr()),
// `<<` binds tighter than `|`, so this is `(len << 1) | 1`: the tagged length.
args: mem::transmute(s.len() << 1 | 1),
}
}
}
// Same as `from_str`, but not const.
// Used by format_args!() expansion when arguments are inlined,
// e.g. format_args!("{}", 123), which is not allowed in const.
#[inline]
pub fn from_str_nonconst(s: &'static str) -> Arguments<'a> {
// Plain forwarding call; the only difference from `from_str` is the missing `const`.
Arguments::from_str(s)
}
}

View file

@ -86,4 +86,4 @@ pub fn foo3() -> Box<dyn Iterator<Item = usize>> {
//~ MONO_ITEM fn foo3
//~ MONO_ITEM fn std::boxed::Box::<Counter>::new
//~ MONO_ITEM fn Counter::new
//~ MONO_ITEM fn core::fmt::rt::<impl std::fmt::Arguments<'_>>::from_str
//~ MONO_ITEM fn std::fmt::Arguments::<'_>::from_str

View file

@ -144,7 +144,7 @@
StorageLive(_23);
_23 = &_15;
_22 = &(*_23);
_11 = core::fmt::rt::<impl Arguments<'_>>::new::<7, 2>(move _20, move _22) -> [return: bb5, unwind unreachable];
_11 = Arguments::<'_>::new::<7, 2>(move _20, move _22) -> [return: bb5, unwind unreachable];
}
bb5: {

View file

@ -34,7 +34,7 @@ fn bar() ({
((::alloc::fmt::format as
for<'a> fn(Arguments<'a>) -> String {format})(((format_arguments::from_str
as
fn(&'static str) -> Arguments<'_> {core::fmt::rt::<impl Arguments<'_>>::from_str})(("test"
fn(&'static str) -> Arguments<'_> {Arguments::<'_>::from_str})(("test"
as &str)) as Arguments<'_>)) as String)
} as String)) as String);
} as ())