From 04c5e7b54a0ae02a4e03dd472cd05b65ddd16791 Mon Sep 17 00:00:00 2001 From: Mara Bos Date: Tue, 11 Nov 2025 17:17:53 +0100 Subject: [PATCH] Document fmt::Arguments internal representation. --- compiler/rustc_ast_lowering/src/format.rs | 9 +- library/core/src/fmt/mod.rs | 145 +++++++++++++++++- library/core/src/fmt/rt.rs | 36 +---- .../item-collection/opaque-return-impls.rs | 2 +- ...mes.foo.ScalarReplacementOfAggregates.diff | 2 +- tests/pretty/issue-4264.pp | 2 +- 6 files changed, 150 insertions(+), 46 deletions(-) diff --git a/compiler/rustc_ast_lowering/src/format.rs b/compiler/rustc_ast_lowering/src/format.rs index 39e2630ed913..f16579e99945 100644 --- a/compiler/rustc_ast_lowering/src/format.rs +++ b/compiler/rustc_ast_lowering/src/format.rs @@ -314,6 +314,8 @@ fn expand_format_args<'hir>( &fmt.template[..] }; + // See library/core/src/fmt/mod.rs for the format string encoding format. + for (i, piece) in template.iter().enumerate() { match piece { &FormatArgsPiece::Literal(sym) => { @@ -347,6 +349,7 @@ fn expand_format_args<'hir>( return hir::ExprKind::Call(from_str, args); } + // Encode the literal in chunks of up to 127 bytes, split at utf-8 boundaries. while !s.is_empty() { let len = s.floor_char_boundary(127); bytecode.push(len as u8); @@ -357,6 +360,7 @@ fn expand_format_args<'hir>( incomplete_lit.clear(); } FormatArgsPiece::Placeholder(p) => { + // Push the start byte and remember its index so we can set the option bits later. let i = bytecode.len(); bytecode.push(0x80); @@ -370,7 +374,7 @@ fn expand_format_args<'hir>( ) .0 as u64; - // This needs to match the constants in library/core/src/fmt/rt.rs. + // This needs to match the constants in library/core/src/fmt/mod.rs. 
let o = &p.format_options; let align = match o.alignment { Some(FormatAlignment::Left) => 0, @@ -378,6 +382,7 @@ fn expand_format_args<'hir>( Some(FormatAlignment::Center) => 2, None => 3, }; + let default_flags = 0x6000_0020; let flags: u32 = o.fill.unwrap_or(' ') as u32 | ((o.sign == Some(FormatSign::Plus)) as u32) << 21 | ((o.sign == Some(FormatSign::Minus)) as u32) << 22 @@ -388,7 +393,7 @@ fn expand_format_args<'hir>( | (o.width.is_some() as u32) << 27 | (o.precision.is_some() as u32) << 28 | align << 29; - if flags != 0x6000_0020 { + if flags != default_flags { bytecode[i] |= 1; bytecode.extend_from_slice(&flags.to_le_bytes()); if let Some(val) = &o.width { diff --git a/library/core/src/fmt/mod.rs b/library/core/src/fmt/mod.rs index b28d92b75a2e..f7335ddf470b 100644 --- a/library/core/src/fmt/mod.rs +++ b/library/core/src/fmt/mod.rs @@ -605,6 +605,101 @@ impl<'a> Formatter<'a> { /// ``` /// /// [`format()`]: ../../std/fmt/fn.format.html +// +// Internal representation: +// +// fmt::Arguments is represented in one of two ways: +// +// 1) String literal representation (e.g. format_args!("hello")) +// ┌────────────────────────────────┐ +// template: │ *const u8 │ ─▷ "hello" +// ├──────────────────────────────┬─┤ +// args: │ len │1│ (lowest bit is 1; field contains `len << 1 | 1`) +// └──────────────────────────────┴─┘ +// In this representation, there are no placeholders and `fmt::Arguments::as_str()` returns Some. +// The pointer points to the start of a static `str`. The length is given by `args as usize >> 1`. +// (The length of a `&str` is isize::MAX at most, so it always fits in a usize minus one bit.) +// +// `fmt::Arguments::from_str()` constructs this representation from a `&'static str`. +// +// 2) Placeholders representation (e.g. 
format_args!("hello {name}\n")) +// ┌────────────────────────────────┐ +// template: │ *const u8 │ ─▷ b"\x06hello \x80\x01\n\x00" +// ├────────────────────────────────┤ +// args: │ &'a [Argument<'a>; _] 0│ (lower bit is 0 due to alignment of Argument type) +// └────────────────────────────────┘ +// In this representation, the template is a byte sequence encoding both the literal string pieces +// and the placeholders (including their options/flags). +// +// The `args` pointer points to an array of `fmt::Argument<'a>` values, of sufficient length to +// match the placeholders in the template. +// +// `fmt::Arguments::new()` constructs this representation from a template byte slice and a slice +// of arguments. This function is unsafe, as the template is assumed to be valid and the args +// slice is assumed to have elements matching the template. +// +// The template byte sequence is the concatenation of parts of the following types: +// +// - Literal string piece (1-127 bytes): +// ┌───┬────────────────────────────┐ +// │len│ `len` bytes (utf-8) │ (e.g. b"\x06hello ") +// └───┴────────────────────────────┘ +// Pieces that must be formatted verbatim (e.g. "hello " and "\n" in "hello {name}\n") +// are represented as a single byte containing their length followed directly by the bytes +// of the string. +// +// Pieces can be 127 bytes at most. Longer pieces are split into multiple pieces (at utf-8 +// boundaries). +// +// - Placeholder: +// ┌──────────┬┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┬┄┄┄┄┄┄┄┄┄┄┄┬┄┄┄┄┄┄┄┄┄┄┄┬┄┄┄┄┄┄┄┄┄┄┄┐ +// │0b10______│ flags ┊ width ┊ precision ┊ arg_index ┊ (e.g. 
b"\x82\x05\0") +// └────││││││┴┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┄┴┄┄┄┄┄┄┄┄┄┄┄┴┄┄┄┄┄┄┄┄┄┄┄┴┄┄┄┄┄┄┄┄┄┄┄┘ +// ││││││ 32 bit 16 bit 16 bit 16 bit +// │││││└─ flags present +// ││││└─ width present +// │││└─ precision present +// ││└─ arg_index present +// │└─ width indirect +// └─ precision indirect +// +// Fully default placeholder, without any options: +// ┌──────────┐ +// │0b10000000│ (b"\x80") +// └──────────┘ +// +// Placeholders (e.g. `{name}` in "hello {name}") are represented as a byte with the highest +// bit set, followed by zero or more fields depending on the flags set in the first byte. +// +// The fields are stored as little endian. +// +// The `flags` field corresponds to the `flags` field of `FormattingOptions`. +// See doc comment of `FormattingOptions::flags` for details. +// +// The `width` and `precision` fields correspond to their respective fields in +// `FormattingOptions`. However, if their "indirect" flag is set, the field contains the +// index in the `args` array where the dynamic width or precision is stored, rather than the +// value directly. +// +// The `arg_index` field is the index into the `args` array for the argument to be +// formatted. +// +// If omitted, the flags, width and precision of the default FormattingOptions::new() are +// used. +// +// If the `arg_index` is omitted, the next argument in the `args` array is used (starting +// at 0). +// +// - End: +// ┌───┐ +// │ 0 │ ("\0") +// └───┘ +// A single zero byte marks the end of the template. +// +// (Note that a zero byte may also occur naturally as part of the string pieces or flags, +// width, precision and arg_index fields above. That is, the template byte sequence ends +// with a 0 byte, but isn't terminated by the first 0 byte.) +// #[lang = "format_arguments"] #[stable(feature = "rust1", since = "1.0.0")] #[derive(Copy, Clone)] @@ -613,6 +708,42 @@ pub struct Arguments<'a> { args: NonNull<rt::Argument<'a>>, } +/// Used by the format_args!() macro to create a fmt::Arguments object.
+#[doc(hidden)] +#[rustc_diagnostic_item = "FmtArgumentsNew"] +#[unstable(feature = "fmt_internals", issue = "none")] +impl<'a> Arguments<'a> { + // SAFETY: The caller must ensure that the provided template and args encode a valid + // fmt::Arguments, as documented above. + #[inline] + pub unsafe fn new<const N: usize, const M: usize>( + template: &'a [u8; N], + args: &'a [rt::Argument<'a>; M], + ) -> Arguments<'a> { + // SAFETY: Responsibility of the caller. + unsafe { Arguments { template: mem::transmute(template), args: mem::transmute(args) } } + } + + #[inline] + pub const fn from_str(s: &'static str) -> Arguments<'a> { + // SAFETY: This is the "static str" representation of fmt::Arguments; see above. + unsafe { + Arguments { + template: mem::transmute(s.as_ptr()), + args: mem::transmute(s.len() << 1 | 1), + } + } + } + + // Same as `from_str`, but not const. + // Used by format_args!() expansion when arguments are inlined, + // e.g. format_args!("{}", 123), which is not allowed in const. + #[inline] + pub fn from_str_nonconst(s: &'static str) -> Arguments<'a> { + Arguments::from_str(s) + } +} + #[doc(hidden)] #[unstable(feature = "fmt_internals", issue = "none")] impl<'a> Arguments<'a> { @@ -646,10 +777,10 @@ impl<'a> Arguments<'a> { starts_with_placeholder = true; } // Skip remainder of placeholder: - let skip = (n & 1 == 1) as usize * 4 - + (n & 2 == 2) as usize * 2 - + (n & 4 == 4) as usize * 2 - + (n & 8 == 8) as usize * 2; + let skip = (n & 1 != 0) as usize * 4 // flags (32 bit) + + (n & 2 != 0) as usize * 2 // width (16 bit) + + (n & 4 != 0) as usize * 2 // precision (16 bit) + + (n & 8 != 0) as usize * 2; // arg_index (16 bit) template = template.add(1 + skip as usize); } } @@ -718,11 +849,13 @@ impl<'a> Arguments<'a> { #[inline] pub const fn as_str(&self) -> Option<&'static str> { // SAFETY: During const eval, `self.args` must have come from a usize, - // not a pointer, because that's the only way to creat a fmt::Arguments in const.
+ // not a pointer, because that's the only way to create a fmt::Arguments in const. + // (I.e. only fmt::Arguments::from_str is const, fmt::Arguments::new is not.) + // // Outside const eval, transmuting a pointer to a usize is fine. let bits: usize = unsafe { mem::transmute(self.args) }; if bits & 1 == 1 { - // SAFETY: This fmt::Arguments stores a &'static str. + // SAFETY: This fmt::Arguments stores a &'static str. See encoding documentation above. Some(unsafe { str::from_utf8_unchecked(crate::slice::from_raw_parts( self.template.as_ptr(), diff --git a/library/core/src/fmt/rt.rs b/library/core/src/fmt/rt.rs index 6629873a567a..93f4c57abcc8 100644 --- a/library/core/src/fmt/rt.rs +++ b/library/core/src/fmt/rt.rs @@ -8,7 +8,6 @@ use super::*; use crate::hint::unreachable_unchecked; -use crate::mem; use crate::ptr::NonNull; #[derive(Copy, Clone)] @@ -35,7 +34,7 @@ enum ArgumentType<'a> { /// precision and width. #[lang = "format_argument"] #[derive(Copy, Clone)] -#[repr(align(2))] +#[repr(align(2))] // To ensure pointers to this always have their lowest bit cleared. pub struct Argument<'a> { ty: ArgumentType<'a>, } @@ -164,36 +163,3 @@ impl Argument<'_> { } } } - -/// Used by the format_args!() macro to create a fmt::Arguments object. -#[doc(hidden)] -#[rustc_diagnostic_item = "FmtArgumentsNew"] -impl<'a> Arguments<'a> { - #[inline] - pub unsafe fn new<const N: usize, const M: usize>( - template: &'a [u8; N], - args: &'a [rt::Argument<'a>; M], - ) -> Arguments<'a> { - // SAFETY: ... - unsafe { Arguments { template: mem::transmute(template), args: mem::transmute(args) } } - } - - #[inline] - pub const fn from_str(s: &'static str) -> Arguments<'a> { - // SAFETY: This is the "static str" representation of fmt::Arguments. - unsafe { - Arguments { - template: mem::transmute(s.as_ptr()), - args: mem::transmute(s.len() << 1 | 1), - } - } - } - - // Same as `from_str`, but not const. - // Used by format_args!() expansion when arguments are inlined, - // e.g.
format_args!("{}", 123), which is not allowed in const. - #[inline] - pub fn from_str_nonconst(s: &'static str) -> Arguments<'a> { - Arguments::from_str(s) - } -} diff --git a/tests/codegen-units/item-collection/opaque-return-impls.rs b/tests/codegen-units/item-collection/opaque-return-impls.rs index 2859e1679918..1659b62175b7 100644 --- a/tests/codegen-units/item-collection/opaque-return-impls.rs +++ b/tests/codegen-units/item-collection/opaque-return-impls.rs @@ -86,4 +86,4 @@ pub fn foo3() -> Box> { //~ MONO_ITEM fn foo3 //~ MONO_ITEM fn std::boxed::Box::::new //~ MONO_ITEM fn Counter::new -//~ MONO_ITEM fn core::fmt::rt::>::from_str +//~ MONO_ITEM fn std::fmt::Arguments::<'_>::from_str diff --git a/tests/mir-opt/sroa/lifetimes.foo.ScalarReplacementOfAggregates.diff b/tests/mir-opt/sroa/lifetimes.foo.ScalarReplacementOfAggregates.diff index 4b867d4325c8..68f6b45f29e8 100644 --- a/tests/mir-opt/sroa/lifetimes.foo.ScalarReplacementOfAggregates.diff +++ b/tests/mir-opt/sroa/lifetimes.foo.ScalarReplacementOfAggregates.diff @@ -144,7 +144,7 @@ StorageLive(_23); _23 = &_15; _22 = &(*_23); - _11 = core::fmt::rt::>::new::<7, 2>(move _20, move _22) -> [return: bb5, unwind unreachable]; + _11 = Arguments::<'_>::new::<7, 2>(move _20, move _22) -> [return: bb5, unwind unreachable]; } bb5: { diff --git a/tests/pretty/issue-4264.pp b/tests/pretty/issue-4264.pp index c4517574a466..4eee6655cf6f 100644 --- a/tests/pretty/issue-4264.pp +++ b/tests/pretty/issue-4264.pp @@ -34,7 +34,7 @@ fn bar() ({ ((::alloc::fmt::format as for<'a> fn(Arguments<'a>) -> String {format})(((format_arguments::from_str as - fn(&'static str) -> Arguments<'_> {core::fmt::rt::>::from_str})(("test" + fn(&'static str) -> Arguments<'_> {Arguments::<'_>::from_str})(("test" as &str)) as Arguments<'_>)) as String) } as String)) as String); } as ())