Rollup merge of #142098 - GuillaumeGomez:int_format_into, r=Amanieu

Implement `int_format_into` feature

I took over rust-lang/rust#138338 with `@madhav-madhusoodanan's` approval.

A lot has changed since https://github.com/rust-lang/rust/pull/136264, so I built on those changes to keep this diff small.

ACP approval: https://github.com/rust-lang/libs-team/issues/546#issuecomment-2707244569

## Associated Issue
- https://github.com/rust-lang/rust/issues/138215

r? `@hanna-kruppe`
This commit is contained in:
Matthias Krüger 2025-07-08 03:09:56 +02:00 committed by GitHub
commit d41f046de5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 327 additions and 84 deletions

View file

@ -2875,7 +2875,8 @@ macro_rules! impl_to_string {
out = String::with_capacity(SIZE);
}
out.push_str(self.unsigned_abs()._fmt(&mut buf));
// SAFETY: `buf` is always big enough to contain all the digits.
unsafe { out.push_str(self.unsigned_abs()._fmt(&mut buf)); }
out
}
}
@ -2887,7 +2888,8 @@ macro_rules! impl_to_string {
const SIZE: usize = $unsigned::MAX.ilog10() as usize + 1;
let mut buf = [core::mem::MaybeUninit::<u8>::uninit(); SIZE];
self._fmt(&mut buf).to_string()
// SAFETY: `buf` is always big enough to contain all the digits.
unsafe { self._fmt(&mut buf).to_string() }
}
}
)*

View file

@ -9,6 +9,7 @@
#![feature(downcast_unchecked)]
#![feature(exact_size_is_empty)]
#![feature(hashmap_internals)]
#![feature(int_format_into)]
#![feature(linked_list_cursors)]
#![feature(map_try_insert)]
#![feature(pattern)]

View file

@ -1,15 +1,21 @@
use std::fmt::{Debug, Display};
use core::fmt::NumBuffer;
use std::str::FromStr;
fn assert_nb<Int: ToString + FromStr + Debug + Display + Eq>(value: Int) {
let s = value.to_string();
let s2 = format!("s: {}.", value);
macro_rules! assert_nb {
($int:ident, $value:expr) => {
let value: $int = $value;
let s = value.to_string();
let s2 = format!("s: {}.", value);
assert_eq!(format!("s: {s}."), s2);
let Ok(ret) = Int::from_str(&s) else {
panic!("failed to convert into to string");
assert_eq!(format!("s: {s}."), s2);
let Ok(ret) = $int::from_str(&s) else {
panic!("failed to convert into to string");
};
assert_eq!(ret, value);
let mut buffer = NumBuffer::<$int>::new();
assert_eq!(value.format_into(&mut buffer), s.as_str());
};
assert_eq!(ret, value);
}
macro_rules! uint_to_s {
@ -17,11 +23,11 @@ macro_rules! uint_to_s {
$(
#[test]
fn $fn_name() {
assert_nb::<$int>($int::MIN);
assert_nb::<$int>($int::MAX);
assert_nb::<$int>(1);
assert_nb::<$int>($int::MIN / 2);
assert_nb::<$int>($int::MAX / 2);
assert_nb!($int, $int::MIN);
assert_nb!($int, $int::MAX);
assert_nb!($int, 1);
assert_nb!($int, $int::MIN / 2);
assert_nb!($int, $int::MAX / 2);
}
)+
}
@ -31,13 +37,13 @@ macro_rules! int_to_s {
$(
#[test]
fn $fn_name() {
assert_nb::<$int>($int::MIN);
assert_nb::<$int>($int::MAX);
assert_nb::<$int>(1);
assert_nb::<$int>(0);
assert_nb::<$int>(-1);
assert_nb::<$int>($int::MIN / 2);
assert_nb::<$int>($int::MAX / 2);
assert_nb!($int, $int::MIN);
assert_nb!($int, $int::MAX);
assert_nb!($int, 1);
assert_nb!($int, 0);
assert_nb!($int, -1);
assert_nb!($int, $int::MIN / 2);
assert_nb!($int, $int::MAX / 2);
}
)+
}

View file

@ -15,6 +15,7 @@ mod float;
#[cfg(no_fp_fmt_parse)]
mod nofloat;
mod num;
mod num_buffer;
mod rt;
#[stable(feature = "fmt_flags_align", since = "1.28.0")]
@ -33,6 +34,9 @@ pub enum Alignment {
Center,
}
#[unstable(feature = "int_format_into", issue = "138215")]
pub use num_buffer::{NumBuffer, NumBufferTrait};
#[stable(feature = "debug_builders", since = "1.2.0")]
pub use self::builders::{DebugList, DebugMap, DebugSet, DebugStruct, DebugTuple};
#[unstable(feature = "debug_closure_helpers", issue = "117729")]

View file

@ -1,5 +1,6 @@
//! Integer and floating-point number formatting
use crate::fmt::NumBuffer;
use crate::mem::MaybeUninit;
use crate::num::fmt as numfmt;
use crate::ops::{Div, Rem, Sub};
@ -60,7 +61,7 @@ unsafe trait GenericRadix: Sized {
let zero = T::zero();
let is_nonnegative = x >= zero;
let mut buf = [MaybeUninit::<u8>::uninit(); 128];
let mut curr = buf.len();
let mut offset = buf.len();
let base = T::from_u8(Self::BASE);
if is_nonnegative {
// Accumulate each digit of the number from the least significant
@ -68,8 +69,8 @@ unsafe trait GenericRadix: Sized {
loop {
let n = x % base; // Get the current place value.
x = x / base; // Deaccumulate the number.
curr -= 1;
buf[curr].write(Self::digit(n.to_u8())); // Store the digit in the buffer.
offset -= 1;
buf[offset].write(Self::digit(n.to_u8())); // Store the digit in the buffer.
if x == zero {
// No more digits left to accumulate.
break;
@ -80,27 +81,17 @@ unsafe trait GenericRadix: Sized {
loop {
let n = zero - (x % base); // Get the current place value.
x = x / base; // Deaccumulate the number.
curr -= 1;
buf[curr].write(Self::digit(n.to_u8())); // Store the digit in the buffer.
offset -= 1;
buf[offset].write(Self::digit(n.to_u8())); // Store the digit in the buffer.
if x == zero {
// No more digits left to accumulate.
break;
};
}
}
// SAFETY: `curr` is initialized to `buf.len()` and is only decremented, so it can't overflow. It is
// decremented exactly once for each digit. Since u128 is the widest fixed width integer format supported,
// the maximum number of digits (bits) is 128 for base-2, so `curr` won't underflow as well.
let buf = unsafe { buf.get_unchecked(curr..) };
// SAFETY: The only chars in `buf` are created by `Self::digit` which are assumed to be
// valid UTF-8
let buf = unsafe {
str::from_utf8_unchecked(slice::from_raw_parts(
MaybeUninit::slice_as_ptr(buf),
buf.len(),
))
};
f.pad_integral(is_nonnegative, Self::PREFIX, buf)
// SAFETY: Starting from `offset`, all elements of the slice have been set.
let buf_slice = unsafe { slice_buffer_to_str(&buf, offset) };
f.pad_integral(is_nonnegative, Self::PREFIX, buf_slice)
}
}
@ -199,6 +190,20 @@ static DEC_DIGITS_LUT: &[u8; 200] = b"\
6061626364656667686970717273747576777879\
8081828384858687888990919293949596979899";
/// Returns the tail of `buf` that starts at index `offset` as a `&str`.
///
/// # Safety
///
/// The caller must guarantee that:
/// - `offset <= buf.len()`, because the tail is taken with an unchecked slice operation;
/// - every byte of `buf` from `offset` onwards is initialized;
/// - those bytes are all ASCII characters, so that they form valid UTF-8.
unsafe fn slice_buffer_to_str(buf: &[MaybeUninit<u8>], offset: usize) -> &str {
// SAFETY: the caller guarantees `offset <= buf.len()`, so the range is in bounds.
let written = unsafe { buf.get_unchecked(offset..) };
// SAFETY: (`assume_init_ref`) the caller guarantees every byte from `offset` on is initialized.
// SAFETY: (`from_utf8_unchecked`) the caller guarantees those bytes are ASCII, hence valid UTF-8.
unsafe { str::from_utf8_unchecked(written.assume_init_ref()) }
}
macro_rules! impl_Display {
($($signed:ident, $unsigned:ident,)* ; as $u:ident via $conv_fn:ident named $gen_name:ident) => {
@ -212,7 +217,8 @@ macro_rules! impl_Display {
// Buffer decimals for $unsigned with right alignment.
let mut buf = [MaybeUninit::<u8>::uninit(); MAX_DEC_N];
f.pad_integral(true, "", self._fmt(&mut buf))
// SAFETY: `buf` is always big enough to contain all the digits.
unsafe { f.pad_integral(true, "", self._fmt(&mut buf)) }
}
#[cfg(feature = "optimize_for_size")]
{
@ -230,7 +236,8 @@ macro_rules! impl_Display {
// Buffer decimals for $unsigned with right alignment.
let mut buf = [MaybeUninit::<u8>::uninit(); MAX_DEC_N];
f.pad_integral(*self >= 0, "", self.unsigned_abs()._fmt(&mut buf))
// SAFETY: `buf` is always big enough to contain all the digits.
unsafe { f.pad_integral(*self >= 0, "", self.unsigned_abs()._fmt(&mut buf)) }
}
#[cfg(feature = "optimize_for_size")]
{
@ -247,7 +254,14 @@ macro_rules! impl_Display {
reason = "specialized method meant to only be used by `SpecToString` implementation",
issue = "none"
)]
pub fn _fmt<'a>(self, buf: &'a mut [MaybeUninit::<u8>]) -> &'a str {
/// Writes the decimal digits of `self` at the end of `buf` and returns them as a string slice
/// borrowed from `buf`.
///
/// # Safety
///
/// `buf` must be big enough to contain all the digits of `self`. The callers visible in this
/// change pass a buffer of `MAX.ilog10() + 1` bytes for the integer type being formatted.
pub unsafe fn _fmt<'a>(self, buf: &'a mut [MaybeUninit::<u8>]) -> &'a str {
// SAFETY: the caller guarantees that `buf` is big enough to contain all digits.
let offset = unsafe { self._fmt_inner(buf) };
// SAFETY: Starting from `offset`, all elements of the slice have been set.
unsafe { slice_buffer_to_str(buf, offset) }
}
unsafe fn _fmt_inner(self, buf: &mut [MaybeUninit::<u8>]) -> usize {
// Count the number of bytes in buf that are not initialized.
let mut offset = buf.len();
// Consume the least-significant decimals from a working copy.
@ -309,47 +323,123 @@ macro_rules! impl_Display {
// not used: remain = 0;
}
// SAFETY: All buf content since offset is set.
let written = unsafe { buf.get_unchecked(offset..) };
// SAFETY: Writes use ASCII from the lookup table exclusively.
unsafe {
str::from_utf8_unchecked(slice::from_raw_parts(
MaybeUninit::slice_as_ptr(written),
written.len(),
))
}
offset
}
})*
}
impl $signed {
/// Writes the decimal representation of `self` (with a leading `-` when negative) into
/// `buf`, a [`NumBuffer`] passed by the caller by mutable reference, and returns it as a
/// string slice borrowed from `buf`.
///
/// # Examples
///
/// ```
/// #![feature(int_format_into)]
/// use core::fmt::NumBuffer;
///
#[doc = concat!("let n = 0", stringify!($signed), ";")]
/// let mut buf = NumBuffer::new();
/// assert_eq!(n.format_into(&mut buf), "0");
///
#[doc = concat!("let n1 = 32", stringify!($signed), ";")]
/// assert_eq!(n1.format_into(&mut buf), "32");
///
#[doc = concat!("let n2 = ", stringify!($signed::MAX), ";")]
#[doc = concat!("assert_eq!(n2.format_into(&mut buf), ", stringify!($signed::MAX), ".to_string());")]
/// ```
#[unstable(feature = "int_format_into", issue = "138215")]
pub fn format_into(self, buf: &mut NumBuffer<Self>) -> &str {
// Index of the first written byte; the digits of the absolute value are written at the
// end of the buffer by whichever of the two cfg-selected helpers is compiled in.
let mut offset;
#[cfg(not(feature = "optimize_for_size"))]
// SAFETY: `buf` will always be big enough to contain all digits.
unsafe {
offset = self.unsigned_abs()._fmt_inner(&mut buf.buf);
}
#[cfg(feature = "optimize_for_size")]
{
offset = _inner_slow_integer_to_str(self.unsigned_abs().$conv_fn(), &mut buf.buf);
}
// The only difference between the signed and unsigned versions is these 4 lines:
// the sign is stored in the slot just before the first digit.
if self < 0 {
offset -= 1;
buf.buf[offset].write(b'-');
}
// SAFETY: Starting from `offset`, all elements of the slice have been set.
unsafe { slice_buffer_to_str(&buf.buf, offset) }
}
}
impl $unsigned {
/// Writes the decimal representation of `self` into `buf`, a [`NumBuffer`] passed by the
/// caller by mutable reference, and returns it as a string slice borrowed from `buf`.
///
/// # Examples
///
/// ```
/// #![feature(int_format_into)]
/// use core::fmt::NumBuffer;
///
#[doc = concat!("let n = 0", stringify!($unsigned), ";")]
/// let mut buf = NumBuffer::new();
/// assert_eq!(n.format_into(&mut buf), "0");
///
#[doc = concat!("let n1 = 32", stringify!($unsigned), ";")]
/// assert_eq!(n1.format_into(&mut buf), "32");
///
#[doc = concat!("let n2 = ", stringify!($unsigned::MAX), ";")]
#[doc = concat!("assert_eq!(n2.format_into(&mut buf), ", stringify!($unsigned::MAX), ".to_string());")]
/// ```
#[unstable(feature = "int_format_into", issue = "138215")]
pub fn format_into(self, buf: &mut NumBuffer<Self>) -> &str {
// Index of the first written byte; the digits are written at the end of the buffer by
// whichever of the two cfg-selected helpers is compiled in.
let offset;
#[cfg(not(feature = "optimize_for_size"))]
// SAFETY: `buf` will always be big enough to contain all digits.
unsafe {
offset = self._fmt_inner(&mut buf.buf);
}
#[cfg(feature = "optimize_for_size")]
{
offset = _inner_slow_integer_to_str(self.$conv_fn(), &mut buf.buf);
}
// SAFETY: Starting from `offset`, all elements of the slice have been set.
unsafe { slice_buffer_to_str(&buf.buf, offset) }
}
}
)*
#[cfg(feature = "optimize_for_size")]
fn $gen_name(mut n: $u, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result {
const MAX_DEC_N: usize = $u::MAX.ilog10() as usize + 1;
let mut buf = [MaybeUninit::<u8>::uninit(); MAX_DEC_N];
let mut curr = MAX_DEC_N;
let buf_ptr = MaybeUninit::slice_as_mut_ptr(&mut buf);
fn _inner_slow_integer_to_str(mut n: $u, buf: &mut [MaybeUninit::<u8>]) -> usize {
let mut curr = buf.len();
// SAFETY: To show that it's OK to copy into `buf_ptr`, notice that at the beginning
// `curr == buf.len() == 39 > log(n)` since `n < 2^128 < 10^39`, and at
// each step this is kept the same as `n` is divided. Since `n` is always
// non-negative, this means that `curr > 0` so `buf_ptr[curr..curr + 1]`
// is safe to access.
unsafe {
loop {
curr -= 1;
buf_ptr.add(curr).write((n % 10) as u8 + b'0');
n /= 10;
loop {
curr -= 1;
buf[curr].write((n % 10) as u8 + b'0');
n /= 10;
if n == 0 {
break;
}
if n == 0 {
break;
}
}
curr
}
// SAFETY: `curr` > 0 (since we made `buf` large enough), and all the chars are valid UTF-8
let buf_slice = unsafe {
str::from_utf8_unchecked(
slice::from_raw_parts(buf_ptr.add(curr), buf.len() - curr))
};
#[cfg(feature = "optimize_for_size")]
fn $gen_name(n: $u, is_nonnegative: bool, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    // One byte per decimal digit of the largest value of the conversion type.
    const MAX_DEC_N: usize = $u::MAX.ilog10() as usize + 1;

    let mut digits = [MaybeUninit::<u8>::uninit(); MAX_DEC_N];
    // The helper fills the buffer from the end and reports where the digits start.
    let start = _inner_slow_integer_to_str(n, &mut digits);

    // SAFETY: Starting from `start`, all elements of the slice have been set.
    f.pad_integral(is_nonnegative, "", unsafe { slice_buffer_to_str(&digits, start) })
}
};
@ -572,7 +662,8 @@ impl fmt::Display for u128 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut buf = [MaybeUninit::<u8>::uninit(); U128_MAX_DEC_N];
f.pad_integral(true, "", self._fmt(&mut buf))
// SAFETY: `buf` is always big enough to contain all the digits.
unsafe { f.pad_integral(true, "", self._fmt(&mut buf)) }
}
}
@ -584,7 +675,8 @@ impl fmt::Display for i128 {
let mut buf = [MaybeUninit::<u8>::uninit(); U128_MAX_DEC_N];
let is_nonnegative = *self >= 0;
f.pad_integral(is_nonnegative, "", self.unsigned_abs()._fmt(&mut buf))
// SAFETY: `buf` is always big enough to contain all the digits.
unsafe { f.pad_integral(is_nonnegative, "", self.unsigned_abs()._fmt(&mut buf)) }
}
}
@ -597,13 +689,21 @@ impl u128 {
reason = "specialized method meant to only be used by `SpecToString` implementation",
issue = "none"
)]
pub fn _fmt<'a>(self, buf: &'a mut [MaybeUninit<u8>]) -> &'a str {
/// Writes the decimal digits of `self` at the end of `buf` and returns them as a string slice
/// borrowed from `buf`.
///
/// # Safety
///
/// `buf` must be big enough to contain all the digits of `self`. The `Display` implementations
/// visible in this change pass a buffer of `U128_MAX_DEC_N` bytes.
pub unsafe fn _fmt<'a>(self, buf: &'a mut [MaybeUninit<u8>]) -> &'a str {
// SAFETY: the caller guarantees that `buf` is big enough to contain all digits.
let offset = unsafe { self._fmt_inner(buf) };
// SAFETY: Starting from `offset`, all elements of the slice have been set.
unsafe { slice_buffer_to_str(buf, offset) }
}
unsafe fn _fmt_inner(self, buf: &mut [MaybeUninit<u8>]) -> usize {
// Optimize common-case zero, which would also need special treatment due to
// its "leading" zero.
if self == 0 {
return "0";
let offset = buf.len() - 1;
buf[offset].write(b'0');
return offset;
}
// Take the 16 least-significant decimals.
let (quot_1e16, mod_1e16) = div_rem_1e16(self);
let (mut remain, mut offset) = if quot_1e16 == 0 {
@ -677,16 +777,86 @@ impl u128 {
buf[offset].write(DEC_DIGITS_LUT[last * 2 + 1]);
// not used: remain = 0;
}
offset
}
// SAFETY: All buf content since offset is set.
let written = unsafe { buf.get_unchecked(offset..) };
// SAFETY: Writes use ASCII from the lookup table exclusively.
unsafe {
str::from_utf8_unchecked(slice::from_raw_parts(
MaybeUninit::slice_as_ptr(written),
written.len(),
))
/// Writes the decimal representation of `self` into `buf`, a [`NumBuffer`] passed by the
/// caller by mutable reference, and returns it as a string slice borrowed from `buf`.
///
/// # Examples
///
/// ```
/// #![feature(int_format_into)]
/// use core::fmt::NumBuffer;
///
/// let n = 0u128;
/// let mut buf = NumBuffer::new();
/// assert_eq!(n.format_into(&mut buf), "0");
///
/// let n1 = 32u128;
/// let mut buf1 = NumBuffer::new();
/// assert_eq!(n1.format_into(&mut buf1), "32");
///
/// let n2 = u128::MAX;
/// let mut buf2 = NumBuffer::new();
/// assert_eq!(n2.format_into(&mut buf2), u128::MAX.to_string());
/// ```
#[unstable(feature = "int_format_into", issue = "138215")]
pub fn format_into(self, buf: &mut NumBuffer<Self>) -> &str {
// Number of bytes by which `buf` exceeds the `U128_MAX_DEC_N` bytes that the `Display`
// implementation hands to `_fmt`.
let diff = buf.capacity() - U128_MAX_DEC_N;
// FIXME: Once const generics are better, use `NumBufferTrait::BUF_SIZE` as generic const
// for the formatting helper (`_fmt_inner`).
//
// In the meantime, we skip the first `diff` bytes of the buffer so that the helper is
// given a slice of exactly `U128_MAX_DEC_N` bytes, ending at the end of the buffer.
// SAFETY: `diff` is the capacity minus `U128_MAX_DEC_N`, so it cannot exceed the capacity
// and `diff..` is in bounds; the resulting slice has the `U128_MAX_DEC_N` bytes that `_fmt`
// needs to contain all digits.
unsafe { self._fmt(buf.buf.get_unchecked_mut(diff..)) }
}
}
impl i128 {
/// Writes the decimal representation of `self` (with a leading `-` when negative) into
/// `buf`, a [`NumBuffer`] passed by the caller by mutable reference, and returns it as a
/// string slice borrowed from `buf`.
///
/// # Examples
///
/// ```
/// #![feature(int_format_into)]
/// use core::fmt::NumBuffer;
///
/// let n = 0i128;
/// let mut buf = NumBuffer::new();
/// assert_eq!(n.format_into(&mut buf), "0");
///
/// let n1 = i128::MIN;
/// assert_eq!(n1.format_into(&mut buf), i128::MIN.to_string());
///
/// let n2 = i128::MAX;
/// assert_eq!(n2.format_into(&mut buf), i128::MAX.to_string());
/// ```
#[unstable(feature = "int_format_into", issue = "138215")]
pub fn format_into(self, buf: &mut NumBuffer<Self>) -> &str {
// Number of bytes by which `buf` exceeds the `U128_MAX_DEC_N` bytes that the `Display`
// implementation hands to the formatting helper.
let diff = buf.capacity() - U128_MAX_DEC_N;
// FIXME: Once const generics are better, use `NumBufferTrait::BUF_SIZE` as generic const
// for the formatting helper (`_fmt_inner`).
//
// In the meantime, we have to use a slice starting at index `diff` and add `diff` to the
// returned offset to ensure the number is correctly generated at the end of the buffer.
let mut offset =
// SAFETY: `buf` will always be big enough to contain all digits.
unsafe { self.unsigned_abs()._fmt_inner(buf.buf.get_unchecked_mut(diff..)) };
// The helper returned an offset relative to the sub-slice; make it relative to `buf.buf`.
offset += diff;
// The only difference between the signed and unsigned versions is these lines:
// the sign is stored in the slot just before the first digit.
if self < 0 {
offset -= 1;
// SAFETY: `buf` will always be big enough to contain all digits plus the minus sign.
unsafe {
buf.buf.get_unchecked_mut(offset).write(b'-');
}
}
// SAFETY: Starting from `offset`, all elements of the slice have been set.
unsafe { slice_buffer_to_str(&buf.buf, offset) }
}
}

View file

@ -0,0 +1,60 @@
use crate::mem::MaybeUninit;
/// Trait used to describe how many bytes the decimal representation of the implemented integer
/// can take.
#[unstable(feature = "int_format_into", issue = "138215")]
pub trait NumBufferTrait {
/// Maximum number of bytes of the decimal representation of the implemented integer: one per
/// digit, plus one for the `-` character in the implementations for signed integers.
const BUF_SIZE: usize;
}
// Implements `NumBufferTrait` for each `signed, unsigned,` pair of integer types it is given.
macro_rules! impl_NumBufferTrait {
    ($($int:ident, $uint:ident,)*) => {
        $(
            #[unstable(feature = "int_format_into", issue = "138215")]
            impl NumBufferTrait for $int {
                // `ilog10() + 1` is the digit count of `MAX`; the extra `+ 1` (hence `+ 2`)
                // leaves room for the `-` character.
                const BUF_SIZE: usize = $int::MAX.ilog10() as usize + 2;
            }

            #[unstable(feature = "int_format_into", issue = "138215")]
            impl NumBufferTrait for $uint {
                // `ilog10() + 1` is the digit count of `MAX`; there is no sign to store.
                const BUF_SIZE: usize = $uint::MAX.ilog10() as usize + 1;
            }
        )*
    }
}
// Implement `NumBufferTrait` for every primitive integer type, listed as
// `signed, unsigned,` pairs. `i128` gets the largest `BUF_SIZE`: 39 digits plus the `-`
// character, i.e. 40 bytes, which is the fixed length `NumBuffer` currently uses.
impl_NumBufferTrait! {
i8, u8,
i16, u16,
i32, u32,
i64, u64,
isize, usize,
i128, u128,
}
/// A buffer wrapper of which the internal size is based on the maximum
/// number of digits the associated integer can have.
///
/// The buffer is meant to be sized from `T::BUF_SIZE`; for now it holds a fixed 40 bytes
/// whatever `T` is (see the `FIXME` on the `buf` field).
#[unstable(feature = "int_format_into", issue = "138215")]
#[derive(Debug)]
pub struct NumBuffer<T: NumBufferTrait> {
// FIXME: Once const generics feature is working, use `T::BUF_SIZE` instead of 40.
// The bytes start out uninitialized; the integer `format_into` methods write into them.
pub(crate) buf: [MaybeUninit<u8>; 40],
// FIXME: Remove this field once we can actually use `T`.
// Until then it only ties the buffer to the integer type `T`.
phantom: core::marker::PhantomData<T>,
}
#[unstable(feature = "int_format_into", issue = "138215")]
impl<T: NumBufferTrait> NumBuffer<T> {
    /// Creates a new buffer whose bytes are all left uninitialized.
    #[unstable(feature = "int_format_into", issue = "138215")]
    pub const fn new() -> Self {
        // FIXME: Once const generics feature is working, use `T::BUF_SIZE` instead of 40.
        let buf = [MaybeUninit::<u8>::uninit(); 40];
        Self { buf, phantom: core::marker::PhantomData }
    }

    /// Returns the number of bytes the internal buffer can hold.
    #[unstable(feature = "int_format_into", issue = "138215")]
    pub const fn capacity(&self) -> usize {
        let Self { buf, .. } = self;
        buf.len()
    }
}