From 1f32d40ac3c51d6867fc6969e52e960452a40652 Mon Sep 17 00:00:00 2001
From: Tyson Nottingham <tgnottingham@gmail.com>
Date: Thu, 10 Dec 2020 19:46:54 -0800
Subject: [PATCH] BufWriter: apply #[inline] / #[inline(never)] optimizations

Ensure that `write` and `write_all` can be inlined and that their
commonly executed fast paths can be as short as possible.

`write_vectored` would likely benefit from the same optimization, but I
omitted it because its implementation is more complex, and I don't have
a benchmark on hand to guide its optimization.
---
 library/std/src/io/buffered/bufwriter.rs | 90 +++++++++++++++++-------
 1 file changed, 66 insertions(+), 24 deletions(-)

diff --git a/library/std/src/io/buffered/bufwriter.rs b/library/std/src/io/buffered/bufwriter.rs
index 80f98bbbad36..ae72ddabfb9e 100644
--- a/library/std/src/io/buffered/bufwriter.rs
+++ b/library/std/src/io/buffered/bufwriter.rs
@@ -331,6 +331,52 @@ impl<W: Write> BufWriter<W> {
         let buf = if !self.panicked { Ok(buf) } else { Err(WriterPanicked { buf }) };
         (self.inner.take().unwrap(), buf)
     }
+
+    // Ensure this function does not get inlined into `write`, so that it
+    // remains inlineable and its common path remains as short as possible.
+    // If this function ends up being called frequently relative to `write`,
+    // it's likely a sign that the client is using an improperly sized buffer.
+    #[inline(never)]
+    fn flush_and_write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        self.flush_buf()?;
+
+        // Why not len > capacity? To avoid a needless trip through the buffer when the
+        // input exactly fills. We'd just need to flush it to the underlying writer anyway.
+        if buf.len() >= self.buf.capacity() {
+            self.panicked = true;
+            let r = self.get_mut().write(buf);
+            self.panicked = false;
+            r
+        } else {
+            self.buf.extend_from_slice(buf);
+            Ok(buf.len())
+        }
+    }
+
+    // Ensure this function does not get inlined into `write_all`, so that it
+    // remains inlineable and its common path remains as short as possible.
+    // If this function ends up being called frequently relative to `write_all`,
+    // it's likely a sign that the client is using an improperly sized buffer.
+    #[inline(never)]
+    fn flush_and_write_all(&mut self, buf: &[u8]) -> io::Result<()> {
+        // Normally, `write_all` just calls `write` in a loop. We can do better
+        // by calling `self.get_mut().write_all()` directly, which avoids
+        // round trips through the buffer in the event of a series of partial
+        // writes in some circumstances.
+        self.flush_buf()?;
+
+        // Why not len > capacity? To avoid a needless trip through the buffer when the
+        // input exactly fills. We'd just need to flush it to the underlying writer anyway.
+        if buf.len() >= self.buf.capacity() {
+            self.panicked = true;
+            let r = self.get_mut().write_all(buf);
+            self.panicked = false;
+            r
+        } else {
+            self.buf.extend_from_slice(buf);
+            Ok(())
+        }
+    }
 }
 
 #[unstable(feature = "bufwriter_into_raw_parts", issue = "80690")]
@@ -402,43 +448,39 @@ impl fmt::Debug for WriterPanicked
 
 #[stable(feature = "rust1", since = "1.0.0")]
 impl<W: Write> Write for BufWriter<W> {
+    #[inline]
     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
-        if self.buf.len() + buf.len() > self.buf.capacity() {
-            self.flush_buf()?;
-        }
-        // FIXME: Why no len > capacity? Why not buffer len == capacity? #72919
-        if buf.len() >= self.buf.capacity() {
-            self.panicked = true;
-            let r = self.get_mut().write(buf);
-            self.panicked = false;
-            r
-        } else {
+        // The `buf.len() != self.buf.capacity()` check is done to avoid a needless trip through
+        // the buffer when the input is exactly the same size as it. For many clients, that is a
+        // rare event, so it's unfortunate that the check is in the common code path. But it
+        // prevents pathological cases for other clients which *always* make writes of this size.
+        // See #72919 and #79930 for more info and a breadcrumb trail.
+        if self.buf.len() + buf.len() <= self.buf.capacity() && buf.len() != self.buf.capacity() {
             self.buf.extend_from_slice(buf);
             Ok(buf.len())
+        } else {
+            self.flush_and_write(buf)
         }
     }
 
+    #[inline]
     fn write_all(&mut self, buf: &[u8]) -> io::Result<()> {
-        // Normally, `write_all` just calls `write` in a loop. We can do better
-        // by calling `self.get_mut().write_all()` directly, which avoids
-        // round trips through the buffer in the event of a series of partial
-        // writes in some circumstances.
-        if self.buf.len() + buf.len() > self.buf.capacity() {
-            self.flush_buf()?;
-        }
-        // FIXME: Why no len > capacity? Why not buffer len == capacity? #72919
-        if buf.len() >= self.buf.capacity() {
-            self.panicked = true;
-            let r = self.get_mut().write_all(buf);
-            self.panicked = false;
-            r
-        } else {
+        // The `buf.len() != self.buf.capacity()` check is done to avoid a needless trip through
+        // the buffer when the input is exactly the same size as it. For many clients, that is a
+        // rare event, so it's unfortunate that the check is in the common code path. But it
+        // prevents pathological cases for other clients which *always* make writes of this size.
+        // See #72919 and #79930 for more info and a breadcrumb trail.
+        if self.buf.len() + buf.len() <= self.buf.capacity() && buf.len() != self.buf.capacity() {
             self.buf.extend_from_slice(buf);
             Ok(())
+        } else {
+            self.flush_and_write_all(buf)
         }
     }
 
     fn write_vectored(&mut self, bufs: &[IoSlice<'_>]) -> io::Result<usize> {
+        // FIXME: Consider applying `#[inline]` / `#[inline(never)]` optimizations already applied
+        // to `write` and `write_all`. The performance benefits can be significant. See #79930.
         if self.get_ref().is_write_vectored() {
             let total_len = bufs.iter().map(|b| b.len()).sum::<usize>();
             if self.buf.len() + total_len > self.buf.capacity() {