RangeInclusive internal iteration performance improvement.

Specialize Iterator::try_fold and DoubleEndedIterator::try_rfold to
improve code generation in all internal iteration scenarios.

This changes brings the performance of internal iteration with
RangeInclusive on par with the performance of iteration with Range:

 - Single conditional jump in hot loop,
 - Unrolling and vectorization,
 - And even Closed Form substitution.

Unfortunately, it only applies to internal iteration. Despite various
attempts at stream-lining the implementation of next and next_back,
LLVM has stubbornly refused to optimize external iteration
appropriately, leaving me with a choice between:

 - The current implementation, for which Closed Form substitution is
   performed, but which uses 2 conditional jumps in the hot loop when
   optimization fail.
 - An implementation using a "is_done" boolean, which uses 1
   conditional jump in the hot loop when optimization fail, allowing
   unrolling and vectorization, but for which Closed Form substitution
   fails.

In the absence of any conclusive evidence as to which usecase matters
most, and with no assurance that the lack of Closed Form substitution
is not indicative of other optimizations being foiled, there is no way
to pick one implementation over the other, and thus I defer to the
statu quo as far as next and next_back are concerned.
This commit is contained in:
Matthieu M 2019-02-03 16:58:29 +01:00
parent cae623c5ce
commit eb5b096886
2 changed files with 50 additions and 3 deletions

View file

@ -1,6 +1,6 @@
use convert::TryFrom;
use mem;
use ops::{self, Add, Sub};
use ops::{self, Add, Sub, Try};
use usize;
use super::{FusedIterator, TrustedLen};
@ -368,11 +368,11 @@ impl<A: Step> Iterator for ops::RangeInclusive<A> {
Some(Less) => {
self.is_empty = Some(false);
self.start = plus_n.add_one();
return Some(plus_n)
return Some(plus_n);
}
Some(Equal) => {
self.is_empty = Some(true);
return Some(plus_n)
return Some(plus_n);
}
_ => {}
}
@ -382,6 +382,29 @@ impl<A: Step> Iterator for ops::RangeInclusive<A> {
None
}
#[inline]
fn try_fold<B, F, R>(&mut self, init: B, mut f: F) -> R
where
Self: Sized, F: FnMut(B, Self::Item) -> R, R: Try<Ok=B>
{
self.compute_is_empty();
let mut accum = init;
while self.start < self.end {
let n = self.start.add_one();
let n = mem::replace(&mut self.start, n);
accum = f(accum, n)?;
}
if self.start == self.end {
accum = f(accum, self.start.clone())?;
}
self.is_empty = Some(true);
Try::from_ok(accum)
}
#[inline]
fn last(mut self) -> Option<A> {
self.next_back()
@ -415,6 +438,28 @@ impl<A: Step> DoubleEndedIterator for ops::RangeInclusive<A> {
self.end.clone()
})
}
#[inline]
fn try_rfold<B, F, R>(&mut self, init: B, mut f: F) -> R where
Self: Sized, F: FnMut(B, Self::Item) -> R, R: Try<Ok=B>
{
self.compute_is_empty();
let mut accum = init;
while self.start < self.end {
let n = self.end.sub_one();
let n = mem::replace(&mut self.end, n);
accum = f(accum, n)?;
}
if self.start == self.end {
accum = f(accum, self.start.clone())?;
}
self.is_empty = Some(true);
Try::from_ok(accum)
}
}
#[stable(feature = "fused", since = "1.26.0")]

View file

@ -334,12 +334,14 @@ pub struct RangeInclusive<Idx> {
trait RangeInclusiveEquality: Sized {
fn canonicalized_is_empty(range: &RangeInclusive<Self>) -> bool;
}
impl<T> RangeInclusiveEquality for T {
#[inline]
default fn canonicalized_is_empty(range: &RangeInclusive<Self>) -> bool {
range.is_empty.unwrap_or_default()
}
}
impl<T: PartialOrd> RangeInclusiveEquality for T {
#[inline]
fn canonicalized_is_empty(range: &RangeInclusive<Self>) -> bool {