rust/src/libcore/iter/range.rs
bors 932c736479 Auto merge of #48057 - scottmcm:less-match-more-compare, r=dtolnay
Simplify RangeInclusive::next[_back]

`match`ing on an `Option<Ordering>` seems cause some confusion for LLVM; switching to just using comparison operators removes a few jumps from the simple `for` loops I was trying.

cc https://github.com/rust-lang/rust/issues/45222 https://github.com/rust-lang/rust/issues/28237#issuecomment-363706510

Example:
```rust
#[no_mangle]
pub fn coresum(x: std::ops::RangeInclusive<u64>) -> u64 {
    let mut sum = 0;
    for i in x {
        sum += i ^ (i-1);
    }
    sum
}
```
Today:
```asm
coresum:
    xor r8d, r8d
    mov r9, -1
    xor eax, eax
    jmp .LBB0_1
.LBB0_4:
    lea rcx, [rdi - 1]
    xor rcx, rdi
    add rax, rcx
    mov rsi, rdx
    mov rdi, r10
.LBB0_1:
    cmp rdi, rsi
    mov ecx, 1
    cmovb   rcx, r9
    cmove   rcx, r8
    test    rcx, rcx
    mov edx, 0
    mov r10d, 1
    je  .LBB0_4         // 1
    cmp rcx, -1
    jne .LBB0_5         // 2
    lea r10, [rdi + 1]
    mov rdx, rsi
    jmp .LBB0_4         // 3
.LBB0_5:
    ret
```
With this PR:
```asm
coresum:
	cmp	rcx, rdx
	jbe	.LBB0_2
	xor	eax, eax
	ret
.LBB0_2:
	xor	r8d, r8d
	mov	r9d, 1
	xor	eax, eax
	.p2align	4, 0x90
.LBB0_3:
	lea	r10, [rcx + 1]
	cmp	rcx, rdx
	cmovae	rdx, r8
	cmovae	r10, r9
	lea	r11, [rcx - 1]
	xor	r11, rcx
	add	rax, r11
	mov	rcx, r10
	cmp	r10, rdx
	jbe	.LBB0_3         // Just this
	ret
```

<details><summary>Though using internal iteration (`.map(|i| i ^ (i-1)).sum()`) is still shorter to type, and lets the compiler unroll it</summary>

```asm
coresum_inner:
.Lcfi0:
.seh_proc coresum_inner
	sub	rsp, 168
.Lcfi1:
	.seh_stackalloc 168
	vmovdqa	xmmword ptr [rsp + 144], xmm15
.Lcfi2:
	.seh_savexmm 15, 144
	vmovdqa	xmmword ptr [rsp + 128], xmm14
.Lcfi3:
	.seh_savexmm 14, 128
	vmovdqa	xmmword ptr [rsp + 112], xmm13
.Lcfi4:
	.seh_savexmm 13, 112
	vmovdqa	xmmword ptr [rsp + 96], xmm12
.Lcfi5:
	.seh_savexmm 12, 96
	vmovdqa	xmmword ptr [rsp + 80], xmm11
.Lcfi6:
	.seh_savexmm 11, 80
	vmovdqa	xmmword ptr [rsp + 64], xmm10
.Lcfi7:
	.seh_savexmm 10, 64
	vmovdqa	xmmword ptr [rsp + 48], xmm9
.Lcfi8:
	.seh_savexmm 9, 48
	vmovdqa	xmmword ptr [rsp + 32], xmm8
.Lcfi9:
	.seh_savexmm 8, 32
	vmovdqa	xmmword ptr [rsp + 16], xmm7
.Lcfi10:
	.seh_savexmm 7, 16
	vmovdqa	xmmword ptr [rsp], xmm6
.Lcfi11:
	.seh_savexmm 6, 0
.Lcfi12:
	.seh_endprologue
	cmp	rdx, rcx
	jae	.LBB1_2
	xor	eax, eax
	jmp	.LBB1_13
.LBB1_2:
	mov	r8, rdx
	sub	r8, rcx
	jbe	.LBB1_3
	cmp	r8, 7
	jbe	.LBB1_5
	mov	rax, r8
	and	rax, -8
	mov	r9, r8
	and	r9, -8
	je	.LBB1_5
	add	rax, rcx
	vmovq	xmm0, rcx
	vpshufd	xmm0, xmm0, 68
	mov	ecx, 1
	vmovq	xmm1, rcx
	vpslldq	xmm1, xmm1, 8
	vpaddq	xmm1, xmm0, xmm1
	vpxor	xmm0, xmm0, xmm0
	vpcmpeqd	xmm11, xmm11, xmm11
	vmovdqa	xmm12, xmmword ptr [rip + __xmm@00000000000000010000000000000001]
	vmovdqa	xmm13, xmmword ptr [rip + __xmm@00000000000000030000000000000003]
	vmovdqa	xmm14, xmmword ptr [rip + __xmm@00000000000000050000000000000005]
	vmovdqa	xmm15, xmmword ptr [rip + __xmm@00000000000000080000000000000008]
	mov	rcx, r9
	vpxor	xmm4, xmm4, xmm4
	vpxor	xmm5, xmm5, xmm5
	vpxor	xmm6, xmm6, xmm6
	.p2align	4, 0x90
.LBB1_9:
	vpaddq	xmm7, xmm1, xmmword ptr [rip + __xmm@00000000000000020000000000000002]
	vpaddq	xmm9, xmm1, xmmword ptr [rip + __xmm@00000000000000040000000000000004]
	vpaddq	xmm10, xmm1, xmmword ptr [rip + __xmm@00000000000000060000000000000006]
	vpaddq	xmm8, xmm1, xmm12
	vpxor	xmm7, xmm8, xmm7
	vpaddq	xmm2, xmm1, xmm13
	vpxor	xmm8, xmm2, xmm9
	vpaddq	xmm3, xmm1, xmm14
	vpxor	xmm3, xmm3, xmm10
	vpaddq	xmm2, xmm1, xmm11
	vpxor	xmm2, xmm2, xmm1
	vpaddq	xmm0, xmm2, xmm0
	vpaddq	xmm4, xmm7, xmm4
	vpaddq	xmm5, xmm8, xmm5
	vpaddq	xmm6, xmm3, xmm6
	vpaddq	xmm1, xmm1, xmm15
	add	rcx, -8
	jne	.LBB1_9
	vpaddq	xmm0, xmm4, xmm0
	vpaddq	xmm0, xmm5, xmm0
	vpaddq	xmm0, xmm6, xmm0
	vpshufd	xmm1, xmm0, 78
	vpaddq	xmm0, xmm0, xmm1
	vmovq	r10, xmm0
	cmp	r8, r9
	jne	.LBB1_6
	jmp	.LBB1_11
.LBB1_3:
	xor	r10d, r10d
	jmp	.LBB1_12
.LBB1_5:
	xor	r10d, r10d
	mov	rax, rcx
	.p2align	4, 0x90
.LBB1_6:
	lea	rcx, [rax - 1]
	xor	rcx, rax
	inc	rax
	add	r10, rcx
	cmp	rdx, rax
	jne	.LBB1_6
.LBB1_11:
	mov	rcx, rdx
.LBB1_12:
	lea	rax, [rcx - 1]
	xor	rax, rcx
	add	rax, r10
.LBB1_13:
	vmovaps	xmm6, xmmword ptr [rsp]
	vmovaps	xmm7, xmmword ptr [rsp + 16]
	vmovaps	xmm8, xmmword ptr [rsp + 32]
	vmovaps	xmm9, xmmword ptr [rsp + 48]
	vmovaps	xmm10, xmmword ptr [rsp + 64]
	vmovaps	xmm11, xmmword ptr [rsp + 80]
	vmovaps	xmm12, xmmword ptr [rsp + 96]
	vmovaps	xmm13, xmmword ptr [rsp + 112]
	vmovaps	xmm14, xmmword ptr [rsp + 128]
	vmovaps	xmm15, xmmword ptr [rsp + 144]
	add	rsp, 168
	ret
	.seh_handlerdata
	.section	.text,"xr",one_only,coresum_inner
.Lcfi13:
	.seh_endproc
```

</details>
2018-02-08 06:38:30 +00:00

467 lines
14 KiB
Rust

// Copyright 2013-2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use convert::TryFrom;
use mem;
use ops::{self, Add, Sub, Try};
use usize;
use super::{FusedIterator, TrustedLen};
/// Objects that can be stepped over in both directions.
///
/// The `steps_between` function provides a way to efficiently compare
/// two `Step` objects.
#[unstable(feature = "step_trait",
reason = "likely to be replaced by finer-grained traits",
issue = "42168")]
pub trait Step: Clone + PartialOrd + Sized {
/// Returns the number of steps between two step objects. The count is
/// inclusive of `start` and exclusive of `end`.
///
/// Returns `None` if it is not possible to calculate `steps_between`
/// without overflow.
fn steps_between(start: &Self, end: &Self) -> Option<usize>;
/// Replaces this step with `1`, returning itself
fn replace_one(&mut self) -> Self;
/// Replaces this step with `0`, returning itself
fn replace_zero(&mut self) -> Self;
/// Adds one to this step, returning the result
fn add_one(&self) -> Self;
/// Subtracts one to this step, returning the result
fn sub_one(&self) -> Self;
/// Add an usize, returning None on overflow
fn add_usize(&self, n: usize) -> Option<Self>;
}
// These are still macro-generated because the integer literals resolve to different types.
macro_rules! step_identical_methods {
() => {
#[inline]
fn replace_one(&mut self) -> Self {
mem::replace(self, 1)
}
#[inline]
fn replace_zero(&mut self) -> Self {
mem::replace(self, 0)
}
#[inline]
fn add_one(&self) -> Self {
Add::add(*self, 1)
}
#[inline]
fn sub_one(&self) -> Self {
Sub::sub(*self, 1)
}
}
}
macro_rules! step_impl_unsigned {
($($t:ty)*) => ($(
#[unstable(feature = "step_trait",
reason = "likely to be replaced by finer-grained traits",
issue = "42168")]
impl Step for $t {
#[inline]
#[allow(trivial_numeric_casts)]
fn steps_between(start: &$t, end: &$t) -> Option<usize> {
if *start < *end {
// Note: We assume $t <= usize here
Some((*end - *start) as usize)
} else {
Some(0)
}
}
#[inline]
#[allow(unreachable_patterns)]
fn add_usize(&self, n: usize) -> Option<Self> {
match <$t>::try_from(n) {
Ok(n_as_t) => self.checked_add(n_as_t),
Err(_) => None,
}
}
step_identical_methods!();
}
)*)
}
macro_rules! step_impl_signed {
($( [$t:ty : $unsigned:ty] )*) => ($(
#[unstable(feature = "step_trait",
reason = "likely to be replaced by finer-grained traits",
issue = "42168")]
impl Step for $t {
#[inline]
#[allow(trivial_numeric_casts)]
fn steps_between(start: &$t, end: &$t) -> Option<usize> {
if *start < *end {
// Note: We assume $t <= isize here
// Use .wrapping_sub and cast to usize to compute the
// difference that may not fit inside the range of isize.
Some((*end as isize).wrapping_sub(*start as isize) as usize)
} else {
Some(0)
}
}
#[inline]
#[allow(unreachable_patterns)]
fn add_usize(&self, n: usize) -> Option<Self> {
match <$unsigned>::try_from(n) {
Ok(n_as_unsigned) => {
// Wrapping in unsigned space handles cases like
// `-120_i8.add_usize(200) == Some(80_i8)`,
// even though 200_usize is out of range for i8.
let wrapped = (*self as $unsigned).wrapping_add(n_as_unsigned) as $t;
if wrapped >= *self {
Some(wrapped)
} else {
None // Addition overflowed
}
}
Err(_) => None,
}
}
step_identical_methods!();
}
)*)
}
macro_rules! step_impl_no_between {
($($t:ty)*) => ($(
#[unstable(feature = "step_trait",
reason = "likely to be replaced by finer-grained traits",
issue = "42168")]
impl Step for $t {
#[inline]
fn steps_between(_start: &Self, _end: &Self) -> Option<usize> {
None
}
#[inline]
fn add_usize(&self, n: usize) -> Option<Self> {
self.checked_add(n as $t)
}
step_identical_methods!();
}
)*)
}
step_impl_unsigned!(usize u8 u16 u32);
step_impl_signed!([isize: usize] [i8: u8] [i16: u16] [i32: u32]);
#[cfg(target_pointer_width = "64")]
step_impl_unsigned!(u64);
#[cfg(target_pointer_width = "64")]
step_impl_signed!([i64: u64]);
// If the target pointer width is not 64-bits, we
// assume here that it is less than 64-bits.
#[cfg(not(target_pointer_width = "64"))]
step_impl_no_between!(u64 i64);
step_impl_no_between!(u128 i128);
macro_rules! range_exact_iter_impl {
($($t:ty)*) => ($(
#[stable(feature = "rust1", since = "1.0.0")]
impl ExactSizeIterator for ops::Range<$t> { }
)*)
}
macro_rules! range_incl_exact_iter_impl {
($($t:ty)*) => ($(
#[unstable(feature = "inclusive_range",
reason = "recently added, follows RFC",
issue = "28237")]
impl ExactSizeIterator for ops::RangeInclusive<$t> { }
)*)
}
macro_rules! range_trusted_len_impl {
($($t:ty)*) => ($(
#[unstable(feature = "trusted_len", issue = "37572")]
unsafe impl TrustedLen for ops::Range<$t> { }
)*)
}
macro_rules! range_incl_trusted_len_impl {
($($t:ty)*) => ($(
#[unstable(feature = "inclusive_range",
reason = "recently added, follows RFC",
issue = "28237")]
unsafe impl TrustedLen for ops::RangeInclusive<$t> { }
)*)
}
#[stable(feature = "rust1", since = "1.0.0")]
impl<A: Step> Iterator for ops::Range<A> {
type Item = A;
#[inline]
fn next(&mut self) -> Option<A> {
if self.start < self.end {
// We check for overflow here, even though it can't actually
// happen. Adding this check does however help llvm vectorize loops
// for some ranges that don't get vectorized otherwise,
// and this won't actually result in an extra check in an optimized build.
if let Some(mut n) = self.start.add_usize(1) {
mem::swap(&mut n, &mut self.start);
Some(n)
} else {
None
}
} else {
None
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
match Step::steps_between(&self.start, &self.end) {
Some(hint) => (hint, Some(hint)),
None => (0, None)
}
}
#[inline]
fn nth(&mut self, n: usize) -> Option<A> {
if let Some(plus_n) = self.start.add_usize(n) {
if plus_n < self.end {
self.start = plus_n.add_one();
return Some(plus_n)
}
}
self.start = self.end.clone();
None
}
#[inline]
fn last(mut self) -> Option<A> {
self.next_back()
}
#[inline]
fn min(mut self) -> Option<A> {
self.next()
}
#[inline]
fn max(mut self) -> Option<A> {
self.next_back()
}
}
// These macros generate `ExactSizeIterator` impls for various range types.
// Range<{u,i}64> and RangeInclusive<{u,i}{32,64,size}> are excluded
// because they cannot guarantee having a length <= usize::MAX, which is
// required by ExactSizeIterator.
range_exact_iter_impl!(usize u8 u16 u32 isize i8 i16 i32);
range_incl_exact_iter_impl!(u8 u16 i8 i16);
// These macros generate `TrustedLen` impls.
//
// They need to guarantee that .size_hint() is either exact, or that
// the upper bound is None when it does not fit the type limits.
range_trusted_len_impl!(usize isize u8 i8 u16 i16 u32 i32 i64 u64);
range_incl_trusted_len_impl!(usize isize u8 i8 u16 i16 u32 i32 i64 u64);
#[stable(feature = "rust1", since = "1.0.0")]
impl<A: Step> DoubleEndedIterator for ops::Range<A> {
#[inline]
fn next_back(&mut self) -> Option<A> {
if self.start < self.end {
self.end = self.end.sub_one();
Some(self.end.clone())
} else {
None
}
}
}
#[unstable(feature = "fused", issue = "35602")]
impl<A: Step> FusedIterator for ops::Range<A> {}
#[stable(feature = "rust1", since = "1.0.0")]
impl<A: Step> Iterator for ops::RangeFrom<A> {
type Item = A;
#[inline]
fn next(&mut self) -> Option<A> {
let mut n = self.start.add_one();
mem::swap(&mut n, &mut self.start);
Some(n)
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(usize::MAX, None)
}
#[inline]
fn nth(&mut self, n: usize) -> Option<A> {
let plus_n = self.start.add_usize(n).expect("overflow in RangeFrom::nth");
self.start = plus_n.add_one();
Some(plus_n)
}
}
#[unstable(feature = "fused", issue = "35602")]
impl<A: Step> FusedIterator for ops::RangeFrom<A> {}
#[unstable(feature = "trusted_len", issue = "37572")]
unsafe impl<A: Step> TrustedLen for ops::RangeFrom<A> {}
#[unstable(feature = "inclusive_range", reason = "recently added, follows RFC", issue = "28237")]
impl<A: Step> Iterator for ops::RangeInclusive<A> {
type Item = A;
#[inline]
fn next(&mut self) -> Option<A> {
if self.start <= self.end {
if self.start < self.end {
let n = self.start.add_one();
Some(mem::replace(&mut self.start, n))
} else {
let last = self.start.replace_one();
self.end.replace_zero();
Some(last)
}
} else {
None
}
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
if !(self.start <= self.end) {
return (0, Some(0));
}
match Step::steps_between(&self.start, &self.end) {
Some(hint) => (hint.saturating_add(1), hint.checked_add(1)),
None => (0, None),
}
}
#[inline]
fn nth(&mut self, n: usize) -> Option<A> {
if let Some(plus_n) = self.start.add_usize(n) {
use cmp::Ordering::*;
match plus_n.partial_cmp(&self.end) {
Some(Less) => {
self.start = plus_n.add_one();
return Some(plus_n)
}
Some(Equal) => {
self.start.replace_one();
self.end.replace_zero();
return Some(plus_n)
}
_ => {}
}
}
self.start.replace_one();
self.end.replace_zero();
None
}
#[inline]
fn last(mut self) -> Option<A> {
self.next_back()
}
#[inline]
fn min(mut self) -> Option<A> {
self.next()
}
#[inline]
fn max(mut self) -> Option<A> {
self.next_back()
}
#[inline]
fn try_fold<B, F, R>(&mut self, init: B, mut f: F) -> R where
Self: Sized, F: FnMut(B, Self::Item) -> R, R: Try<Ok=B>
{
let mut accum = init;
if self.start <= self.end {
loop {
let (x, done) =
if self.start < self.end {
let n = self.start.add_one();
(mem::replace(&mut self.start, n), false)
} else {
self.end.replace_zero();
(self.start.replace_one(), true)
};
accum = f(accum, x)?;
if done { break }
}
}
Try::from_ok(accum)
}
}
#[unstable(feature = "inclusive_range", reason = "recently added, follows RFC", issue = "28237")]
impl<A: Step> DoubleEndedIterator for ops::RangeInclusive<A> {
#[inline]
fn next_back(&mut self) -> Option<A> {
if self.start <= self.end {
if self.start < self.end {
let n = self.end.sub_one();
Some(mem::replace(&mut self.end, n))
} else {
let last = self.end.replace_zero();
self.start.replace_one();
Some(last)
}
} else {
None
}
}
#[inline]
fn try_rfold<B, F, R>(&mut self, init: B, mut f: F) -> R where
Self: Sized, F: FnMut(B, Self::Item) -> R, R: Try<Ok=B>
{
let mut accum = init;
if self.start <= self.end {
loop {
let (x, done) =
if self.start < self.end {
let n = self.end.sub_one();
(mem::replace(&mut self.end, n), false)
} else {
self.start.replace_one();
(self.end.replace_zero(), true)
};
accum = f(accum, x)?;
if done { break }
}
}
Try::from_ok(accum)
}
}
#[unstable(feature = "fused", issue = "35602")]
impl<A: Step> FusedIterator for ops::RangeInclusive<A> {}