Simplify RangeInclusive::next[_back] `match`ing on an `Option<Ordering>` seems cause some confusion for LLVM; switching to just using comparison operators removes a few jumps from the simple `for` loops I was trying. cc https://github.com/rust-lang/rust/issues/45222 https://github.com/rust-lang/rust/issues/28237#issuecomment-363706510 Example: ```rust #[no_mangle] pub fn coresum(x: std::ops::RangeInclusive<u64>) -> u64 { let mut sum = 0; for i in x { sum += i ^ (i-1); } sum } ``` Today: ```asm coresum: xor r8d, r8d mov r9, -1 xor eax, eax jmp .LBB0_1 .LBB0_4: lea rcx, [rdi - 1] xor rcx, rdi add rax, rcx mov rsi, rdx mov rdi, r10 .LBB0_1: cmp rdi, rsi mov ecx, 1 cmovb rcx, r9 cmove rcx, r8 test rcx, rcx mov edx, 0 mov r10d, 1 je .LBB0_4 // 1 cmp rcx, -1 jne .LBB0_5 // 2 lea r10, [rdi + 1] mov rdx, rsi jmp .LBB0_4 // 3 .LBB0_5: ret ``` With this PR: ```asm coresum: cmp rcx, rdx jbe .LBB0_2 xor eax, eax ret .LBB0_2: xor r8d, r8d mov r9d, 1 xor eax, eax .p2align 4, 0x90 .LBB0_3: lea r10, [rcx + 1] cmp rcx, rdx cmovae rdx, r8 cmovae r10, r9 lea r11, [rcx - 1] xor r11, rcx add rax, r11 mov rcx, r10 cmp r10, rdx jbe .LBB0_3 // Just this ret ``` <details><summary>Though using internal iteration (`.map(|i| i ^ (i-1)).sum()`) is still shorter to type, and lets the compiler unroll it</summary> ```asm coresum_inner: .Lcfi0: .seh_proc coresum_inner sub rsp, 168 .Lcfi1: .seh_stackalloc 168 vmovdqa xmmword ptr [rsp + 144], xmm15 .Lcfi2: .seh_savexmm 15, 144 vmovdqa xmmword ptr [rsp + 128], xmm14 .Lcfi3: .seh_savexmm 14, 128 vmovdqa xmmword ptr [rsp + 112], xmm13 .Lcfi4: .seh_savexmm 13, 112 vmovdqa xmmword ptr [rsp + 96], xmm12 .Lcfi5: .seh_savexmm 12, 96 vmovdqa xmmword ptr [rsp + 80], xmm11 .Lcfi6: .seh_savexmm 11, 80 vmovdqa xmmword ptr [rsp + 64], xmm10 .Lcfi7: .seh_savexmm 10, 64 vmovdqa xmmword ptr [rsp + 48], xmm9 .Lcfi8: .seh_savexmm 9, 48 vmovdqa xmmword ptr [rsp + 32], xmm8 .Lcfi9: .seh_savexmm 8, 32 vmovdqa xmmword ptr [rsp + 16], xmm7 .Lcfi10: .seh_savexmm 7, 16 
vmovdqa xmmword ptr [rsp], xmm6 .Lcfi11: .seh_savexmm 6, 0 .Lcfi12: .seh_endprologue cmp rdx, rcx jae .LBB1_2 xor eax, eax jmp .LBB1_13 .LBB1_2: mov r8, rdx sub r8, rcx jbe .LBB1_3 cmp r8, 7 jbe .LBB1_5 mov rax, r8 and rax, -8 mov r9, r8 and r9, -8 je .LBB1_5 add rax, rcx vmovq xmm0, rcx vpshufd xmm0, xmm0, 68 mov ecx, 1 vmovq xmm1, rcx vpslldq xmm1, xmm1, 8 vpaddq xmm1, xmm0, xmm1 vpxor xmm0, xmm0, xmm0 vpcmpeqd xmm11, xmm11, xmm11 vmovdqa xmm12, xmmword ptr [rip + __xmm@00000000000000010000000000000001] vmovdqa xmm13, xmmword ptr [rip + __xmm@00000000000000030000000000000003] vmovdqa xmm14, xmmword ptr [rip + __xmm@00000000000000050000000000000005] vmovdqa xmm15, xmmword ptr [rip + __xmm@00000000000000080000000000000008] mov rcx, r9 vpxor xmm4, xmm4, xmm4 vpxor xmm5, xmm5, xmm5 vpxor xmm6, xmm6, xmm6 .p2align 4, 0x90 .LBB1_9: vpaddq xmm7, xmm1, xmmword ptr [rip + __xmm@00000000000000020000000000000002] vpaddq xmm9, xmm1, xmmword ptr [rip + __xmm@00000000000000040000000000000004] vpaddq xmm10, xmm1, xmmword ptr [rip + __xmm@00000000000000060000000000000006] vpaddq xmm8, xmm1, xmm12 vpxor xmm7, xmm8, xmm7 vpaddq xmm2, xmm1, xmm13 vpxor xmm8, xmm2, xmm9 vpaddq xmm3, xmm1, xmm14 vpxor xmm3, xmm3, xmm10 vpaddq xmm2, xmm1, xmm11 vpxor xmm2, xmm2, xmm1 vpaddq xmm0, xmm2, xmm0 vpaddq xmm4, xmm7, xmm4 vpaddq xmm5, xmm8, xmm5 vpaddq xmm6, xmm3, xmm6 vpaddq xmm1, xmm1, xmm15 add rcx, -8 jne .LBB1_9 vpaddq xmm0, xmm4, xmm0 vpaddq xmm0, xmm5, xmm0 vpaddq xmm0, xmm6, xmm0 vpshufd xmm1, xmm0, 78 vpaddq xmm0, xmm0, xmm1 vmovq r10, xmm0 cmp r8, r9 jne .LBB1_6 jmp .LBB1_11 .LBB1_3: xor r10d, r10d jmp .LBB1_12 .LBB1_5: xor r10d, r10d mov rax, rcx .p2align 4, 0x90 .LBB1_6: lea rcx, [rax - 1] xor rcx, rax inc rax add r10, rcx cmp rdx, rax jne .LBB1_6 .LBB1_11: mov rcx, rdx .LBB1_12: lea rax, [rcx - 1] xor rax, rcx add rax, r10 .LBB1_13: vmovaps xmm6, xmmword ptr [rsp] vmovaps xmm7, xmmword ptr [rsp + 16] vmovaps xmm8, xmmword ptr [rsp + 32] vmovaps xmm9, xmmword ptr [rsp + 48] 
vmovaps xmm10, xmmword ptr [rsp + 64] vmovaps xmm11, xmmword ptr [rsp + 80] vmovaps xmm12, xmmword ptr [rsp + 96] vmovaps xmm13, xmmword ptr [rsp + 112] vmovaps xmm14, xmmword ptr [rsp + 128] vmovaps xmm15, xmmword ptr [rsp + 144] add rsp, 168 ret .seh_handlerdata .section .text,"xr",one_only,coresum_inner .Lcfi13: .seh_endproc ``` </details>
467 lines
14 KiB
Rust
467 lines
14 KiB
Rust
// Copyright 2013-2016 The Rust Project Developers. See the COPYRIGHT
|
|
// file at the top-level directory of this distribution and at
|
|
// http://rust-lang.org/COPYRIGHT.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
|
// option. This file may not be copied, modified, or distributed
|
|
// except according to those terms.
|
|
|
|
use convert::TryFrom;
|
|
use mem;
|
|
use ops::{self, Add, Sub, Try};
|
|
use usize;
|
|
|
|
use super::{FusedIterator, TrustedLen};
|
|
|
|
/// Objects that can be stepped over in both directions.
///
/// The `steps_between` function provides a way to efficiently compare
/// two `Step` objects.
#[unstable(feature = "step_trait",
           reason = "likely to be replaced by finer-grained traits",
           issue = "42168")]
pub trait Step: Clone + PartialOrd + Sized {
    /// Returns the number of steps between two step objects. The count is
    /// inclusive of `start` and exclusive of `end`.
    ///
    /// Returns `None` if it is not possible to calculate `steps_between`
    /// without overflow.
    fn steps_between(start: &Self, end: &Self) -> Option<usize>;

    /// Replaces this step with `1`, returning itself.
    ///
    /// Together with `replace_zero`, this is used by `RangeInclusive`
    /// iteration to flag exhaustion: writing `start = 1, end = 0` makes
    /// `start <= end` false on every subsequent call.
    fn replace_one(&mut self) -> Self;

    /// Replaces this step with `0`, returning itself.
    fn replace_zero(&mut self) -> Self;

    /// Adds one to this step, returning the result.
    fn add_one(&self) -> Self;

    /// Subtracts one from this step, returning the result.
    fn sub_one(&self) -> Self;

    /// Adds a `usize`, returning `None` on overflow.
    fn add_usize(&self, n: usize) -> Option<Self>;
}
|
|
|
|
// These are still macro-generated because the integer literals resolve to different types.
//
// Expands to the four trivial `Step` methods that are identical (modulo the
// concrete type of the literals `0` and `1`) for every integer impl below.
macro_rules! step_identical_methods {
    () => {
        #[inline]
        fn replace_one(&mut self) -> Self {
            // Write `1` into `self` and hand back the previous value.
            mem::replace(self, 1)
        }

        #[inline]
        fn replace_zero(&mut self) -> Self {
            // Write `0` into `self` and hand back the previous value.
            mem::replace(self, 0)
        }

        #[inline]
        fn add_one(&self) -> Self {
            Add::add(*self, 1)
        }

        #[inline]
        fn sub_one(&self) -> Self {
            Sub::sub(*self, 1)
        }
    }
}
|
|
|
|
// Implements `Step` for unsigned integer types that are no wider than
// `usize`, so `steps_between` can always report the distance exactly.
macro_rules! step_impl_unsigned {
    ($($t:ty)*) => ($(
        #[unstable(feature = "step_trait",
                   reason = "likely to be replaced by finer-grained traits",
                   issue = "42168")]
        impl Step for $t {
            #[inline]
            #[allow(trivial_numeric_casts)]
            fn steps_between(start: &$t, end: &$t) -> Option<usize> {
                if *start < *end {
                    // Note: We assume $t <= usize here
                    Some((*end - *start) as usize)
                } else {
                    // Empty (or reversed) range: zero steps.
                    Some(0)
                }
            }

            #[inline]
            #[allow(unreachable_patterns)]
            fn add_usize(&self, n: usize) -> Option<Self> {
                // Narrow `n` to `$t` first: a value of `n` that does not
                // fit in `$t` would necessarily overflow the addition too.
                match <$t>::try_from(n) {
                    Ok(n_as_t) => self.checked_add(n_as_t),
                    Err(_) => None,
                }
            }

            step_identical_methods!();
        }
    )*)
}
|
|
// Implements `Step` for signed integer types no wider than `isize`. Each
// type is paired with its unsigned counterpart ($unsigned), which is used
// to do overflow-free arithmetic in `add_usize`.
macro_rules! step_impl_signed {
    ($( [$t:ty : $unsigned:ty] )*) => ($(
        #[unstable(feature = "step_trait",
                   reason = "likely to be replaced by finer-grained traits",
                   issue = "42168")]
        impl Step for $t {
            #[inline]
            #[allow(trivial_numeric_casts)]
            fn steps_between(start: &$t, end: &$t) -> Option<usize> {
                if *start < *end {
                    // Note: We assume $t <= isize here
                    // Use .wrapping_sub and cast to usize to compute the
                    // difference that may not fit inside the range of isize.
                    Some((*end as isize).wrapping_sub(*start as isize) as usize)
                } else {
                    // Empty (or reversed) range: zero steps.
                    Some(0)
                }
            }

            #[inline]
            #[allow(unreachable_patterns)]
            fn add_usize(&self, n: usize) -> Option<Self> {
                match <$unsigned>::try_from(n) {
                    Ok(n_as_unsigned) => {
                        // Wrapping in unsigned space handles cases like
                        // `-120_i8.add_usize(200) == Some(80_i8)`,
                        // even though 200_usize is out of range for i8.
                        let wrapped = (*self as $unsigned).wrapping_add(n_as_unsigned) as $t;
                        if wrapped >= *self {
                            Some(wrapped)
                        } else {
                            None // Addition overflowed
                        }
                    }
                    Err(_) => None,
                }
            }

            step_identical_methods!();
        }
    )*)
}
|
|
|
|
// Implements `Step` for integer types that may be wider than `usize`:
// the step count between two values might not fit in a `usize`, so
// `steps_between` always reports "unknown".
macro_rules! step_impl_no_between {
    ($($t:ty)*) => ($(
        #[unstable(feature = "step_trait",
                   reason = "likely to be replaced by finer-grained traits",
                   issue = "42168")]
        impl Step for $t {
            #[inline]
            fn steps_between(_start: &Self, _end: &Self) -> Option<usize> {
                // The difference may exceed usize::MAX.
                None
            }

            #[inline]
            fn add_usize(&self, n: usize) -> Option<Self> {
                // `$t` is at least as wide as `usize` on every target this
                // macro is instantiated for (see the cfg'd invocations
                // below), so the cast is lossless.
                self.checked_add(n as $t)
            }

            step_identical_methods!();
        }
    )*)
}
|
|
|
|
// `Step` is implemented for every primitive integer type. The
// `steps_between`-capable impls are restricted to types no wider than the
// target's pointer width, so the distance always fits in a `usize`.
step_impl_unsigned!(usize u8 u16 u32);
step_impl_signed!([isize: usize] [i8: u8] [i16: u16] [i32: u32]);
#[cfg(target_pointer_width = "64")]
step_impl_unsigned!(u64);
#[cfg(target_pointer_width = "64")]
step_impl_signed!([i64: u64]);
// If the target pointer width is not 64-bits, we
// assume here that it is less than 64-bits.
#[cfg(not(target_pointer_width = "64"))]
step_impl_no_between!(u64 i64);
step_impl_no_between!(u128 i128);
|
|
|
|
// Generates `ExactSizeIterator` impls for `Range<$t>`. Only invoked for
// types whose entire value range is guaranteed to have a length that fits
// in a `usize` (see the invocations further down).
macro_rules! range_exact_iter_impl {
    ($($t:ty)*) => ($(
        #[stable(feature = "rust1", since = "1.0.0")]
        impl ExactSizeIterator for ops::Range<$t> { }
    )*)
}
|
|
|
|
// Generates `ExactSizeIterator` impls for `RangeInclusive<$t>`. The
// inclusive length can be one larger than the exclusive one, so this is
// invoked for a smaller set of types than `range_exact_iter_impl!`.
macro_rules! range_incl_exact_iter_impl {
    ($($t:ty)*) => ($(
        #[unstable(feature = "inclusive_range",
                   reason = "recently added, follows RFC",
                   issue = "28237")]
        impl ExactSizeIterator for ops::RangeInclusive<$t> { }
    )*)
}
|
|
|
|
// Generates `TrustedLen` impls for `Range<$t>`; the unsafe trait promises
// that `size_hint` is exact or has upper bound `None` when it overflows.
macro_rules! range_trusted_len_impl {
    ($($t:ty)*) => ($(
        #[unstable(feature = "trusted_len", issue = "37572")]
        unsafe impl TrustedLen for ops::Range<$t> { }
    )*)
}
|
|
|
|
// Generates `TrustedLen` impls for `RangeInclusive<$t>`; same contract as
// `range_trusted_len_impl!` above.
macro_rules! range_incl_trusted_len_impl {
    ($($t:ty)*) => ($(
        #[unstable(feature = "inclusive_range",
                   reason = "recently added, follows RFC",
                   issue = "28237")]
        unsafe impl TrustedLen for ops::RangeInclusive<$t> { }
    )*)
}
|
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
impl<A: Step> Iterator for ops::Range<A> {
    type Item = A;

    #[inline]
    fn next(&mut self) -> Option<A> {
        if self.start < self.end {
            // We check for overflow here, even though it can't actually
            // happen. Adding this check does however help llvm vectorize loops
            // for some ranges that don't get vectorized otherwise,
            // and this won't actually result in an extra check in an optimized build.
            if let Some(mut n) = self.start.add_usize(1) {
                // Advance `start` and yield its previous value.
                mem::swap(&mut n, &mut self.start);
                Some(n)
            } else {
                None
            }
        } else {
            None
        }
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // `steps_between` is exact when it succeeds; when it cannot be
        // computed without overflow the length is unknown.
        match Step::steps_between(&self.start, &self.end) {
            Some(hint) => (hint, Some(hint)),
            None => (0, None)
        }
    }

    #[inline]
    fn nth(&mut self, n: usize) -> Option<A> {
        if let Some(plus_n) = self.start.add_usize(n) {
            if plus_n < self.end {
                // `plus_n` is still inside the range: yield it and resume
                // iteration just past it.
                self.start = plus_n.add_one();
                return Some(plus_n)
            }
        }

        // Skipping past `end` (or overflowing the element type) exhausts
        // the iterator.
        self.start = self.end.clone();
        None
    }

    #[inline]
    fn last(mut self) -> Option<A> {
        // The last element of a range is the one next to its back.
        self.next_back()
    }

    #[inline]
    fn min(mut self) -> Option<A> {
        // Elements are yielded in increasing order, so the first is the minimum.
        self.next()
    }

    #[inline]
    fn max(mut self) -> Option<A> {
        // Elements are yielded in increasing order, so the back element is the maximum.
        self.next_back()
    }
}
|
|
|
|
// These macros generate `ExactSizeIterator` impls for various range types.
// Range<{u,i}64> and RangeInclusive<{u,i}{32,64,size}> are excluded
// because they cannot guarantee having a length <= usize::MAX, which is
// required by ExactSizeIterator.
range_exact_iter_impl!(usize u8 u16 u32 isize i8 i16 i32);
range_incl_exact_iter_impl!(u8 u16 i8 i16);

// These macros generate `TrustedLen` impls.
//
// They need to guarantee that .size_hint() is either exact, or that
// the upper bound is None when it does not fit the type limits.
range_trusted_len_impl!(usize isize u8 i8 u16 i16 u32 i32 i64 u64);
range_incl_trusted_len_impl!(usize isize u8 i8 u16 i16 u32 i32 i64 u64);
|
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
impl<A: Step> DoubleEndedIterator for ops::Range<A> {
    #[inline]
    fn next_back(&mut self) -> Option<A> {
        if self.start < self.end {
            // `end` is exclusive, so step it down first, then yield the
            // new value (the largest element still in the range).
            self.end = self.end.sub_one();
            Some(self.end.clone())
        } else {
            None
        }
    }
}
|
|
|
|
// Once `start >= end`, `next` keeps returning `None`, so `Range` is fused.
#[unstable(feature = "fused", issue = "35602")]
impl<A: Step> FusedIterator for ops::Range<A> {}
|
|
|
|
#[stable(feature = "rust1", since = "1.0.0")]
impl<A: Step> Iterator for ops::RangeFrom<A> {
    type Item = A;

    #[inline]
    fn next(&mut self) -> Option<A> {
        // Unconditionally step `start` forward and yield its previous
        // value; a `RangeFrom` never terminates on its own.
        let mut n = self.start.add_one();
        mem::swap(&mut n, &mut self.start);
        Some(n)
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // Unbounded: at least usize::MAX elements remain and there is no
        // finite upper bound.
        (usize::MAX, None)
    }

    #[inline]
    fn nth(&mut self, n: usize) -> Option<A> {
        // Panics if stepping forward by `n` overflows the element type.
        let plus_n = self.start.add_usize(n).expect("overflow in RangeFrom::nth");
        self.start = plus_n.add_one();
        Some(plus_n)
    }
}
|
|
|
|
// `RangeFrom::next` never returns `None`, so fusing is trivial.
#[unstable(feature = "fused", issue = "35602")]
impl<A: Step> FusedIterator for ops::RangeFrom<A> {}

// `size_hint` is `(usize::MAX, None)`, which satisfies the `TrustedLen`
// contract for an iterator with more than `usize::MAX` elements.
#[unstable(feature = "trusted_len", issue = "37572")]
unsafe impl<A: Step> TrustedLen for ops::RangeFrom<A> {}
|
|
|
|
#[unstable(feature = "inclusive_range", reason = "recently added, follows RFC", issue = "28237")]
impl<A: Step> Iterator for ops::RangeInclusive<A> {
    type Item = A;

    // Exhaustion is flagged in-band by writing `start = 1, end = 0` (via
    // `replace_one`/`replace_zero`), which makes `start <= end` false for
    // every later call.
    //
    // Plain `<=`/`<` comparisons are used here rather than matching on
    // `partial_cmp`; the comparison form produces noticeably better LLVM
    // code for simple `for` loops over inclusive ranges.

    #[inline]
    fn next(&mut self) -> Option<A> {
        if self.start <= self.end {
            if self.start < self.end {
                // More than one element left: step `start` forward and
                // yield its previous value.
                let n = self.start.add_one();
                Some(mem::replace(&mut self.start, n))
            } else {
                // `start == end`: yield the final element and mark the
                // range exhausted.
                let last = self.start.replace_one();
                self.end.replace_zero();
                Some(last)
            }
        } else {
            None
        }
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        // Negated form so that an incomparable pair (possible with a
        // partially ordered `A`) also reports an empty range.
        if !(self.start <= self.end) {
            return (0, Some(0));
        }

        // One more than the exclusive distance, since `end` is included;
        // the `+ 1` may saturate/overflow for full-width ranges.
        match Step::steps_between(&self.start, &self.end) {
            Some(hint) => (hint.saturating_add(1), hint.checked_add(1)),
            None => (0, None),
        }
    }

    #[inline]
    fn nth(&mut self, n: usize) -> Option<A> {
        if let Some(plus_n) = self.start.add_usize(n) {
            use cmp::Ordering::*;

            match plus_n.partial_cmp(&self.end) {
                Some(Less) => {
                    // Still strictly inside the range: resume past `plus_n`.
                    self.start = plus_n.add_one();
                    return Some(plus_n)
                }
                Some(Equal) => {
                    // Landed exactly on the final element: yield it and
                    // flag exhaustion (start = 1, end = 0).
                    self.start.replace_one();
                    self.end.replace_zero();
                    return Some(plus_n)
                }
                _ => {}
            }
        }

        // Skipped past `end` (or overflowed): flag exhaustion.
        self.start.replace_one();
        self.end.replace_zero();
        None
    }

    #[inline]
    fn last(mut self) -> Option<A> {
        self.next_back()
    }

    #[inline]
    fn min(mut self) -> Option<A> {
        // Elements are yielded in increasing order, so the first is the minimum.
        self.next()
    }

    #[inline]
    fn max(mut self) -> Option<A> {
        // Elements are yielded in increasing order, so the back element is the maximum.
        self.next_back()
    }

    #[inline]
    fn try_fold<B, F, R>(&mut self, init: B, mut f: F) -> R where
        Self: Sized, F: FnMut(B, Self::Item) -> R, R: Try<Ok=B>
    {
        let mut accum = init;
        if self.start <= self.end {
            loop {
                // Same stepping logic as `next`, but with an explicit
                // `done` flag so the final element terminates the loop
                // without re-testing the exhausted range.
                let (x, done) =
                    if self.start < self.end {
                        let n = self.start.add_one();
                        (mem::replace(&mut self.start, n), false)
                    } else {
                        // Final element: flag exhaustion before yielding.
                        self.end.replace_zero();
                        (self.start.replace_one(), true)
                    };
                // `?` propagates early exit requested by `f`.
                accum = f(accum, x)?;
                if done { break }
            }
        }
        Try::from_ok(accum)
    }
}
|
|
|
|
#[unstable(feature = "inclusive_range", reason = "recently added, follows RFC", issue = "28237")]
impl<A: Step> DoubleEndedIterator for ops::RangeInclusive<A> {
    // Mirror image of the forward impl: steps `end` downward and uses the
    // same `start = 1, end = 0` in-band flag for exhaustion.

    #[inline]
    fn next_back(&mut self) -> Option<A> {
        if self.start <= self.end {
            if self.start < self.end {
                // More than one element left: step `end` backward and
                // yield its previous value.
                let n = self.end.sub_one();
                Some(mem::replace(&mut self.end, n))
            } else {
                // `start == end`: yield the final element and mark the
                // range exhausted.
                let last = self.end.replace_zero();
                self.start.replace_one();
                Some(last)
            }
        } else {
            None
        }
    }

    #[inline]
    fn try_rfold<B, F, R>(&mut self, init: B, mut f: F) -> R where
        Self: Sized, F: FnMut(B, Self::Item) -> R, R: Try<Ok=B>
    {
        let mut accum = init;
        if self.start <= self.end {
            loop {
                // Same stepping logic as `next_back`, with an explicit
                // `done` flag so the final element terminates the loop.
                let (x, done) =
                    if self.start < self.end {
                        let n = self.end.sub_one();
                        (mem::replace(&mut self.end, n), false)
                    } else {
                        // Final element: flag exhaustion before yielding.
                        self.start.replace_one();
                        (self.end.replace_zero(), true)
                    };
                // `?` propagates early exit requested by `f`.
                accum = f(accum, x)?;
                if done { break }
            }
        }
        Try::from_ok(accum)
    }
}
|
|
|
|
// Once exhausted (`start > end`, including the in-band `1, 0` flag state),
// `next` keeps returning `None`, so `RangeInclusive` is fused.
#[unstable(feature = "fused", issue = "35602")]
impl<A: Step> FusedIterator for ops::RangeInclusive<A> {}
|