rollup merge of #24846: dotdash/fast_cttz8
Currently, LLVM lowers a cttz8 on x86_64 to these instructions:
```asm
movzbl %dil, %eax
bsfl %eax, %eax
movl $32, %ecx
cmovnel %eax, %ecx
cmpl $32, %ecx
movl $8, %eax
cmovnel %ecx, %eax
```
To improve the codegen, we can zero-extend the 8-bit integer to 16 bits, then
set bit 8 and perform a cttz operation on the extended value. Because bit 8 is
always set, the extended value is never zero and an all-zero input yields 8, so
there's no conditional operation involved at all.
This was discovered by this benchmark: https://github.com/Kimundi/long_strings_without_repeats
Timings on my box with the current nightly:
```
running 4 tests
test bench_cpp_naive_big ... bench: 5479222 ns/iter (+/- 254222)
test bench_noop_big ... bench: 571405 ns/iter (+/- 111950)
test bench_rust_naive_big ... bench: 7798102 ns/iter (+/- 148841)
test bench_rust_unsafe_big ... bench: 6606488 ns/iter (+/- 67529)
```
Timings with the patch applied:
```
running 4 tests
test bench_cpp_naive_big ... bench: 5470944 ns/iter (+/- 7109)
test bench_noop_big ... bench: 568944 ns/iter (+/- 6895)
test bench_rust_naive_big ... bench: 6795901 ns/iter (+/- 43806)
test bench_rust_unsafe_big ... bench: 5584879 ns/iter (+/- 5291)
```
This commit is contained in:
commit
41ee6df261
2 changed files with 14 additions and 6 deletions
|
|
@@ -745,7 +745,20 @@ macro_rules! uint_impl {
|
|||
#[stable(feature = "rust1", since = "1.0.0")]
|
||||
#[inline]
|
||||
pub fn trailing_zeros(self) -> u32 {
|
||||
unsafe { $cttz(self as $ActualT) as u32 }
|
||||
// As of LLVM 3.6 the codegen for the zero-safe cttz8 intrinsic
|
||||
// emits two conditional moves on x86_64. By promoting the value to
|
||||
// u16 and setting bit 8, we get better code without any conditional
|
||||
// operations.
|
||||
// FIXME: There's a LLVM patch (http://reviews.llvm.org/D9284)
|
||||
// pending, remove this workaround once LLVM generates better code
|
||||
// for cttz8.
|
||||
unsafe {
|
||||
if $BITS == 8 {
|
||||
intrinsics::cttz16(self as u16 | 0x100) as u32
|
||||
} else {
|
||||
$cttz(self as $ActualT) as u32
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Shifts the bits to the left by a specified amount, `n`,
|
||||
|
|
|
|||
|
|
@@ -109,11 +109,6 @@ pub fn main() {
|
|||
assert_eq!(cttz32(100), 2);
|
||||
assert_eq!(cttz64(100), 2);
|
||||
|
||||
assert_eq!(cttz8(-1), 0);
|
||||
assert_eq!(cttz16(-1), 0);
|
||||
assert_eq!(cttz32(-1), 0);
|
||||
assert_eq!(cttz64(-1), 0);
|
||||
|
||||
assert_eq!(bswap16(0x0A0B), 0x0B0A);
|
||||
assert_eq!(bswap32(0x0ABBCC0D), 0x0DCCBB0A);
|
||||
assert_eq!(bswap64(0x0122334455667708), 0x0877665544332201);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue