Shrink heapsort further by combining sift_down loops

This commit is contained in:
Lukas Bergdoll 2024-09-04 19:37:49 +02:00
parent f2d4198d6e
commit adb0e27acd
3 changed files with 22 additions and 26 deletions

View file

@ -1,46 +1,46 @@
//! This module contains a branchless heapsort as fallback for unstable quicksort.
use crate::{intrinsics, ptr};
use crate::{cmp, intrinsics, ptr};
/// Sorts `v` using heapsort, which guarantees *O*(*n* \* log(*n*)) worst-case.
///
/// Never inline this, it sits the main hot-loop in `recurse` and is meant as unlikely algorithmic
/// fallback.
///
/// SAFETY: The caller has to guarantee that `v.len()` >= 2.
#[inline(never)]
pub(crate) unsafe fn heapsort<T, F>(v: &mut [T], is_less: &mut F)
pub(crate) fn heapsort<T, F>(v: &mut [T], is_less: &mut F)
where
F: FnMut(&T, &T) -> bool,
{
// SAFETY: See function safety.
unsafe {
intrinsics::assume(v.len() >= 2);
let len = v.len();
// Build the heap in linear time.
for i in (0..v.len() / 2).rev() {
sift_down(v, i, is_less);
}
// Pop maximal elements from the heap.
for i in (1..v.len()).rev() {
for i in (0..len + len / 2).rev() {
let sift_idx = if i >= len {
i - len
} else {
v.swap(0, i);
sift_down(&mut v[..i], 0, is_less);
0
};
// SAFETY: The above calculation ensures that `sift_idx` is either 0 or
// `(len..(len + (len / 2))) - len`, which simplifies to `0..(len / 2)`.
// This guarantees the required `sift_idx <= len`.
unsafe {
sift_down(&mut v[..cmp::min(i, len)], sift_idx, is_less);
}
}
}
// This binary heap respects the invariant `parent >= child`.
//
// SAFETY: The caller has to guarantee that node < `v.len()`.
#[inline(never)]
// SAFETY: The caller has to guarantee that `node <= v.len()`.
#[inline(always)]
unsafe fn sift_down<T, F>(v: &mut [T], mut node: usize, is_less: &mut F)
where
F: FnMut(&T, &T) -> bool,
{
// SAFETY: See function safety.
unsafe {
intrinsics::assume(node < v.len());
intrinsics::assume(node <= v.len());
}
let len = v.len();

View file

@ -32,10 +32,7 @@ pub fn sort<T, F: FnMut(&T, &T) -> bool>(v: &mut [T], is_less: &mut F) {
cfg_if! {
if #[cfg(feature = "optimize_for_size")] {
// SAFETY: We checked that `len >= 2`.
unsafe {
heapsort::heapsort(v, is_less);
}
heapsort::heapsort(v, is_less);
} else {
// More advanced sorting methods than insertion sort are faster if called in
// a hot loop for small inputs, but for general-purpose code the small

View file

@ -5,6 +5,8 @@ use crate::mem::{self, ManuallyDrop};
use crate::slice::sort::shared::pivot::choose_pivot;
#[cfg(not(feature = "optimize_for_size"))]
use crate::slice::sort::shared::smallsort::UnstableSmallSortTypeImpl;
#[cfg(not(feature = "optimize_for_size"))]
use crate::slice::sort::unstable::heapsort;
use crate::{intrinsics, ptr};
/// Sorts `v` recursively.
@ -31,10 +33,7 @@ pub(crate) fn quicksort<'a, T, F>(
// If too many bad pivot choices were made, simply fall back to heapsort in order to
// guarantee `O(N x log(N))` worst-case.
if limit == 0 {
// SAFETY: We assume the `small_sort` threshold is at least 1.
unsafe {
crate::slice::sort::unstable::heapsort::heapsort(v, is_less);
}
heapsort::heapsort(v, is_less);
return;
}