From cef77636497fad5f3e07c5089ef8ed68fa739689 Mon Sep 17 00:00:00 2001
From: Simon BD
Date: Tue, 25 Sep 2012 17:53:24 -0500
Subject: [PATCH] Add timsort to std/sort

---
 src/libstd/sort.rs | 699 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 699 insertions(+)

diff --git a/src/libstd/sort.rs b/src/libstd/sort.rs
index 46f79c019486..8cd98f8ab0ba 100644
--- a/src/libstd/sort.rs
+++ b/src/libstd/sort.rs
@@ -4,6 +4,7 @@
 
 use vec::{len, push};
 use core::cmp::{Eq, Ord};
+use dvec::DVec;
 
 export le;
 export merge_sort;
@@ -168,6 +169,704 @@ impl &[mut T] : Sort {
     fn qsort(self) { quick_sort3(self); }
 }
 
+/// Arrays shorter than this are sorted by one binary insertion sort pass;
+/// `minRunLength` also halves the array length until it is below this value
+const MIN_MERGE: uint = 64;
+/// Initial value of `MergeState.minGallop`, and the win count that keeps a
+/// merge in galloping mode
+const MIN_GALLOP: uint = 7;
+/// Capacity initially reserved for the temporary merge buffer
+const INITIAL_TMP_STORAGE: uint = 128;
+
+/// Sorts `array` in place into ascending order using timsort
+///
+/// Runs are found with `countRunAndMakeAscending`, runs shorter than
+/// `minRunLength(size)` are extended with `binarysort`, and the pending
+/// runs are merged through a `MergeState`.
+fn timsort<T: Ord>(array: &[mut T]) {
+    let size = array.len();
+    if size < 2 {
+        return;
+    }
+
+    if size < MIN_MERGE {
+        // Short input: a single run finished off by insertion sort
+        let initRunLen = countRunAndMakeAscending(array);
+        binarysort(array, initRunLen);
+        return;
+    }
+
+    let ms = &MergeState();
+    let minRun = minRunLength(size);
+
+    let mut idx = 0;
+    let mut remaining = size;
+    loop {
+        let arr = vec::mut_view(array, idx, size);
+        let mut runLen: uint = countRunAndMakeAscending(arr);
+
+        if runLen < minRun {
+            // Extend a short run to min(minRun, remaining) elements
+            let force = if remaining <= minRun {remaining} else {minRun};
+            let slice = vec::mut_view(arr, 0, force);
+            binarysort(slice, runLen);
+            runLen = force;
+        }
+
+        ms.pushRun(idx, runLen);
+        ms.mergeCollapse(array);
+
+        idx += runLen;
+        remaining -= runLen;
+        if remaining == 0 { break; }
+    }
+
+    ms.mergeForceCollapse(array);
+}
+
+/// Sorts `array` in place with a binary insertion sort
+///
+/// The first `start` elements are assumed to be sorted already; every later
+/// element is inserted after any elements it does not compare less than.
+fn binarysort<T: Ord>(array: &[mut T], start: uint) {
+    let size = array.len();
+    let mut start = start;
+    assert start <= size;
+
+    if start == 0 { start += 1; }
+
+    // One-slot scratch vector holding a bitwise copy of the element that
+    // is being inserted
+    let mut pivot = ~[mut];
+    vec::reserve(pivot, 1);
+    unsafe { vec::raw::set_len(pivot, 1); };
+
+    while start < size {
+        unsafe {
+            let tmpView = vec::mut_view(array, start, start+1);
+            vec::raw::memmove(pivot, tmpView, 1);
+        }
+        let mut left = 0;
+        let mut right = start;
+        assert left <= right;
+
+        // Binary search array[0, start) for the insertion point
+        while left < right {
+            // Midpoint computed without overflowing left + right
+            let mid = left + ((right - left) >> 1);
+            if pivot[0] < array[mid] {
+                right = mid;
+            } else {
+                left = mid+1;
+            }
+        }
+        assert left == right;
+        let n = start-left;
+
+        // Shift array[left, start) up one slot to open a gap at `left`
+        unsafe {
+            moveVec(array, left+1, array, left, n);
+        }
+        array[left] <-> pivot[0];
+        start += 1;
+    }
+    unsafe { vec::raw::set_len(pivot, 0); } // Forget the boxed element
+}
+
+/// Reverse the order of the elements in `v[start, end)`, in place
+fn reverseSlice<T>(v: &[mut T], start: uint, end:uint) {
+    let mut lo = start;
+    let mut hi = end;
+    // Swap inwards from both ends of the range until the ends meet
+    while lo + 1 < hi {
+        hi -= 1;
+        v[lo] <-> v[hi];
+        lo += 1;
+    }
+}
+
+/// Computes the minimum run length used for an array of `n` elements
+///
+/// Returns `n` unchanged when `n < MIN_MERGE`; otherwise halves `n` until it
+/// is below `MIN_MERGE` and adds one if any set bit was shifted off.
+pure fn minRunLength(n: uint) -> uint {
+    let mut n = n;
+    let mut r = 0;      // becomes 1 if any 1 bits are shifted off
+
+    while n >= MIN_MERGE {
+        r |= n & 1;
+        n >>= 1;
+    }
+    return n + r;
+}
+
+/// Returns the length of the run starting at `array[0]`
+///
+/// A strictly descending run is reversed in place so that the returned
+/// prefix is always ascending. `array` must not be empty.
+fn countRunAndMakeAscending<T: Ord>(array: &[mut T]) -> uint {
+    let size = array.len();
+    assert size > 0;
+    if size == 1 { return 1; }
+
+    let mut run = 2;
+    if array[1] < array[0] {
+        while run < size && array[run] < array[run-1] {
+            run += 1;
+        }
+        reverseSlice(array, 0, run);
+    } else {
+        while run < size && array[run] >= array[run-1] {
+            run += 1;
+        }
+    }
+
+    return run;
+}
+
+/// Finds the position in the sorted slice `array` at which `key` would be
+/// inserted before any elements equal to it
+///
+/// Gallops outwards from index `hint`, which must be a valid index, and
+/// finishes with a binary search. Returns `k` such that
+/// `array[k-1] < key <= array[k]`.
+pure fn gallopLeft<T: Ord>(key: &const T, array: &[const T], hint: uint) -> uint {
+    let size = array.len();
+    assert size != 0 && hint < size;
+
+    let mut lastOfs = 0;
+    let mut ofs = 1;
+
+    if *key > array[hint] {
+        // Gallop right until array[hint+lastOfs] < key <= array[hint+ofs]
+        let maxOfs = size - hint;
+        while ofs < maxOfs && *key > array[hint+ofs] {
+            lastOfs = ofs;
+            ofs = (ofs << 1) + 1;
+            if ofs < lastOfs { ofs = maxOfs; } // uint overflow guard
+        }
+        if ofs > maxOfs { ofs = maxOfs; }
+
+        lastOfs += hint;
+        ofs += hint;
+    } else {
+        // Gallop left until array[hint-ofs] < key <= array[hint-lastOfs]
+        let maxOfs = hint + 1;
+        while ofs < maxOfs && *key <= array[hint-ofs] {
+            lastOfs = ofs;
+            ofs = (ofs << 1) + 1;
+            if ofs < lastOfs { ofs = maxOfs; } // uint overflow guard
+        }
+
+        if ofs > maxOfs { ofs = maxOfs; }
+
+        // Make the offsets relative to the start of the slice
+        // NOTE(review): hint - ofs wraps around when ofs == hint + 1
+        let tmp = lastOfs;
+        lastOfs = hint - ofs;
+        ofs = hint - tmp;
+    }
+    assert (lastOfs < ofs || lastOfs+1 < ofs+1) && ofs <= size;
+
+    // Binary search the remaining range (lastOfs, ofs]
+    lastOfs += 1;
+    while lastOfs < ofs {
+        let m = lastOfs + ((ofs - lastOfs) >> 1);
+        if *key > array[m] {
+            lastOfs = m+1;
+        } else {
+            ofs = m;
+        }
+    }
+    assert lastOfs == ofs;
+    return ofs;
+}
+
+/// Finds the position in the sorted slice `array` at which `key` would be
+/// inserted after any elements equal to it
+///
+/// Gallops outwards from index `hint`, which must be a valid index, and
+/// finishes with a binary search. Returns `k` such that
+/// `array[k-1] <= key < array[k]`.
+pure fn gallopRight<T: Ord>(key: &const T, array: &[const T], hint: uint) -> uint {
+    let size = array.len();
+    assert size != 0 && hint < size;
+
+    let mut lastOfs = 0;
+    let mut ofs = 1;
+
+    if *key >= array[hint] {
+        // Gallop right until array[hint+lastOfs] <= key < array[hint+ofs]
+        let maxOfs = size - hint;
+        while ofs < maxOfs && *key >= array[hint+ofs] {
+            lastOfs = ofs;
+            ofs = (ofs << 1) + 1;
+            if ofs < lastOfs { ofs = maxOfs; } // uint overflow guard
+        }
+        if ofs > maxOfs { ofs = maxOfs; }
+
+        lastOfs += hint;
+        ofs += hint;
+    } else {
+        // Gallop left until array[hint-ofs] <= key < array[hint-lastOfs]
+        let maxOfs = hint + 1;
+        while ofs < maxOfs && *key < array[hint-ofs] {
+            lastOfs = ofs;
+            ofs = (ofs << 1) + 1;
+            if ofs < lastOfs { ofs = maxOfs; } // uint overflow guard
+        }
+        if ofs > maxOfs { ofs = maxOfs; }
+
+        // Make the offsets relative to the start of the slice
+        // NOTE(review): hint - ofs wraps around when ofs == hint + 1
+        let tmp = lastOfs;
+        lastOfs = hint - ofs;
+        ofs = hint - tmp;
+    }
+
+    assert (lastOfs < ofs || lastOfs+1 < ofs+1) && ofs <= size;
+
+    // Binary search the remaining range (lastOfs, ofs]
+    lastOfs += 1;
+    while lastOfs < ofs {
+        let m = lastOfs + ((ofs - lastOfs) >> 1);
+
+        if *key >= array[m] {
+            lastOfs = m + 1;
+        } else {
+            ofs = m;
+        }
+    }
+    assert lastOfs == ofs;
+    return ofs;
+}
+
+/// A pending run of `len` elements that starts at index `base`
+struct RunState {
+    base: uint,
+    len: uint,
+}
+
+/// Bookkeeping for the merges of one sort: the adaptive gallop threshold,
+/// the temporary merge buffer and the stack of pending runs
+struct MergeState<T> {
+    mut minGallop: uint,
+    mut tmp: ~[T],
+    runs: DVec<RunState>,
+
+    drop {
+        // Reset the length so the vector's destructor does not touch the
+        // element slots of tmp
+        unsafe {
+            vec::raw::set_len(self.tmp, 0);
+        }
+    }
+}
+
+/// Creates a merge state with no pending runs, `minGallop` set to
+/// `MIN_GALLOP` and room reserved for `INITIAL_TMP_STORAGE` elements in `tmp`
+fn MergeState<T>() -> MergeState<T> {
+    let mut tmp = ~[];
+    vec::reserve(tmp, INITIAL_TMP_STORAGE);
+    MergeState {
+        minGallop: MIN_GALLOP,
+        tmp: tmp,
+        runs: DVec(),
+    }
+}
+
+impl<T: Ord> &MergeState<T> {
+    /// Pushes the run of `runLen` elements starting at `runBase` onto the
+    /// stack of pending runs
+    fn pushRun(runBase: uint, runLen: uint) {
+        let tmp = RunState{base: runBase, len: runLen};
+        self.runs.push(tmp);
+    }
+
+    /// Merges the adjacent runs at stack positions `n` and `n+1`
+    ///
+    /// `n` must be the second- or third-to-last stack position. Elements
+    /// that are already in their final place at the outer ends of the two
+    /// runs are skipped before `mergeLo` or `mergeHi` is called.
+    fn mergeAt(n: uint, array: &[mut T]) {
+        let size = self.runs.len();
+        assert size >= 2;
+        assert n == size-2 || n == size-3;
+
+        do self.runs.borrow_mut |arr| {
+
+            let mut b1 = arr[n].base;
+            let mut l1 = arr[n].len;
+            let b2 = arr[n+1].base;
+            let l2 = arr[n+1].len;
+
+            assert l1 > 0 && l2 > 0;
+            assert b1 + l1 == b2;
+
+            // Record the merged run; if a third run sits above the pair,
+            // slide it down one slot
+            arr[n].len = l1 + l2;
+            if n == size-3 {
+                arr[n+1].base = arr[n+2].base;
+                arr[n+1].len = arr[n+2].len;
+            }
+
+            // Skip the elements of run 1 that do not sort after the first
+            // element of run 2
+            let slice = vec::mut_view(array, b1, b1+l1);
+            let k = gallopRight(&const array[b2], slice, 0);
+            b1 += k;
+            l1 -= k;
+            if l1 != 0 {
+                // Drop the elements of run 2 that do not sort before the
+                // last element of run 1
+                let slice = vec::mut_view(array, b2, b2+l2);
+                let l2 = gallopLeft(
+                    &const array[b1+l1-1],slice,l2-1);
+                if l2 > 0 {
+                    // Copy the shorter run into the temporary buffer
+                    if l1 <= l2 {
+                        self.mergeLo(array, b1, l1, b2, l2);
+                    } else {
+                        self.mergeHi(array, b1, l1, b2, l2);
+                    }
+                }
+            }
+        }
+        self.runs.pop();
+    }
+
+    /// Merges `array[base1, base1+len1)` with the adjacent run
+    /// `array[base2, base2+len2)`, filling the result in from the low end
+    ///
+    /// The first run is moved into `tmp` and merged back from there. Both
+    /// runs must be non-empty; `mergeAt` calls this when `len1 <= len2`.
+    fn mergeLo(array: &[mut T], base1: uint, len1: uint,
+               base2: uint, len2: uint) {
+        assert len1 != 0 && len2 != 0 && base1+len1 == base2;
+
+        vec::reserve(self.tmp, len1);
+
+        unsafe {
+            vec::raw::set_len(self.tmp, len1);
+            moveVec(self.tmp, 0, array, base1, len1);
+        }
+
+        let mut c1 = 0;
+        let mut c2 = base2;
+        let mut dest = base1;
+        let mut len1 = len1;
+        let mut len2 = len2;
+
+        // The first element of run 2 is moved without a comparison
+        array[dest] <-> array[c2];
+        dest += 1; c2 += 1; len2 -= 1;
+
+        if len2 == 0 {
+            unsafe {
+                moveVec(array, dest, self.tmp, 0, len1);
+                vec::raw::set_len(self.tmp, 0); // Forget the elements
+            }
+            return;
+        }
+        if len1 == 1 {
+            unsafe {
+                moveVec(array, dest, array, c2, len2);
+                array[dest+len2] <-> self.tmp[c1];
+                vec::raw::set_len(self.tmp, 0); // Forget the element
+            }
+            return;
+        }
+
+        let mut minGallop = self.minGallop;
+        loop {
+            let mut count1 = 0;
+            let mut count2 = 0;
+            let mut breakOuter = false;
+
+            // Merge one element at a time until one run wins minGallop
+            // times in a row
+            loop {
+                assert len1 > 1 && len2 != 0;
+                if array[c2] < self.tmp[c1] {
+                    array[dest] <-> array[c2];
+                    dest += 1; c2 += 1; len2 -= 1;
+                    count2 += 1; count1 = 0;
+                    if len2 == 0 {
+                        breakOuter = true;
+                    }
+                } else {
+                    array[dest] <-> self.tmp[c1];
+                    dest += 1; c1 += 1; len1 -= 1;
+                    count1 += 1; count2 = 0;
+                    if len1 == 1 {
+                        breakOuter = true;
+                    }
+                }
+                if breakOuter || ((count1 | count2) >= minGallop) {
+                    break;
+                }
+            }
+            if breakOuter { break; }
+
+            // Start to gallop
+            loop {
+                assert len1 > 1 && len2 != 0;
+
+                let tmpView = vec::mut_view(self.tmp, c1, c1+len1);
+                count1 = gallopRight(&const array[c2], tmpView, 0);
+                if count1 != 0 {
+                    unsafe {
+                        moveVec(array, dest, self.tmp, c1, count1);
+                    }
+                    dest += count1; c1 += count1; len1 -= count1;
+                    if len1 <= 1 { breakOuter = true; break; }
+                }
+                array[dest] <-> array[c2];
+                dest += 1; c2 += 1; len2 -= 1;
+                if len2 == 0 { breakOuter = true; break; }
+
+                let tmpView = vec::mut_view(array, c2, c2+len2);
+                count2 = gallopLeft(&const self.tmp[c1], tmpView, 0);
+                if count2 != 0 {
+                    unsafe {
+                        moveVec(array, dest, array, c2, count2);
+                    }
+                    dest += count2; c2 += count2; len2 -= count2;
+                    if len2 == 0 { breakOuter = true; break; }
+                }
+                array[dest] <-> self.tmp[c1];
+                dest += 1; c1 += 1; len1 -= 1;
+                if len1 == 1 { breakOuter = true; break; }
+                // minGallop is unsigned: stop at zero instead of wrapping
+                if minGallop > 0 { minGallop -= 1; }
+                if !(count1 >= MIN_GALLOP || count2 >= MIN_GALLOP) { break; }
+            }
+            if breakOuter { break; }
+            minGallop += 2; // Penalize for leaving gallop
+        }
+        self.minGallop = if minGallop < 1 { 1 } else { minGallop };
+
+        if len1 == 1 {
+            assert len2 > 0;
+            unsafe {
+                moveVec(array, dest, array, c2, len2);
+            }
+            array[dest+len2] <-> self.tmp[c1];
+        } else if len1 == 0 {
+            fail fmt!("Method mergeLo violates its contract! %?", len1);
+        } else {
+            assert len2 == 0;
+            assert len1 > 1;
+            unsafe {
+                moveVec(array, dest, self.tmp, c1, len1);
+            }
+        }
+        unsafe { vec::raw::set_len(self.tmp, 0); }
+    }
+
+    /// Merges `array[base1, base1+len1)` with the adjacent run
+    /// `array[base2, base2+len2)`, filling the result in from the high end
+    ///
+    /// The second run is moved into `tmp` and merged back from there. Both
+    /// runs must be non-empty; `mergeAt` calls this when `len1 > len2`.
+    ///
+    /// NOTE(review): `c1` can step one below `base1`; when `base1` is 0 this
+    /// appears to rely on uint wrap-around. Confirm.
+    fn mergeHi(array: &[mut T], base1: uint, len1: uint,
+               base2: uint, len2: uint) {
+        assert len1 != 0 && len2 != 0 && base1 + len1 == base2;
+
+        vec::reserve(self.tmp, len2);
+
+        unsafe {
+            vec::raw::set_len(self.tmp, len2);
+            moveVec(self.tmp, 0, array, base2, len2);
+        }
+
+        let mut c1 = base1 + len1 - 1;
+        let mut c2 = len2 - 1;
+        let mut dest = base2 + len2 - 1;
+        let mut len1 = len1;
+        let mut len2 = len2;
+
+        // The last element of run 1 is moved without a comparison
+        array[dest] <-> array[c1];
+        dest -= 1; c1 -= 1; len1 -= 1;
+
+        if len1 == 0 {
+            unsafe {
+                moveVec(array, dest-(len2-1), self.tmp, 0, len2);
+                vec::raw::set_len(self.tmp, 0); // Forget the elements
+            }
+            return;
+        }
+        if len2 == 1 {
+            dest -= len1;
+            c1 -= len1;
+            unsafe {
+                moveVec(array, dest+1, array, c1+1, len1);
+                array[dest] <-> self.tmp[c2];
+                vec::raw::set_len(self.tmp, 0); // Forget the element
+            }
+            return;
+        }
+
+        let mut minGallop = self.minGallop;
+        loop {
+            let mut count1 = 0;
+            let mut count2 = 0;
+            let mut breakOuter = false;
+
+            // Merge one element at a time until one run wins minGallop
+            // times in a row
+            loop {
+                assert len1 != 0 && len2 > 1;
+                if self.tmp[c2] < array[c1] {
+                    array[dest] <-> array[c1];
+                    dest -= 1; c1 -= 1; len1 -= 1;
+                    count1 += 1; count2 = 0;
+                    if len1 == 0 {
+                        breakOuter = true;
+                    }
+                } else {
+                    array[dest] <-> self.tmp[c2];
+                    dest -= 1; c2 -= 1; len2 -= 1;
+                    count2 += 1; count1 = 0;
+                    if len2 == 1 {
+                        breakOuter = true;
+                    }
+                }
+                if breakOuter || ((count1 | count2) >= minGallop) {
+                    break;
+                }
+            }
+            if breakOuter { break; }
+
+            // Start to gallop
+            loop {
+                assert len2 > 1 && len1 != 0;
+
+                let tmpView = vec::mut_view(array, base1, base1+len1);
+                count1 = len1-gallopRight(&const self.tmp[c2], tmpView, len1-1);
+
+                if count1 != 0 {
+                    dest -= count1; c1 -= count1; len1 -= count1;
+                    unsafe {
+                        moveVec(array, dest+1, array, c1+1, count1);
+                    }
+                    if len1 == 0 { breakOuter = true; break; }
+                }
+
+                array[dest] <-> self.tmp[c2];
+                dest -= 1; c2 -= 1; len2 -= 1;
+                if len2 == 1 { breakOuter = true; break; }
+
+                let tmpView = vec::mut_view(self.tmp, 0, len2);
+                let gL = gallopLeft(&const array[c1], tmpView, len2-1);
+                count2 = len2 - gL;
+                if count2 != 0 {
+                    dest -= count2; c2 -= count2; len2 -= count2;
+                    unsafe {
+                        moveVec(array, dest+1, self.tmp, c2+1, count2);
+                    }
+                    if len2 <= 1 { breakOuter = true; break; }
+                }
+                array[dest] <-> array[c1];
+                dest -= 1; c1 -= 1; len1 -= 1;
+                if len1 == 0 { breakOuter = true; break; }
+                // minGallop is unsigned: stop at zero instead of wrapping
+                if minGallop > 0 { minGallop -= 1; }
+                if !(count1 >= MIN_GALLOP || count2 >= MIN_GALLOP) { break; }
+            }
+
+            if breakOuter { break; }
+            minGallop += 2; // Penalize for leaving gallop
+        }
+        self.minGallop = if minGallop < 1 { 1 } else { minGallop };
+
+        if len2 == 1 {
+            assert len1 > 0;
+            dest -= len1;
+            c1 -= len1;
+            unsafe {
+                moveVec(array, dest+1, array, c1+1, len1);
+            }
+            array[dest] <-> self.tmp[c2];
+        } else if len2 == 0 {
+            fail fmt!("Method mergeHi violates its contract! %?", len2);
+        } else {
+            assert len1 == 0;
+            assert len2 != 0;
+            unsafe {
+                moveVec(array, dest-(len2-1), self.tmp, 0, len2);
+            }
+        }
+        unsafe { vec::raw::set_len(self.tmp, 0); }
+    }
+
+    /// Merges pending runs until the run lengths on the stack satisfy the
+    /// timsort invariants again
+    ///
+    /// With the stack ending in runs A, B, C, D (D most recent) merging
+    /// continues while `B <= C + D`, `A <= B + C` or `C <= D` holds for their
+    /// lengths. The `A <= B + C` test repairs the invariant one level further
+    /// down, which the three-run check alone can leave broken.
+    fn mergeCollapse(array: &[mut T]) {
+        while self.runs.len() > 1 {
+            let mut n = self.runs.len()-2;
+            let chk = do self.runs.borrow |arr| {
+                if (n > 0 && arr[n-1].len <= arr[n].len + arr[n+1].len) ||
+                   (n > 1 && arr[n-2].len <= arr[n-1].len + arr[n].len) {
+                    // Merge the middle run with its shorter neighbour
+                    if arr[n-1].len < arr[n+1].len { n -= 1; }
+                    true
+                } else if arr[n].len <= arr[n+1].len {
+                    true
+                } else {
+                    false
+                }
+            };
+            if !chk { break; }
+            self.mergeAt(n, array);
+        }
+    }
+
+    /// Merges all pending runs until only one run is left on the stack
+    fn mergeForceCollapse(array: &[mut T]) {
+        while self.runs.len() > 1 {
+            let mut n = self.runs.len()-2;
+            if n > 0 {
+                do self.runs.borrow |arr| {
+                    if arr[n-1].len < arr[n+1].len {
+                        n -= 1;
+                    }
+                }
+            }
+            self.mergeAt(n, array);
+        }
+    }
+}
+
+// Moves `len` elements from `from[s2, s2+len)` to `dest[s1, s1+len)`; the
+// two ranges may overlap
+// Unsafe as it makes the from parameter invalid between s2 and s2+len
+#[inline(always)]
+unsafe fn moveVec<T>(dest: &[mut T], s1: uint, from: &[const T],
+                     s2: uint, len: uint) {
+    assert s1+len <= dest.len() && s2+len <= from.len();
+
+    do vec::as_mut_buf(dest) |p, _len| {
+        let destPtr = ptr::mut_offset(p, s1);
+
+        do vec::as_const_buf(from) |p, _len| {
+            let fromPtr = ptr::const_offset(p, s2);
+
+            ptr::memmove(destPtr, fromPtr, len);
+        }
+    }
+}
+
 #[cfg(test)]
 mod test_qsort3 {
     #[legacy_exports];