rust/src/libcollections/btree/node.rs

556 lines
20 KiB
Rust

// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// This module represents all the internal representation and logic for a B-Tree's node
// with a safe interface, so that BTreeMap itself does not depend on any of these details.
pub use self::InsertionResult::*;
pub use self::SearchResult::*;
pub use self::TraversalItem::*;
use core::prelude::*;
use core::{slice, mem, ptr};
use core::iter::Zip;
use core::borrow::BorrowFrom;
use vec;
use vec::Vec;
/// Represents the result of an Insertion: either the item fit, or the node had to split
pub enum InsertionResult<K, V> {
/// The inserted element fit
Fit,
/// The inserted element did not fit, so the node was split
Split(K, V, Node<K, V>),
}
/// Represents the result of a search for a key in a single node
pub enum SearchResult {
/// The element was found at the given index
Found(uint),
/// The element wasn't found, but if it's anywhere, it must be beyond this edge
GoDown(uint),
}
/// A B-Tree Node. We keep keys/edges/values separate to optimize searching for keys.
#[deriving(Clone)]
pub struct Node<K, V> {
// FIXME(Gankro): This representation is super safe and easy to reason about, but painfully
// inefficient. As three Vecs, each node consists of *9* words: (ptr, cap, size) * 3. In
// theory, if we take full control of allocation like HashMap's RawTable does,
// and restrict leaves to max size 256 (not unreasonable for a btree node) we can cut
// this down to just (ptr, cap: u8, size: u8, is_leaf: bool). With generic
// integer arguments, cap can even move into the type, reducing this just to
// (ptr, size, is_leaf). This could also have cache benefits for very small nodes, as keys
// could bleed into edges and vals.
//
// However doing this would require a fair amount of code to reimplement all
// the Vec logic and iterators. It would also use *way* more unsafe code, which sucks and is
// hard. For now, we accept this cost in the name of correctness and simplicity.
//
// As a compromise, keys and vals could be merged into one Vec<(K, V)>, which would shave
// off 3 words, but possibly hurt our cache efficiency during search, which only cares about
// keys. This would also avoid the Zip we use in our iterator implementations. This is
// probably worth investigating.
//
// Note that this space waste is especially tragic since we store the Nodes by value in their
// parent's edges Vec, so unoccupied spaces in the edges Vec are quite large, and we have
// to shift around a lot more bits during insertion/removal.
keys: Vec<K>,
edges: Vec<Node<K, V>>,
vals: Vec<V>,
}
impl<K: Ord, V> Node<K, V> {
/// Searches for the given key in the node. If it finds an exact match,
/// `Found` will be yielded with the matching index. If it doesn't find an exact match,
/// `GoDown` will be yielded with the index of the subtree the key must lie in.
pub fn search<Sized? Q>(&self, key: &Q) -> SearchResult where Q: BorrowFrom<K> + Ord {
// FIXME(Gankro): Tune when to search linear or binary based on B (and maybe K/V).
// For the B configured as of this writing (B = 6), binary search was *significantly*
// worse for uints.
self.search_linear(key)
}
fn search_linear<Sized? Q>(&self, key: &Q) -> SearchResult where Q: BorrowFrom<K> + Ord {
for (i, k) in self.keys.iter().enumerate() {
match key.cmp(BorrowFrom::borrow_from(k)) {
Greater => {},
Equal => return Found(i),
Less => return GoDown(i),
}
}
GoDown(self.len())
}
}
// Public interface
impl <K, V> Node<K, V> {
/// Make a new internal node
pub fn new_internal(capacity: uint) -> Node<K, V> {
Node {
keys: Vec::with_capacity(capacity),
vals: Vec::with_capacity(capacity),
edges: Vec::with_capacity(capacity + 1),
}
}
/// Make a new leaf node
pub fn new_leaf(capacity: uint) -> Node<K, V> {
Node {
keys: Vec::with_capacity(capacity),
vals: Vec::with_capacity(capacity),
edges: Vec::new(),
}
}
/// Make a leaf root from scratch
pub fn make_leaf_root(b: uint) -> Node<K, V> {
Node::new_leaf(capacity_from_b(b))
}
/// Make an internal root and swap it with an old root
pub fn make_internal_root(left_and_out: &mut Node<K,V>, b: uint, key: K, value: V,
right: Node<K,V>) {
let mut node = Node::new_internal(capacity_from_b(b));
mem::swap(left_and_out, &mut node);
left_and_out.keys.push(key);
left_and_out.vals.push(value);
left_and_out.edges.push(node);
left_and_out.edges.push(right);
}
/// How many key-value pairs the node contains
pub fn len(&self) -> uint {
self.keys.len()
}
/// How many key-value pairs the node can fit
pub fn capacity(&self) -> uint {
self.keys.capacity()
}
/// If the node has any children
pub fn is_leaf(&self) -> bool {
self.edges.is_empty()
}
/// if the node has too few elements
pub fn is_underfull(&self) -> bool {
self.len() < min_load_from_capacity(self.capacity())
}
/// if the node cannot fit any more elements
pub fn is_full(&self) -> bool {
self.len() == self.capacity()
}
/// Swap the given key-value pair with the key-value pair stored in the node's index,
/// without checking bounds.
pub unsafe fn unsafe_swap(&mut self, index: uint, key: &mut K, val: &mut V) {
mem::swap(self.keys.unsafe_mut(index), key);
mem::swap(self.vals.unsafe_mut(index), val);
}
/// Get the node's key mutably without any bounds checks.
pub unsafe fn unsafe_key_mut(&mut self, index: uint) -> &mut K {
self.keys.unsafe_mut(index)
}
/// Get the node's value at the given index
pub fn val(&self, index: uint) -> Option<&V> {
self.vals.get(index)
}
/// Get the node's value at the given index
pub fn val_mut(&mut self, index: uint) -> Option<&mut V> {
self.vals.get_mut(index)
}
/// Get the node's value mutably without any bounds checks.
pub unsafe fn unsafe_val_mut(&mut self, index: uint) -> &mut V {
self.vals.unsafe_mut(index)
}
/// Get the node's edge at the given index
pub fn edge(&self, index: uint) -> Option<&Node<K,V>> {
self.edges.get(index)
}
/// Get the node's edge mutably at the given index
pub fn edge_mut(&mut self, index: uint) -> Option<&mut Node<K,V>> {
self.edges.get_mut(index)
}
/// Get the node's edge mutably without any bounds checks.
pub unsafe fn unsafe_edge_mut(&mut self, index: uint) -> &mut Node<K,V> {
self.edges.unsafe_mut(index)
}
/// Pop an edge off the end of the node
pub fn pop_edge(&mut self) -> Option<Node<K,V>> {
self.edges.pop()
}
/// Try to insert this key-value pair at the given index in this internal node
/// If the node is full, we have to split it.
///
/// Returns a *mut V to the inserted value, because the caller may want this when
/// they're done mutating the tree, but we don't want to borrow anything for now.
pub fn insert_as_leaf(&mut self, index: uint, key: K, value: V) ->
(InsertionResult<K, V>, *mut V) {
if !self.is_full() {
// The element can fit, just insert it
self.insert_fit_as_leaf(index, key, value);
(Fit, unsafe { self.unsafe_val_mut(index) as *mut _ })
} else {
// The element can't fit, this node is full. Split it into two nodes.
let (new_key, new_val, mut new_right) = self.split();
let left_len = self.len();
let ptr = if index <= left_len {
self.insert_fit_as_leaf(index, key, value);
unsafe { self.unsafe_val_mut(index) as *mut _ }
} else {
new_right.insert_fit_as_leaf(index - left_len - 1, key, value);
unsafe { new_right.unsafe_val_mut(index - left_len - 1) as *mut _ }
};
(Split(new_key, new_val, new_right), ptr)
}
}
/// Try to insert this key-value pair at the given index in this internal node
/// If the node is full, we have to split it.
pub fn insert_as_internal(&mut self, index: uint, key: K, value: V, right: Node<K, V>)
-> InsertionResult<K, V> {
if !self.is_full() {
// The element can fit, just insert it
self.insert_fit_as_internal(index, key, value, right);
Fit
} else {
// The element can't fit, this node is full. Split it into two nodes.
let (new_key, new_val, mut new_right) = self.split();
let left_len = self.len();
if index <= left_len {
self.insert_fit_as_internal(index, key, value, right);
} else {
new_right.insert_fit_as_internal(index - left_len - 1, key, value, right);
}
Split(new_key, new_val, new_right)
}
}
/// Remove the key-value pair at the given index
pub fn remove_as_leaf(&mut self, index: uint) -> (K, V) {
match (self.keys.remove(index), self.vals.remove(index)) {
(Some(k), Some(v)) => (k, v),
_ => unreachable!(),
}
}
/// Handle an underflow in this node's child. We favour handling "to the left" because we know
/// we're empty, but our neighbour can be full. Handling to the left means when we choose to
/// steal, we pop off the end of our neighbour (always fast) and "unshift" ourselves
/// (always slow, but at least faster since we know we're half-empty).
/// Handling "to the right" reverses these roles. Of course, we merge whenever possible
/// because we want dense nodes, and merging is about equal work regardless of direction.
pub fn handle_underflow(&mut self, underflowed_child_index: uint) {
assert!(underflowed_child_index <= self.len());
unsafe {
if underflowed_child_index > 0 {
self.handle_underflow_to_left(underflowed_child_index);
} else {
self.handle_underflow_to_right(underflowed_child_index);
}
}
}
pub fn iter<'a>(&'a self) -> Traversal<'a, K, V> {
let is_leaf = self.is_leaf();
Traversal {
elems: self.keys.iter().zip(self.vals.iter()),
edges: self.edges.iter(),
head_is_edge: true,
tail_is_edge: true,
has_edges: !is_leaf,
}
}
pub fn iter_mut<'a>(&'a mut self) -> MutTraversal<'a, K, V> {
let is_leaf = self.is_leaf();
MutTraversal {
elems: self.keys.iter().zip(self.vals.iter_mut()),
edges: self.edges.iter_mut(),
head_is_edge: true,
tail_is_edge: true,
has_edges: !is_leaf,
}
}
pub fn into_iter(self) -> MoveTraversal<K, V> {
let is_leaf = self.is_leaf();
MoveTraversal {
elems: self.keys.into_iter().zip(self.vals.into_iter()),
edges: self.edges.into_iter(),
head_is_edge: true,
tail_is_edge: true,
has_edges: !is_leaf,
}
}
}
// Private implementation details
impl<K, V> Node<K, V> {
/// Make a node from its raw components
fn from_vecs(keys: Vec<K>, vals: Vec<V>, edges: Vec<Node<K, V>>) -> Node<K, V> {
Node {
keys: keys,
vals: vals,
edges: edges,
}
}
/// We have somehow verified that this key-value pair will fit in this internal node,
/// so insert under that assumption.
fn insert_fit_as_leaf(&mut self, index: uint, key: K, val: V) {
self.keys.insert(index, key);
self.vals.insert(index, val);
}
/// We have somehow verified that this key-value pair will fit in this internal node,
/// so insert under that assumption
fn insert_fit_as_internal(&mut self, index: uint, key: K, val: V, right: Node<K, V>) {
self.keys.insert(index, key);
self.vals.insert(index, val);
self.edges.insert(index + 1, right);
}
/// Node is full, so split it into two nodes, and yield the middle-most key-value pair
/// because we have one too many, and our parent now has one too few
fn split(&mut self) -> (K, V, Node<K, V>) {
let r_keys = split(&mut self.keys);
let r_vals = split(&mut self.vals);
let r_edges = if self.edges.is_empty() {
Vec::new()
} else {
split(&mut self.edges)
};
let right = Node::from_vecs(r_keys, r_vals, r_edges);
// Pop it
let key = self.keys.pop().unwrap();
let val = self.vals.pop().unwrap();
(key, val, right)
}
/// Right is underflowed. Try to steal from left,
/// but merge left and right if left is low too.
unsafe fn handle_underflow_to_left(&mut self, underflowed_child_index: uint) {
let left_len = self.edges[underflowed_child_index - 1].len();
if left_len > min_load_from_capacity(self.capacity()) {
self.steal_to_left(underflowed_child_index);
} else {
self.merge_children(underflowed_child_index - 1);
}
}
/// Left is underflowed. Try to steal from the right,
/// but merge left and right if right is low too.
unsafe fn handle_underflow_to_right(&mut self, underflowed_child_index: uint) {
let right_len = self.edges[underflowed_child_index + 1].len();
if right_len > min_load_from_capacity(self.capacity()) {
self.steal_to_right(underflowed_child_index);
} else {
self.merge_children(underflowed_child_index);
}
}
/// Steal! Stealing is roughly analogous to a binary tree rotation.
/// In this case, we're "rotating" right.
unsafe fn steal_to_left(&mut self, underflowed_child_index: uint) {
// Take the biggest stuff off left
let (mut key, mut val, edge) = {
let left = self.unsafe_edge_mut(underflowed_child_index - 1);
match (left.keys.pop(), left.vals.pop(), left.edges.pop()) {
(Some(k), Some(v), e) => (k, v, e),
_ => unreachable!(),
}
};
// Swap the parent's separating key-value pair with left's
self.unsafe_swap(underflowed_child_index - 1, &mut key, &mut val);
// Put them at the start of right
{
let right = self.unsafe_edge_mut(underflowed_child_index);
right.keys.insert(0, key);
right.vals.insert(0, val);
match edge {
None => {}
Some(e) => right.edges.insert(0, e)
}
}
}
/// Steal! Stealing is roughly analogous to a binary tree rotation.
/// In this case, we're "rotating" left.
unsafe fn steal_to_right(&mut self, underflowed_child_index: uint) {
// Take the smallest stuff off right
let (mut key, mut val, edge) = {
let right = self.unsafe_edge_mut(underflowed_child_index + 1);
match (right.keys.remove(0), right.vals.remove(0), right.edges.remove(0)) {
(Some(k), Some(v), e) => (k, v, e),
_ => unreachable!(),
}
};
// Swap the parent's separating key-value pair with right's
self.unsafe_swap(underflowed_child_index, &mut key, &mut val);
// Put them at the end of left
{
let left = self.unsafe_edge_mut(underflowed_child_index);
left.keys.push(key);
left.vals.push(val);
match edge {
None => {}
Some(e) => left.edges.push(e)
}
}
}
/// Merge! Left and right will be smooshed into one node, along with the key-value
/// pair that separated them in their parent.
unsafe fn merge_children(&mut self, left_index: uint) {
// Permanently remove right's index, and the key-value pair that separates
// left and right
let (key, val, right) = {
match (self.keys.remove(left_index),
self.vals.remove(left_index),
self.edges.remove(left_index + 1)) {
(Some(k), Some(v), Some(e)) => (k, v, e),
_ => unreachable!(),
}
};
// Give left right's stuff.
let left = self.unsafe_edge_mut(left_index);
left.absorb(key, val, right);
}
/// Take all the values from right, separated by the given key and value
fn absorb(&mut self, key: K, val: V, right: Node<K, V>) {
// Just as a sanity check, make sure we can fit this guy in
debug_assert!(self.len() + right.len() <= self.capacity())
self.keys.push(key);
self.vals.push(val);
self.keys.extend(right.keys.into_iter());
self.vals.extend(right.vals.into_iter());
self.edges.extend(right.edges.into_iter());
}
}
/// Takes a Vec, and splits half the elements into a new one.
fn split<T>(left: &mut Vec<T>) -> Vec<T> {
// This function is intended to be called on a full Vec of size 2B - 1 (keys, values),
// or 2B (edges). In the former case, left should get B elements, and right should get
// B - 1. In the latter case, both should get B. Therefore, we can just always take the last
// size / 2 elements from left, and put them on right. This also ensures this method is
// safe, even if the Vec isn't full. Just uninteresting for our purposes.
let len = left.len();
let right_len = len / 2;
let left_len = len - right_len;
let mut right = Vec::with_capacity(left.capacity());
unsafe {
let left_ptr = left.unsafe_get(left_len) as *const _;
let right_ptr = right.as_mut_ptr();
ptr::copy_nonoverlapping_memory(right_ptr, left_ptr, right_len);
left.set_len(left_len);
right.set_len(right_len);
}
right
}
/// Get the capacity of a node from the order of the parent B-Tree
fn capacity_from_b(b: uint) -> uint {
2 * b - 1
}
/// Get the minimum load of a node from its capacity
fn min_load_from_capacity(cap: uint) -> uint {
// B - 1
cap / 2
}
/// An abstraction over all the different kinds of traversals a node supports
struct AbsTraversal<Elems, Edges> {
elems: Elems,
edges: Edges,
head_is_edge: bool,
tail_is_edge: bool,
has_edges: bool,
}
/// A single atomic step in a traversal. Either an element is visited, or an edge is followed
pub enum TraversalItem<K, V, E> {
Elem(K, V),
Edge(E),
}
/// A traversal over a node's entries and edges
pub type Traversal<'a, K, V> = AbsTraversal<Zip<slice::Items<'a, K>, slice::Items<'a, V>>,
slice::Items<'a, Node<K, V>>>;
/// A mutable traversal over a node's entries and edges
pub type MutTraversal<'a, K, V> = AbsTraversal<Zip<slice::Items<'a, K>, slice::MutItems<'a, V>>,
slice::MutItems<'a, Node<K, V>>>;
/// An owning traversal over a node's entries and edges
pub type MoveTraversal<K, V> = AbsTraversal<Zip<vec::MoveItems<K>, vec::MoveItems<V>>,
vec::MoveItems<Node<K, V>>>;
impl<K, V, E, Elems: Iterator<(K, V)>, Edges: Iterator<E>>
Iterator<TraversalItem<K, V, E>> for AbsTraversal<Elems, Edges> {
fn next(&mut self) -> Option<TraversalItem<K, V, E>> {
let head_is_edge = self.head_is_edge;
self.head_is_edge = !head_is_edge;
if head_is_edge && self.has_edges {
self.edges.next().map(|node| Edge(node))
} else {
self.elems.next().map(|(k, v)| Elem(k, v))
}
}
}
impl<K, V, E, Elems: DoubleEndedIterator<(K, V)>, Edges: DoubleEndedIterator<E>>
DoubleEndedIterator<TraversalItem<K, V, E>> for AbsTraversal<Elems, Edges> {
fn next_back(&mut self) -> Option<TraversalItem<K, V, E>> {
let tail_is_edge = self.tail_is_edge;
self.tail_is_edge = !tail_is_edge;
if tail_is_edge && self.has_edges {
self.edges.next_back().map(|node| Edge(node))
} else {
self.elems.next_back().map(|(k, v)| Elem(k, v))
}
}
}