auto merge of #19946 : cgaebel/rust/hashmap-drain-iter, r=gankro

It is useful to move all the elements out of a hashmap without deallocating
the underlying buffer. It came up in IRC, and this patch implements it as
`drain`.

r? @Gankro
cc: @frankmcsherry
This commit is contained in:
bors 2014-12-21 07:22:45 +00:00
commit ce468e643a
6 changed files with 470 additions and 18 deletions

View file

@ -551,9 +551,18 @@ impl<T: Ord> BinaryHeap<T> {
#[unstable = "matches collection reform specification, waiting for dust to settle"]
pub fn is_empty(&self) -> bool { self.len() == 0 }
/// Clears the queue, returning an iterator over the removed elements.
#[inline]
#[unstable = "matches collection reform specification, waiting for dust to settle"]
pub fn drain<'a>(&'a mut self) -> Drain<'a, T> {
Drain {
iter: self.data.drain(),
}
}
/// Drops all items from the queue.
#[unstable = "matches collection reform specification, waiting for dust to settle"]
pub fn clear(&mut self) { self.data.truncate(0) }
pub fn clear(&mut self) { self.drain(); }
}
/// `BinaryHeap` iterator.
@ -596,6 +605,26 @@ impl<T> DoubleEndedIterator<T> for MoveItems<T> {
impl<T> ExactSizeIterator<T> for MoveItems<T> {}
/// An iterator that drains a `BinaryHeap`.
pub struct Drain<'a, T: 'a> {
iter: vec::Drain<'a, T>,
}
impl<'a, T: 'a> Iterator<T> for Drain<'a, T> {
#[inline]
fn next(&mut self) -> Option<T> { self.iter.next() }
#[inline]
fn size_hint(&self) -> (uint, Option<uint>) { self.iter.size_hint() }
}
impl<'a, T: 'a> DoubleEndedIterator<T> for Drain<'a, T> {
#[inline]
fn next_back(&mut self) -> Option<T> { self.iter.next_back() }
}
impl<'a, T: 'a> ExactSizeIterator<T> for Drain<'a, T> {}
impl<T: Ord> FromIterator<T> for BinaryHeap<T> {
fn from_iter<Iter: Iterator<T>>(iter: Iter) -> BinaryHeap<T> {
let vec: Vec<T> = iter.collect();
@ -819,4 +848,14 @@ mod tests {
assert_eq!(q.pop().unwrap(), x);
}
}
#[test]
fn test_drain() {
let mut q: BinaryHeap<_> =
[9u, 8, 7, 6, 5, 4, 3, 2, 1].iter().cloned().collect();
assert_eq!(q.drain().take(5).count(), 5);
assert!(q.is_empty());
}
}

View file

@ -491,6 +491,27 @@ impl<T> RingBuf<T> {
#[unstable = "matches collection reform specification, waiting for dust to settle"]
pub fn is_empty(&self) -> bool { self.len() == 0 }
/// Creates a draining iterator that clears the `RingBuf` and iterates over
/// the removed items from start to end.
///
/// # Examples
///
/// ```
/// use std::collections::RingBuf;
///
/// let mut v = RingBuf::new();
/// v.push_back(1i);
/// assert_eq!(v.drain().next(), Some(1));
/// assert!(v.is_empty());
/// ```
#[inline]
#[unstable = "matches collection reform specification, waiting for dust to settle"]
pub fn drain<'a>(&'a mut self) -> Drain<'a, T> {
Drain {
inner: self,
}
}
/// Clears the buffer, removing all values.
///
/// # Examples
@ -504,10 +525,9 @@ impl<T> RingBuf<T> {
/// assert!(v.is_empty());
/// ```
#[unstable = "matches collection reform specification, waiting for dust to settle"]
#[inline]
pub fn clear(&mut self) {
while self.pop_front().is_some() {}
self.head = 0;
self.tail = 0;
self.drain();
}
/// Provides a reference to the front element, or `None` if the sequence is
@ -1230,9 +1250,44 @@ impl<T> DoubleEndedIterator<T> for MoveItems<T> {
}
}
impl<T> ExactSizeIterator<T> for MoveItems<T> {}
/// A draining RingBuf iterator
pub struct Drain<'a, T: 'a> {
inner: &'a mut RingBuf<T>,
}
#[unsafe_destructor]
impl<'a, T: 'a> Drop for Drain<'a, T> {
fn drop(&mut self) {
for _ in *self {}
self.inner.head = 0;
self.inner.tail = 0;
}
}
impl<'a, T: 'a> Iterator<T> for Drain<'a, T> {
#[inline]
fn next(&mut self) -> Option<T> {
self.inner.pop_front()
}
#[inline]
fn size_hint(&self) -> (uint, Option<uint>) {
let len = self.inner.len();
(len, Some(len))
}
}
impl<'a, T: 'a> DoubleEndedIterator<T> for Drain<'a, T> {
#[inline]
fn next_back(&mut self) -> Option<T> {
self.inner.pop_back()
}
}
impl<'a, T: 'a> ExactSizeIterator<T> for Drain<'a, T> {}
impl<A: PartialEq> PartialEq for RingBuf<A> {
fn eq(&self, other: &RingBuf<A>) -> bool {
self.len() == other.len() &&
@ -1841,6 +1896,73 @@ mod tests {
}
}
#[test]
fn test_drain() {
// Empty iter
{
let mut d: RingBuf<int> = RingBuf::new();
{
let mut iter = d.drain();
assert_eq!(iter.size_hint(), (0, Some(0)));
assert_eq!(iter.next(), None);
assert_eq!(iter.size_hint(), (0, Some(0)));
}
assert!(d.is_empty());
}
// simple iter
{
let mut d = RingBuf::new();
for i in range(0i, 5) {
d.push_back(i);
}
assert_eq!(d.drain().collect::<Vec<int>>(), [0, 1, 2, 3, 4]);
assert!(d.is_empty());
}
// wrapped iter
{
let mut d = RingBuf::new();
for i in range(0i, 5) {
d.push_back(i);
}
for i in range(6, 9) {
d.push_front(i);
}
assert_eq!(d.drain().collect::<Vec<int>>(), [8,7,6,0,1,2,3,4]);
assert!(d.is_empty());
}
// partially used
{
let mut d = RingBuf::new();
for i in range(0i, 5) {
d.push_back(i);
}
for i in range(6, 9) {
d.push_front(i);
}
{
let mut it = d.drain();
assert_eq!(it.size_hint(), (8, Some(8)));
assert_eq!(it.next(), Some(8));
assert_eq!(it.size_hint(), (7, Some(7)));
assert_eq!(it.next_back(), Some(4));
assert_eq!(it.size_hint(), (6, Some(6)));
assert_eq!(it.next(), Some(7));
assert_eq!(it.size_hint(), (5, Some(5)));
}
assert!(d.is_empty());
}
}
#[test]
fn test_from_iter() {
use core::iter;

View file

@ -1117,6 +1117,38 @@ impl<T> Vec<T> {
}
}
/// Creates a draining iterator that clears the `Vec` and iterates over
/// the removed items from start to end.
///
/// # Examples
///
/// ```
/// let mut v = vec!["a".to_string(), "b".to_string()];
/// for s in v.drain() {
/// // s has type String, not &String
/// println!("{}", s);
/// }
/// assert!(v.is_empty());
/// ```
#[inline]
#[unstable = "matches collection reform specification, waiting for dust to settle"]
pub fn drain<'a>(&'a mut self) -> Drain<'a, T> {
unsafe {
let begin = self.ptr as *const T;
let end = if mem::size_of::<T>() == 0 {
(self.ptr as uint + self.len()) as *const T
} else {
self.ptr.offset(self.len() as int) as *const T
};
self.set_len(0);
Drain {
ptr: begin,
end: end,
marker: ContravariantLifetime,
}
}
}
/// Clears the vector, removing all values.
///
/// # Examples
@ -1373,8 +1405,9 @@ pub struct MoveItems<T> {
}
impl<T> MoveItems<T> {
#[inline]
/// Drops all items that have not yet been moved and returns the empty vector.
#[inline]
#[unstable]
pub fn into_inner(mut self) -> Vec<T> {
unsafe {
for _x in self { }
@ -1384,8 +1417,8 @@ impl<T> MoveItems<T> {
}
}
/// Deprecated, use into_inner() instead
#[deprecated = "renamed to into_inner()"]
/// Deprecated, use .into_inner() instead
#[deprecated = "use .into_inner() instead"]
pub fn unwrap(self) -> Vec<T> { self.into_inner() }
}
@ -1461,6 +1494,84 @@ impl<T> Drop for MoveItems<T> {
}
}
/// An iterator that drains a vector.
#[unsafe_no_drop_flag]
pub struct Drain<'a, T> {
ptr: *const T,
end: *const T,
marker: ContravariantLifetime<'a>,
}
impl<'a, T> Iterator<T> for Drain<'a, T> {
#[inline]
fn next(&mut self) -> Option<T> {
unsafe {
if self.ptr == self.end {
None
} else {
if mem::size_of::<T>() == 0 {
// purposefully don't use 'ptr.offset' because for
// vectors with 0-size elements this would return the
// same pointer.
self.ptr = mem::transmute(self.ptr as uint + 1);
// Use a non-null pointer value
Some(ptr::read(mem::transmute(1u)))
} else {
let old = self.ptr;
self.ptr = self.ptr.offset(1);
Some(ptr::read(old))
}
}
}
}
#[inline]
fn size_hint(&self) -> (uint, Option<uint>) {
let diff = (self.end as uint) - (self.ptr as uint);
let size = mem::size_of::<T>();
let exact = diff / (if size == 0 {1} else {size});
(exact, Some(exact))
}
}
impl<'a, T> DoubleEndedIterator<T> for Drain<'a, T> {
#[inline]
fn next_back(&mut self) -> Option<T> {
unsafe {
if self.end == self.ptr {
None
} else {
if mem::size_of::<T>() == 0 {
// See above for why 'ptr.offset' isn't used
self.end = mem::transmute(self.end as uint - 1);
// Use a non-null pointer value
Some(ptr::read(mem::transmute(1u)))
} else {
self.end = self.end.offset(-1);
Some(ptr::read(self.end))
}
}
}
}
}
impl<'a, T> ExactSizeIterator<T> for Drain<'a, T> {}
#[unsafe_destructor]
impl<'a, T> Drop for Drain<'a, T> {
fn drop(&mut self) {
// self.ptr == self.end == null if drop has already been called,
// so we can use #[unsafe_no_drop_flag].
// destroy the remaining elements
for _x in *self {}
}
}
/// Converts an iterator of pairs into a pair of vectors.
///
/// Returns a tuple containing two vectors where the i-th element of the first vector contains the
@ -2267,6 +2378,39 @@ mod tests {
assert!(vec2 == vec![(), (), ()]);
}
#[test]
fn test_drain_items() {
let mut vec = vec![1, 2, 3];
let mut vec2: Vec<i32> = vec![];
for i in vec.drain() {
vec2.push(i);
}
assert_eq!(vec, []);
assert_eq!(vec2, [ 1, 2, 3 ]);
}
#[test]
fn test_drain_items_reverse() {
let mut vec = vec![1, 2, 3];
let mut vec2: Vec<i32> = vec![];
for i in vec.drain().rev() {
vec2.push(i);
}
assert_eq!(vec, []);
assert_eq!(vec2, [ 3, 2, 1 ]);
}
#[test]
fn test_drain_items_zero_sized() {
let mut vec = vec![(), (), ()];
let mut vec2: Vec<()> = vec![];
for i in vec.drain() {
vec2.push(i);
}
assert_eq!(vec, []);
assert_eq!(vec2, [(), (), ()]);
}
#[test]
fn test_into_boxed_slice() {
let xs = vec![1u, 2, 3];

View file

@ -982,6 +982,35 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
#[unstable = "matches collection reform specification, waiting for dust to settle"]
pub fn is_empty(&self) -> bool { self.len() == 0 }
/// Clears the map, returning all key-value pairs as an iterator. Keeps the
/// allocated memory for reuse.
///
/// # Example
///
/// ```
/// use std::collections::HashMap;
///
/// let mut a = HashMap::new();
/// a.insert(1u, "a");
/// a.insert(2u, "b");
///
/// for (k, v) in a.drain().take(1) {
/// assert!(k == 1 || k == 2);
/// assert!(v == "a" || v == "b");
/// }
///
/// assert!(a.is_empty());
/// ```
#[inline]
#[unstable = "matches collection reform specification, waiting for dust to settle"]
pub fn drain(&mut self) -> Drain<K, V> {
fn last_two<A, B, C>((_, b, c): (A, B, C)) -> (B, C) { (b, c) }
Drain {
inner: self.table.drain().map(last_two),
}
}
/// Clears the map, removing all key-value pairs. Keeps the allocated memory
/// for reuse.
///
@ -996,16 +1025,9 @@ impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
/// assert!(a.is_empty());
/// ```
#[unstable = "matches collection reform specification, waiting for dust to settle"]
#[inline]
pub fn clear(&mut self) {
let cap = self.table.capacity();
let mut buckets = Bucket::first(&mut self.table);
while buckets.index() != cap {
buckets = match buckets.peek() {
Empty(b) => b.next(),
Full(full) => full.take().0.next(),
};
}
self.drain();
}
/// Deprecated: Renamed to `get`.
@ -1306,6 +1328,16 @@ pub struct Values<'a, K: 'a, V: 'a> {
inner: Map<(&'a K, &'a V), &'a V, Entries<'a, K, V>, fn((&'a K, &'a V)) -> &'a V>
}
/// HashMap drain iterator
pub struct Drain<'a, K: 'a, V: 'a> {
inner: iter::Map<
(SafeHash, K, V),
(K, V),
table::Drain<'a, K, V>,
fn((SafeHash, K, V)) -> (K, V),
>
}
/// A view into a single occupied location in a HashMap
pub struct OccupiedEntry<'a, K:'a, V:'a> {
elem: FullBucket<K, V, &'a mut RawTable<K, V>>,
@ -1360,6 +1392,17 @@ impl<'a, K, V> Iterator<&'a V> for Values<'a, K, V> {
#[inline] fn size_hint(&self) -> (uint, Option<uint>) { self.inner.size_hint() }
}
impl<'a, K: 'a, V: 'a> Iterator<(K, V)> for Drain<'a, K, V> {
#[inline]
fn next(&mut self) -> Option<(K, V)> {
self.inner.next()
}
#[inline]
fn size_hint(&self) -> (uint, Option<uint>) {
self.inner.size_hint()
}
}
impl<'a, K, V> OccupiedEntry<'a, K, V> {
/// Gets a reference to the value in the entry
pub fn get(&self) -> &V {

View file

@ -21,7 +21,7 @@ use iter::{Iterator, IteratorExt, FromIterator, Map, FilterMap, Chain, Repeat, Z
use option::Option::{Some, None, mod};
use result::Result::{Ok, Err};
use super::map::{HashMap, MoveEntries, Keys, INITIAL_CAPACITY};
use super::map::{mod, HashMap, MoveEntries, Keys, INITIAL_CAPACITY};
// FIXME(conventions): implement BitOr, BitAnd, BitXor, and Sub
@ -420,6 +420,14 @@ impl<T: Eq + Hash<S>, S, H: Hasher<S>> HashSet<T, H> {
#[unstable = "matches collection reform specification, waiting for dust to settle"]
pub fn is_empty(&self) -> bool { self.map.len() == 0 }
/// Clears the set, returning all elements in an iterator.
#[inline]
#[unstable = "matches collection reform specification, waiting for dust to settle"]
pub fn drain(&mut self) -> Drain<T> {
fn first<A, B>((a, _): (A, B)) -> A { a }
Drain { iter: self.map.drain().map(first) }
}
/// Clears the set, removing all values.
///
/// # Example
@ -626,6 +634,11 @@ pub struct SetMoveItems<K> {
iter: Map<(K, ()), K, MoveEntries<K, ()>, fn((K, ())) -> K>
}
/// HashSet drain iterator
pub struct Drain<'a, K: 'a> {
iter: Map<(K, ()), K, map::Drain<'a, K, ()>, fn((K, ())) -> K>,
}
// `Repeat` is used to feed the filter closure an explicit capture
// of a reference to the other set
/// Set operations iterator, used directly for intersection and difference
@ -658,6 +671,11 @@ impl<K> Iterator<K> for SetMoveItems<K> {
fn size_hint(&self) -> (uint, Option<uint>) { self.iter.size_hint() }
}
impl<'a, K: 'a> Iterator<K> for Drain<'a, K> {
fn next(&mut self) -> Option<K> { self.iter.next() }
fn size_hint(&self) -> (uint, Option<uint>) { self.iter.size_hint() }
}
impl<'a, T, H> Iterator<&'a T> for SetAlgebraItems<'a, T, H> {
fn next(&mut self) -> Option<&'a T> { self.iter.next() }
fn size_hint(&self) -> (uint, Option<uint>) { self.iter.size_hint() }
@ -913,4 +931,41 @@ mod test_set {
assert!(set_str == "{1, 2}" || set_str == "{2, 1}");
assert_eq!(format!("{}", empty), "{}");
}
#[test]
fn test_trivial_drain() {
let mut s = HashSet::<int>::new();
for _ in s.drain() {}
assert!(s.is_empty());
drop(s);
let mut s = HashSet::<int>::new();
drop(s.drain());
assert!(s.is_empty());
}
#[test]
fn test_drain() {
let mut s: HashSet<int> = range(1, 100).collect();
// try this a bunch of times to make sure we don't screw up internal state.
for _ in range(0i, 20) {
assert_eq!(s.len(), 99);
{
let mut last_i = 0;
let mut d = s.drain();
for (i, x) in d.by_ref().take(50).enumerate() {
last_i = i;
assert!(x != 0);
}
assert_eq!(last_i, 49);
}
for _ in s.iter() { panic!("s should be empty!"); }
// reset to try again.
s.extend(range(1, 100));
}
}
}

View file

@ -684,6 +684,19 @@ impl<K, V> RawTable<K, V> {
}
}
pub fn drain(&mut self) -> Drain<K, V> {
let RawBuckets { raw, hashes_end, .. } = self.raw_buckets();
// Replace the marker regardless of lifetime bounds on parameters.
Drain {
iter: RawBuckets {
raw: raw,
hashes_end: hashes_end,
marker: marker::ContravariantLifetime::<'static>,
},
table: self,
}
}
/// Returns an iterator that copies out each entry. Used while the table
/// is being dropped.
unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets<K, V> {
@ -774,6 +787,12 @@ pub struct MoveEntries<K, V> {
iter: RawBuckets<'static, K, V>
}
/// Iterator over the entries in a table, clearing the table.
pub struct Drain<'a, K: 'a, V: 'a> {
table: &'a mut RawTable<K, V>,
iter: RawBuckets<'static, K, V>,
}
impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> {
fn next(&mut self) -> Option<(&'a K, &'a V)> {
self.iter.next().map(|bucket| {
@ -828,6 +847,36 @@ impl<K, V> Iterator<(SafeHash, K, V)> for MoveEntries<K, V> {
}
}
impl<'a, K: 'a, V: 'a> Iterator<(SafeHash, K, V)> for Drain<'a, K, V> {
#[inline]
fn next(&mut self) -> Option<(SafeHash, K, V)> {
self.iter.next().map(|bucket| {
self.table.size -= 1;
unsafe {
(
SafeHash {
hash: ptr::replace(bucket.hash, EMPTY_BUCKET),
},
ptr::read(bucket.key as *const K),
ptr::read(bucket.val as *const V)
)
}
})
}
fn size_hint(&self) -> (uint, Option<uint>) {
let size = self.table.size();
(size, Some(size))
}
}
#[unsafe_destructor]
impl<'a, K: 'a, V: 'a> Drop for Drain<'a, K, V> {
fn drop(&mut self) {
for _ in *self {}
}
}
impl<K: Clone, V: Clone> Clone for RawTable<K, V> {
fn clone(&self) -> RawTable<K, V> {
unsafe {