diff --git a/src/duplicates_impl.rs b/src/duplicates_impl.rs index a0db15432..91777e01f 100644 --- a/src/duplicates_impl.rs +++ b/src/duplicates_impl.rs @@ -1,31 +1,37 @@ -use std::hash::Hash; +use core::hash::BuildHasher; +use std::hash::{Hash, RandomState}; mod private { + use core::hash::BuildHasher; use std::collections::HashMap; use std::fmt; - use std::hash::Hash; + use std::hash::{Hash, RandomState}; #[derive(Clone)] #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] - pub struct DuplicatesBy { + pub struct DuplicatesBy + where + S: BuildHasher, + { pub(crate) iter: I, - pub(crate) meta: Meta, + pub(crate) meta: Meta, } - impl fmt::Debug for DuplicatesBy + impl fmt::Debug for DuplicatesBy where I: Iterator + fmt::Debug, V: fmt::Debug + Hash + Eq, + S: BuildHasher, { debug_fmt_fields!(DuplicatesBy, iter, meta.used); } - impl DuplicatesBy { - pub(crate) fn new(iter: I, key_method: F) -> Self { + impl DuplicatesBy { + pub(crate) fn new(iter: I, key_method: F, hash_builder: S) -> Self { Self { iter, meta: Meta { - used: HashMap::new(), + used: HashMap::with_hasher(hash_builder), pending: 0, key_method, }, @@ -34,15 +40,16 @@ mod private { } #[derive(Clone)] - pub struct Meta { - used: HashMap, + pub struct Meta { + used: HashMap, pending: usize, key_method: F, } - impl Meta + impl Meta where Key: Eq + Hash, + S: BuildHasher, { /// Takes an item and returns it back to the caller if it's the second time we see it. 
/// Otherwise the item is consumed and None is returned @@ -68,11 +75,12 @@ mod private { } } - impl Iterator for DuplicatesBy + impl Iterator for DuplicatesBy where I: Iterator, Key: Eq + Hash, F: KeyMethod, + S: BuildHasher, { type Item = I::Item; @@ -102,11 +110,12 @@ mod private { } } - impl DoubleEndedIterator for DuplicatesBy + impl DoubleEndedIterator for DuplicatesBy where I: DoubleEndedIterator, Key: Eq + Hash, F: KeyMethod, + S: BuildHasher, { fn next_back(&mut self) -> Option { let Self { iter, meta } = self; @@ -189,28 +198,35 @@ mod private { /// An iterator adapter to filter for duplicate elements. /// /// See [`.duplicates_by()`](crate::Itertools::duplicates_by) for more information. -pub type DuplicatesBy = private::DuplicatesBy>; - -/// Create a new `DuplicatesBy` iterator. -pub fn duplicates_by(iter: I, f: F) -> DuplicatesBy +pub type DuplicatesBy = private::DuplicatesBy, S>; + +/// Create a new `DuplicatesBy` iterator with a specified hash builder. +pub fn duplicates_by_with_hasher( + iter: I, + f: F, + hash_builder: S, +) -> DuplicatesBy where Key: Eq + Hash, F: FnMut(&I::Item) -> Key, I: Iterator, + S: BuildHasher, { - DuplicatesBy::new(iter, private::ByFn(f)) + DuplicatesBy::new(iter, private::ByFn(f), hash_builder) } /// An iterator adapter to filter out duplicate elements. /// /// See [`.duplicates()`](crate::Itertools::duplicates) for more information. -pub type Duplicates = private::DuplicatesBy::Item, private::ById>; +pub type Duplicates = + private::DuplicatesBy::Item, private::ById, S>; -/// Create a new `Duplicates` iterator. -pub fn duplicates(iter: I) -> Duplicates +/// Create a new `Duplicates` iterator with a specified hash builder. 
+pub fn duplicates_with_hasher(iter: I, hash_builder: S) -> Duplicates where I: Iterator, I::Item: Eq + Hash, + S: BuildHasher, { - Duplicates::new(iter, private::ById) + Duplicates::new(iter, private::ById, hash_builder) } diff --git a/src/group_map.rs b/src/group_map.rs index 3dcee83af..81c94b23b 100644 --- a/src/group_map.rs +++ b/src/group_map.rs @@ -1,5 +1,6 @@ #![cfg(feature = "use_std")] +use core::hash::BuildHasher; use std::collections::HashMap; use std::hash::Hash; use std::iter::Iterator; @@ -8,12 +9,13 @@ use std::iter::Iterator; /// /// See [`.into_group_map()`](crate::Itertools::into_group_map) /// for more information. -pub fn into_group_map(iter: I) -> HashMap> +pub fn into_group_map_with_hasher(iter: I, hash_builder: S) -> HashMap, S> where I: Iterator, K: Hash + Eq, + S: BuildHasher, { - let mut lookup = HashMap::new(); + let mut lookup = HashMap::with_hasher(hash_builder); iter.for_each(|(key, val)| { lookup.entry(key).or_insert_with(Vec::new).push(val); @@ -22,11 +24,16 @@ where lookup } -pub fn into_group_map_by(iter: I, mut f: F) -> HashMap> +pub fn into_group_map_by_with_hasher( + iter: I, + mut f: F, + hash_builder: S, +) -> HashMap, S> where I: Iterator, K: Hash + Eq, F: FnMut(&V) -> K, + S: BuildHasher, { - into_group_map(iter.map(|v| (f(&v), v))) + into_group_map_with_hasher(iter.map(|v| (f(&v), v)), hash_builder) } diff --git a/src/grouping_map.rs b/src/grouping_map.rs index 86cb55dc0..d8e435c4c 100644 --- a/src/grouping_map.rs +++ b/src/grouping_map.rs @@ -2,11 +2,12 @@ use crate::{ adaptors::map::{MapSpecialCase, MapSpecialCaseFn}, MinMaxResult, }; -use std::cmp::Ordering; +use core::hash::BuildHasher; use std::collections::HashMap; use std::hash::Hash; use std::iter::Iterator; use std::ops::{Add, Mul}; +use std::{cmp::Ordering, hash::RandomState}; /// A wrapper to allow for an easy [`into_grouping_map_by`](crate::Itertools::into_grouping_map_by) pub type MapForGrouping = MapSpecialCase>; @@ -36,18 +37,19 @@ pub(crate) fn 
new_map_for_grouping K>( } /// Creates a new `GroupingMap` from `iter` -pub fn new(iter: I) -> GroupingMap +pub fn new(iter: I, hash_builder: S) -> GroupingMap where I: Iterator, K: Hash + Eq, + S: BuildHasher, { - GroupingMap { iter } + GroupingMap { iter, hash_builder } } /// `GroupingMapBy` is an intermediate struct for efficient group-and-fold operations. /// /// See [`GroupingMap`] for more informations. -pub type GroupingMapBy = GroupingMap>; +pub type GroupingMapBy = GroupingMap, S>; /// `GroupingMap` is an intermediate struct for efficient group-and-fold operations. /// It groups elements by their key and at the same time fold each group @@ -56,14 +58,19 @@ pub type GroupingMapBy = GroupingMap>; /// No method on this struct performs temporary allocations. #[derive(Clone, Debug)] #[must_use = "GroupingMap is lazy and do nothing unless consumed"] -pub struct GroupingMap { +pub struct GroupingMap +where + S: BuildHasher, +{ iter: I, + hash_builder: S, } -impl GroupingMap +impl GroupingMap where I: Iterator, K: Hash + Eq, + S: BuildHasher, { /// This is the generic way to perform any operation on a `GroupingMap`. 
/// It's suggested to use this method only to implement custom operations @@ -105,11 +112,11 @@ where /// assert_eq!(lookup[&3], 7); /// assert_eq!(lookup.len(), 3); // The final keys are only 0, 1 and 2 /// ``` - pub fn aggregate(self, mut operation: FO) -> HashMap + pub fn aggregate(self, mut operation: FO) -> HashMap where FO: FnMut(Option, &K, V) -> Option, { - let mut destination_map = HashMap::new(); + let mut destination_map = HashMap::with_hasher(self.hash_builder); self.iter.for_each(|(key, val)| { let acc = destination_map.remove(&key); @@ -154,7 +161,7 @@ where /// assert_eq!(lookup[&2].acc, 2 + 5); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn fold_with(self, mut init: FI, mut operation: FO) -> HashMap + pub fn fold_with(self, mut init: FI, mut operation: FO) -> HashMap where FI: FnMut(&K, &V) -> R, FO: FnMut(R, &K, V) -> R, @@ -190,7 +197,7 @@ where /// assert_eq!(lookup[&2], 2 + 5); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn fold(self, init: R, operation: FO) -> HashMap + pub fn fold(self, init: R, operation: FO) -> HashMap where R: Clone, FO: FnMut(R, &K, V) -> R, @@ -225,7 +232,7 @@ where /// assert_eq!(lookup[&2], 2 + 5); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn reduce(self, mut operation: FO) -> HashMap + pub fn reduce(self, mut operation: FO) -> HashMap where FO: FnMut(V, &K, V) -> V, { @@ -239,7 +246,7 @@ where /// See [`.reduce()`](GroupingMap::reduce). 
#[deprecated(note = "Use .reduce() instead", since = "0.13.0")] - pub fn fold_first(self, operation: FO) -> HashMap + pub fn fold_first(self, operation: FO) -> HashMap where FO: FnMut(V, &K, V) -> V, { @@ -264,11 +271,11 @@ where /// assert_eq!(lookup[&2], vec![2, 5].into_iter().collect::>()); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn collect(self) -> HashMap + pub fn collect(self) -> HashMap where C: Default + Extend, { - let mut destination_map = HashMap::new(); + let mut destination_map = HashMap::with_hasher(self.hash_builder); self.iter.for_each(|(key, val)| { destination_map @@ -298,7 +305,7 @@ where /// assert_eq!(lookup[&2], 8); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn max(self) -> HashMap + pub fn max(self) -> HashMap where V: Ord, { @@ -324,7 +331,7 @@ where /// assert_eq!(lookup[&2], 5); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn max_by(self, mut compare: F) -> HashMap + pub fn max_by(self, mut compare: F) -> HashMap where F: FnMut(&K, &V, &V) -> Ordering, { @@ -353,7 +360,7 @@ where /// assert_eq!(lookup[&2], 5); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn max_by_key(self, mut f: F) -> HashMap + pub fn max_by_key(self, mut f: F) -> HashMap where F: FnMut(&K, &V) -> CK, CK: Ord, @@ -379,7 +386,7 @@ where /// assert_eq!(lookup[&2], 5); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn min(self) -> HashMap + pub fn min(self) -> HashMap where V: Ord, { @@ -405,7 +412,7 @@ where /// assert_eq!(lookup[&2], 8); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn min_by(self, mut compare: F) -> HashMap + pub fn min_by(self, mut compare: F) -> HashMap where F: FnMut(&K, &V, &V) -> Ordering, { @@ -434,7 +441,7 @@ where /// assert_eq!(lookup[&2], 8); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn min_by_key(self, mut f: F) -> HashMap + pub fn min_by_key(self, mut f: F) -> HashMap where F: FnMut(&K, &V) -> CK, CK: Ord, @@ -469,7 +476,7 @@ where /// assert_eq!(lookup[&2], OneElement(5)); /// assert_eq!(lookup.len(), 3); /// ``` - pub 
fn minmax(self) -> HashMap> + pub fn minmax(self) -> HashMap, S> where V: Ord, { @@ -499,7 +506,7 @@ where /// assert_eq!(lookup[&2], OneElement(5)); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn minmax_by(self, mut compare: F) -> HashMap> + pub fn minmax_by(self, mut compare: F) -> HashMap, S> where F: FnMut(&K, &V, &V) -> Ordering, { @@ -550,7 +557,7 @@ where /// assert_eq!(lookup[&2], OneElement(5)); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn minmax_by_key(self, mut f: F) -> HashMap> + pub fn minmax_by_key(self, mut f: F) -> HashMap, S> where F: FnMut(&K, &V) -> CK, CK: Ord, @@ -577,7 +584,7 @@ where /// assert_eq!(lookup[&2], 5 + 8); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn sum(self) -> HashMap + pub fn sum(self) -> HashMap where V: Add, { @@ -603,7 +610,7 @@ where /// assert_eq!(lookup[&2], 5 * 8); /// assert_eq!(lookup.len(), 3); /// ``` - pub fn product(self) -> HashMap + pub fn product(self) -> HashMap where V: Mul, { diff --git a/src/lib.rs b/src/lib.rs index 834a48dea..d204f5d27 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -63,6 +63,8 @@ use alloc::{collections::VecDeque, string::String, vec::Vec}; pub use either::Either; use core::borrow::Borrow; +#[cfg(feature = "use_std")] +use core::hash::BuildHasher; use std::cmp::Ordering; #[cfg(feature = "use_std")] use std::collections::HashMap; @@ -72,7 +74,7 @@ use std::fmt; #[cfg(feature = "use_alloc")] use std::fmt::Write; #[cfg(feature = "use_std")] -use std::hash::Hash; +use std::hash::{Hash, RandomState}; use std::iter::{once, IntoIterator}; #[cfg(feature = "use_alloc")] type VecDequeIntoIter = alloc::collections::vec_deque::IntoIter; @@ -1406,7 +1408,33 @@ pub trait Itertools: Iterator { Self: Sized, Self::Item: Eq + Hash, { - duplicates_impl::duplicates(self) + duplicates_impl::duplicates_with_hasher(self, RandomState::new()) + } + + /// Return an iterator which yields the same elements as the one returned by + /// [.duplicates()](crate::Itertools::duplicates), but uses the specified 
hash builder to hash + /// the elements for comparison. + /// + /// Warning: `hash_builder` is normally randomly generated, and is designed to allow its + /// users to be resistant to attacks that cause many collisions and very poor performance. + /// Setting it manually using this function can expose a DoS attack vector. + /// + /// ``` + /// use std::hash::RandomState; + /// use itertools::Itertools; + /// + /// let data = vec![10, 20, 30, 20, 40, 10, 50]; + /// itertools::assert_equal(data.into_iter().duplicates_with_hasher(RandomState::new()), + /// vec![20,10]); + /// ``` + #[cfg(feature = "use_std")] + fn duplicates_with_hasher(self, hash_builder: S) -> Duplicates + where + Self: Sized, + Self::Item: Eq + Hash, + S: BuildHasher, + { + duplicates_impl::duplicates_with_hasher(self, hash_builder) } /// Return an iterator adaptor that produces elements that appear more than once during the @@ -1433,7 +1461,38 @@ pub trait Itertools: Iterator { V: Eq + Hash, F: FnMut(&Self::Item) -> V, { - duplicates_impl::duplicates_by(self, f) + duplicates_impl::duplicates_by_with_hasher(self, f, RandomState::new()) + } + + /// Return an iterator which yields the same elements as the one returned by + /// [.duplicates_by()](crate::Itertools::duplicates_by), but uses the specified hash builder to + /// hash the keys for comparison. + /// + /// Warning: `hash_builder` is normally randomly generated, and is designed to allow its + /// users to be resistant to attacks that cause many collisions and very poor performance. + /// Setting it manually using this function can expose a DoS attack vector. 
+ /// + /// ``` + /// use std::hash::RandomState; + /// use itertools::Itertools; + /// + /// let data = vec!["a", "bb", "aa", "c", "ccc"]; + /// itertools::assert_equal(data.into_iter().duplicates_by_with_hasher(|s| s.len(),RandomState::new()), + /// vec!["aa", "c"]); + /// ``` + #[cfg(feature = "use_std")] + fn duplicates_by_with_hasher( + self, + f: F, + hash_builder: S, + ) -> DuplicatesBy + where + Self: Sized, + V: Eq + Hash, + F: FnMut(&Self::Item) -> V, + S: BuildHasher, + { + duplicates_impl::duplicates_by_with_hasher(self, f, hash_builder) } /// Return an iterator adaptor that filters out elements that have @@ -1460,7 +1519,33 @@ pub trait Itertools: Iterator { Self: Sized, Self::Item: Clone + Eq + Hash, { - unique_impl::unique(self) + unique_impl::unique_with_hasher(self, RandomState::new()) + } + + /// Return an iterator which yields the same elements as the one returned by + /// [.unique()](crate::Itertools::unique), but uses the specified hash builder to hash the + /// elements for comparison. + /// + /// Warning: `hash_builder` is normally randomly generated, and is designed to allow its + /// users to be resistant to attacks that cause many collisions and very poor performance. + /// Setting it manually using this function can expose a DoS attack vector. 
+ /// + /// ``` + /// use std::hash::RandomState; + /// use itertools::Itertools; + /// + /// let data = vec![10, 20, 30, 20, 40, 10, 50]; + /// itertools::assert_equal(data.into_iter().unique_with_hasher(RandomState::new()), + /// vec![10, 20, 30, 40, 50]); + /// ``` + #[cfg(feature = "use_std")] + fn unique_with_hasher(self, hash_builder: S) -> Unique + where + Self: Sized, + Self::Item: Clone + Eq + Hash, + S: BuildHasher, + { + unique_impl::unique_with_hasher(self, hash_builder) } /// Return an iterator adaptor that filters out elements that have @@ -1488,7 +1573,34 @@ pub trait Itertools: Iterator { V: Eq + Hash, F: FnMut(&Self::Item) -> V, { - unique_impl::unique_by(self, f) + unique_impl::unique_by_with_hasher(self, f, RandomState::new()) + } + + /// Return an iterator which yields the same elements as the one returned by + /// [.unique_by()](crate::Itertools::unique_by), but uses the specified hash builder to hash + /// the keys for comparison. + /// + /// Warning: `hash_builder` is normally randomly generated, and is designed to allow its + /// users to be resistant to attacks that cause many collisions and very poor performance. + /// Setting it manually using this function can expose a DoS attack vector. 
+ /// + /// ``` + /// use std::hash::RandomState; + /// use itertools::Itertools; + /// + /// let data = vec!["a", "bb", "aa", "c", "ccc"]; + /// itertools::assert_equal(data.into_iter().unique_by_with_hasher(|s| s.len(), RandomState::new()), + /// vec!["a", "bb", "ccc"]); + /// ``` + #[cfg(feature = "use_std")] + fn unique_by_with_hasher(self, f: F, hash_builder: S) -> UniqueBy + where + Self: Sized, + V: Eq + Hash, + F: FnMut(&Self::Item) -> V, + S: BuildHasher, + { + unique_impl::unique_by_with_hasher(self, f, hash_builder) } /// Return an iterator adaptor that borrows from this iterator and @@ -2222,7 +2334,32 @@ pub trait Itertools: Iterator { Self: Sized, Self::Item: Eq + Hash, { - let mut used = HashSet::new(); + self.all_unique_with_hasher(RandomState::new()) + } + + /// Check whether all elements are unique (non equal). The specified hash builder is used for + /// hashing the elements. See [.all_unique](crate::Itertools::all_unique). + /// + /// ``` + /// use std::hash::RandomState; + /// use itertools::Itertools; + /// + /// let data = vec![1, 2, 3, 4, 1, 5]; + /// assert!(!data.iter().all_unique_with_hasher(RandomState::new())); + /// assert!(data[0..4].iter().all_unique_with_hasher(RandomState::new())); + /// assert!(data[1..6].iter().all_unique_with_hasher(RandomState::new())); + /// + /// let data : Option = None; + /// assert!(data.into_iter().all_unique_with_hasher(RandomState::new())); + /// ``` + #[cfg(feature = "use_std")] + fn all_unique_with_hasher(&mut self, hash_builder: S) -> bool + where + Self: Sized, + Self::Item: Eq + Hash, + S: BuildHasher, + { + let mut used = HashSet::with_hasher(hash_builder); self.all(move |elt| used.insert(elt)) } @@ -3656,7 +3793,36 @@ pub trait Itertools: Iterator { Self: Iterator + Sized, K: Hash + Eq, { - group_map::into_group_map(self) + group_map::into_group_map_with_hasher(self, RandomState::new()) + } + + /// Return a `HashMap` of keys mapped to `Vec`s of values, using the hash builder for hashing. 
+ /// See [.into_group_map()](crate::Itertools::into_group_map) for more information. + /// + /// Warning: `hash_builder` is normally randomly generated, and is designed to allow its + /// users to be resistant to attacks that cause many collisions and very poor performance. + /// Setting it manually using this function can expose a DoS attack vector. + /// + /// ``` + /// use std::hash::RandomState; + /// use itertools::Itertools; + /// + /// let data = vec![(0, 10), (2, 12), (3, 13), (0, 20), (3, 33), (2, 42)]; + /// let lookup = data.into_iter().into_group_map_with_hasher(RandomState::new()); + /// + /// assert_eq!(lookup[&0], vec![10, 20]); + /// assert_eq!(lookup.get(&1), None); + /// assert_eq!(lookup[&2], vec![12, 42]); + /// assert_eq!(lookup[&3], vec![13, 33]); + /// ``` + #[cfg(feature = "use_std")] + fn into_group_map_with_hasher(self, hash_builder: S) -> HashMap, S> + where + Self: Iterator + Sized, + K: Hash + Eq, + S: BuildHasher, + { + group_map::into_group_map_with_hasher(self, hash_builder) } /// Return a `HashMap` of keys mapped to `Vec`s of values. The key is specified @@ -3693,7 +3859,52 @@ pub trait Itertools: Iterator { K: Hash + Eq, F: FnMut(&V) -> K, { - group_map::into_group_map_by(self, f) + group_map::into_group_map_by_with_hasher(self, f, RandomState::new()) + } + + /// Return a `HashMap` of keys mapped to `Vec`s of values, using the hash builder for hashing. + /// See [.into_group_map_by()](crate::Itertools::into_group_map_by) for more information. + /// + /// Warning: `hash_builder` is normally randomly generated, and is designed to allow its + /// users to be resistant to attacks that cause many collisions and very poor performance. + /// Setting it manually using this function can expose a DoS attack vector. 
+ /// + /// ``` + /// use itertools::Itertools; + /// use std::collections::HashMap; + /// use std::hash::RandomState; + /// + /// let data = vec![(0, 10), (2, 12), (3, 13), (0, 20), (3, 33), (2, 42)]; + /// let lookup: HashMap, RandomState> = + /// data.clone().into_iter().into_group_map_by_with_hasher(|a| a.0, RandomState::new()); + /// + /// assert_eq!(lookup[&0], vec![(0,10), (0,20)]); + /// assert_eq!(lookup.get(&1), None); + /// assert_eq!(lookup[&2], vec![(2,12), (2,42)]); + /// assert_eq!(lookup[&3], vec![(3,13), (3,33)]); + /// + /// assert_eq!( + /// data.into_iter() + /// .into_group_map_by_with_hasher(|x| x.0, RandomState::new()) + /// .into_iter() + /// .map(|(key, values)| (key, values.into_iter().fold(0,|acc, (_,v)| acc + v ))) + /// .collect::>()[&0], + /// 30, + /// ); + /// ``` + #[cfg(feature = "use_std")] + fn into_group_map_by_with_hasher( + self, + f: F, + hash_builder: S, + ) -> HashMap, S> + where + Self: Iterator + Sized, + K: Hash + Eq, + F: FnMut(&V) -> K, + S: BuildHasher, + { + group_map::into_group_map_by_with_hasher(self, f, hash_builder) } /// Constructs a `GroupingMap` to be used later with one of the efficient @@ -3711,7 +3922,21 @@ pub trait Itertools: Iterator { Self: Iterator + Sized, K: Hash + Eq, { - grouping_map::new(self) + grouping_map::new(self, RandomState::new()) + } + + /// Constructs a `GroupingMap` to be used later with one of the efficient + /// group-and-fold operations it allows to perform, using the specified hash builder for + /// hashing the elements. + /// See [.into_grouping_map()](crate::Itertools::into_grouping_map) for more information. 
+ #[cfg(feature = "use_std")] + fn into_grouping_map_with_hasher(self, hash_builder: S) -> GroupingMap + where + Self: Iterator + Sized, + K: Hash + Eq, + S: BuildHasher, + { + grouping_map::new(self, hash_builder) } /// Constructs a `GroupingMap` to be used later with one of the efficient @@ -3729,7 +3954,32 @@ pub trait Itertools: Iterator { K: Hash + Eq, F: FnMut(&V) -> K, { - grouping_map::new(grouping_map::new_map_for_grouping(self, key_mapper)) + grouping_map::new( + grouping_map::new_map_for_grouping(self, key_mapper), + RandomState::new(), + ) + } + + /// Constructs a `GroupingMap` to be used later with one of the efficient + /// group-and-fold operations it allows to perform, using the specified hash builder for + /// hashing the keys. + /// See [.into_grouping_map_by()](crate::Itertools::into_grouping_map_by) for more information. + #[cfg(feature = "use_std")] + fn into_grouping_map_by_with_hasher( + self, + key_mapper: F, + hash_builder: S, + ) -> GroupingMapBy + where + Self: Iterator + Sized, + K: Hash + Eq, + F: FnMut(&V) -> K, + S: BuildHasher, + { + grouping_map::new( + grouping_map::new_map_for_grouping(self, key_mapper), + hash_builder, + ) } /// Return all minimum elements of an iterator. @@ -4439,7 +4689,19 @@ pub trait Itertools: Iterator { Self: Sized, Self::Item: Eq + Hash, { - let mut counts = HashMap::new(); + self.counts_with_hasher(RandomState::new()) + } + + /// Collect the items in this iterator and return a `HashMap` the same way + /// [.counts()](crate::Itertools::counts) does, but use the specified hash builder for hashing. 
+ #[cfg(feature = "use_std")] + fn counts_with_hasher(self, hash_builder: S) -> HashMap + where + Self: Sized, + Self::Item: Eq + Hash, + S: BuildHasher, + { + let mut counts = HashMap::with_hasher(hash_builder); self.for_each(|item| *counts.entry(item).or_default() += 1); counts } @@ -4484,7 +4746,20 @@ pub trait Itertools: Iterator { K: Eq + Hash, F: FnMut(Self::Item) -> K, { - self.map(f).counts() + self.counts_by_with_hasher(f, RandomState::new()) + } + + /// Collect the items in this iterator and return a `HashMap` the same way + /// [.counts_by()](crate::Itertools::counts_by) does, but use the specified hash builder for hashing. + #[cfg(feature = "use_std")] + fn counts_by_with_hasher(self, f: F, hash_builder: S) -> HashMap + where + Self: Sized, + K: Eq + Hash, + F: FnMut(Self::Item) -> K, + S: BuildHasher, + { + self.map(f).counts_with_hasher(hash_builder) } /// Converts an iterator of tuples into a tuple of containers. diff --git a/src/unique_impl.rs b/src/unique_impl.rs index 0f6397e48..11b2608b1 100644 --- a/src/unique_impl.rs +++ b/src/unique_impl.rs @@ -1,7 +1,8 @@ +use core::hash::BuildHasher; use std::collections::hash_map::Entry; use std::collections::HashMap; use std::fmt; -use std::hash::Hash; +use std::hash::{Hash, RandomState}; use std::iter::FusedIterator; /// An iterator adapter to filter out duplicate elements. @@ -9,42 +10,48 @@ use std::iter::FusedIterator; /// See [`.unique_by()`](crate::Itertools::unique) for more information. #[derive(Clone)] #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] -pub struct UniqueBy { +pub struct UniqueBy +where + S: BuildHasher, +{ iter: I, // Use a Hashmap for the Entry API in order to prevent hashing twice. 
// This can maybe be replaced with a HashSet once `get_or_insert_with` // or a proper Entry API for Hashset is stable and meets this msrv - used: HashMap, + used: HashMap, f: F, } -impl fmt::Debug for UniqueBy +impl fmt::Debug for UniqueBy where I: Iterator + fmt::Debug, V: fmt::Debug + Hash + Eq, + S: BuildHasher, { debug_fmt_fields!(UniqueBy, iter, used); } /// Create a new `UniqueBy` iterator. -pub fn unique_by(iter: I, f: F) -> UniqueBy +pub fn unique_by_with_hasher(iter: I, f: F, hash_builder: S) -> UniqueBy where V: Eq + Hash, F: FnMut(&I::Item) -> V, I: Iterator, + S: BuildHasher, { UniqueBy { iter, - used: HashMap::new(), + used: HashMap::with_hasher(hash_builder), f, } } // count the number of new unique keys in iterable (`used` is the set already seen) -fn count_new_keys(mut used: HashMap, iterable: I) -> usize +fn count_new_keys(mut used: HashMap, iterable: I) -> usize where I: IntoIterator, K: Hash + Eq, + S: BuildHasher, { let iter = iterable.into_iter(); let current_used = used.len(); @@ -52,11 +59,12 @@ where used.len() - current_used } -impl Iterator for UniqueBy +impl Iterator for UniqueBy where I: Iterator, V: Eq + Hash, F: FnMut(&I::Item) -> V, + S: BuildHasher, { type Item = I::Item; @@ -77,11 +85,12 @@ where } } -impl DoubleEndedIterator for UniqueBy +impl DoubleEndedIterator for UniqueBy where I: DoubleEndedIterator, V: Eq + Hash, F: FnMut(&I::Item) -> V, + S: BuildHasher, { fn next_back(&mut self) -> Option { let Self { iter, used, f } = self; @@ -89,18 +98,20 @@ where } } -impl FusedIterator for UniqueBy +impl FusedIterator for UniqueBy where I: FusedIterator, V: Eq + Hash, F: FnMut(&I::Item) -> V, + S: BuildHasher, { } -impl Iterator for Unique +impl Iterator for Unique where I: Iterator, I::Item: Eq + Hash + Clone, + S: BuildHasher, { type Item = I::Item; @@ -127,10 +138,11 @@ where } } -impl DoubleEndedIterator for Unique +impl DoubleEndedIterator for Unique where I: DoubleEndedIterator, I::Item: Eq + Hash + Clone, + S: BuildHasher, { fn 
next_back(&mut self) -> Option { let UniqueBy { iter, used, .. } = &mut self.iter; @@ -145,10 +157,11 @@ where } } -impl FusedIterator for Unique +impl FusedIterator for Unique where I: FusedIterator, I::Item: Eq + Hash + Clone, + S: BuildHasher, { } @@ -157,31 +170,34 @@ where /// See [`.unique()`](crate::Itertools::unique) for more information. #[derive(Clone)] #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] -pub struct Unique +pub struct Unique where I: Iterator, I::Item: Eq + Hash + Clone, + S: BuildHasher, { - iter: UniqueBy, + iter: UniqueBy, } -impl fmt::Debug for Unique +impl fmt::Debug for Unique where I: Iterator + fmt::Debug, I::Item: Hash + Eq + fmt::Debug + Clone, + S: BuildHasher, { debug_fmt_fields!(Unique, iter); } -pub fn unique(iter: I) -> Unique +pub fn unique_with_hasher(iter: I, hash_builder: S) -> Unique where I: Iterator, I::Item: Eq + Hash + Clone, + S: BuildHasher, { Unique { iter: UniqueBy { iter, - used: HashMap::new(), + used: HashMap::with_hasher(hash_builder), f: (), }, } diff --git a/tests/test_std.rs b/tests/test_std.rs index ad391faad..d024d86ba 100644 --- a/tests/test_std.rs +++ b/tests/test_std.rs @@ -20,8 +20,31 @@ use rand::{ Rng, SeedableRng, }; use rand::{seq::SliceRandom, thread_rng}; +use std::collections::HashMap; +use std::hash::BuildHasher; +use std::hash::RandomState; +use std::iter::empty; use std::{cmp::min, fmt::Debug, marker::PhantomData}; +// A BuildHasher which forwards its calls to RandomState to make sure different hashers +// are accepted in the various *_with_hasher methods. 
+#[derive(Default)] +struct TestHasher(RandomState); + +impl TestHasher { + fn new() -> Self { + TestHasher(RandomState::new()) + } +} + +impl BuildHasher for TestHasher { + type Hasher = ::Hasher; + + fn build_hasher(&self) -> Self::Hasher { + self.0.build_hasher() + } +} + #[test] fn product3() { let prod = iproduct!(0..3, 0..2, 0..2); @@ -76,6 +99,10 @@ fn duplicates_by() { ys_rev.iter(), xs.iter().duplicates_by(|x| x[..2].to_string()).rev(), ); + + let _ = empty::() + .duplicates_by_with_hasher(|x| *x, TestHasher::new()) + .next(); } #[test] @@ -103,6 +130,10 @@ fn duplicates() { ); let ys_rev = vec![2, 1]; assert_eq!(ys_rev, xs.iter().duplicates().rev().cloned().collect_vec()); + + let _ = empty::() + .duplicates_with_hasher(TestHasher::new()) + .next(); } #[test] @@ -119,6 +150,8 @@ fn unique_by() { ys_rev.iter(), xs.iter().unique_by(|x| x[..2].to_string()).rev(), ); + + let _ = empty::().unique_by_with_hasher(|x| *x, TestHasher::new()); } #[test] @@ -136,6 +169,8 @@ fn unique() { it::assert_equal(ys.iter(), xs.iter().rev().unique().rev()); let ys_rev = [1, 0]; it::assert_equal(ys_rev.iter(), xs.iter().unique().rev()); + + let _ = empty::().unique_with_hasher(TestHasher::new()); } #[test] @@ -301,6 +336,8 @@ fn all_unique() { assert!("ABCDEFGH".chars().all_unique()); assert!(!"ABCDEFGA".chars().all_unique()); assert!(::std::iter::empty::().all_unique()); + + let _ = empty::().all_unique_with_hasher(TestHasher::new()); } #[test] @@ -1567,3 +1604,40 @@ fn multiunzip() { ) ); } + +#[test] +fn into_group_map_with_hasher() { + let _: HashMap<_, _, TestHasher> = + empty::<(u8, u8)>().into_group_map_with_hasher(TestHasher::new()); +} + +#[test] +fn into_group_map_by_with_hasher() { + let _: HashMap<_, _, TestHasher> = + empty::<(u8, u8)>().into_group_map_by_with_hasher(|x| *x, TestHasher::new()); +} + +#[test] +fn into_grouping_map_with_hasher() { + let _: HashMap<_, Vec<_>, TestHasher> = empty::<(u8, u8)>() + .into_grouping_map_with_hasher(TestHasher::new()) + 
.collect(); +} + +#[test] +fn into_grouping_map_by_with_hasher() { + let _: HashMap<_, Vec<_>, TestHasher> = empty::<(u8, u8)>() + .into_grouping_map_by_with_hasher(|x| *x, TestHasher::new()) + .collect(); +} + +#[test] +fn counts_with_hasher() { + let _: HashMap<_, _, TestHasher> = empty::().counts_with_hasher(TestHasher::new()); +} + +#[test] +fn counts_by_with_hasher() { + let _: HashMap<_, _, TestHasher> = + empty::().counts_by_with_hasher(|x| x, TestHasher::new()); +}