From c1ddda229df96f7b8171609f3490128d8928e041 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Sat, 20 Sep 2025 10:05:50 -0600 Subject: [PATCH 1/5] bench: add with_capacity benchmark --- benches/with_capacity.rs | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 benches/with_capacity.rs diff --git a/benches/with_capacity.rs b/benches/with_capacity.rs new file mode 100644 index 000000000..eeb85b59a --- /dev/null +++ b/benches/with_capacity.rs @@ -0,0 +1,38 @@ +#![feature(test)] + +extern crate test; + +use hashbrown::HashMap; +use test::{black_box, Bencher}; + +type Map = HashMap; + +macro_rules! bench_with_capacity { + ($name:ident, $cap:expr) => { + #[bench] + fn $name(b: &mut Bencher) { + b.iter(|| { + // Construct a new empty map with a given capacity and return it to avoid + // being optimized away. Dropping it measures allocation + minimal setup. + let m: Map = Map::with_capacity($cap); + black_box(m) + }); + } + }; +} + +bench_with_capacity!(with_capacity_000000, 0); +bench_with_capacity!(with_capacity_000001, 1); +bench_with_capacity!(with_capacity_000003, 3); +bench_with_capacity!(with_capacity_000007, 7); +bench_with_capacity!(with_capacity_000008, 8); +bench_with_capacity!(with_capacity_000016, 16); +bench_with_capacity!(with_capacity_000032, 32); +bench_with_capacity!(with_capacity_000064, 64); +bench_with_capacity!(with_capacity_000128, 128); +bench_with_capacity!(with_capacity_000256, 256); +bench_with_capacity!(with_capacity_000512, 512); +bench_with_capacity!(with_capacity_001024, 1024); +bench_with_capacity!(with_capacity_004096, 4096); +bench_with_capacity!(with_capacity_016384, 16384); +bench_with_capacity!(with_capacity_065536, 65536); From a983f30c785191939704128d4b487a62538bb841 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Sat, 20 Sep 2025 09:50:08 -0600 Subject: [PATCH 2/5] feat: recognize and use over sized allocations Allocators are allowed to return a larger memory chunk than was asked 
for. If the amount extra is large enough, then the hash table can use the extra space. The Global allocator will not hit this path, because it won't over-size enough to matter, but custom allocators may. An example of an allocator which allocates full system pages is included in the test suite (UNIX only because it uses `mmap`). --- Cargo.toml | 3 ++ src/map.rs | 133 +++++++++++++++++++++++++++++++++++++++++++++++ src/raw/alloc.rs | 27 +++++++--- src/raw/mod.rs | 73 ++++++++++++++++++++++++-- 4 files changed, 225 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 86ec4afe1..45e433fff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,9 @@ serde_test = "1.0" doc-comment = "0.3.1" bumpalo = { version = "3.13.0", features = ["allocator-api2"] } +[target.'cfg(unix)'.dev-dependencies] +libc = "0.2.155" + [features] default = ["default-hasher", "inline-more", "allocator-api2", "equivalent", "raw-entry"] diff --git a/src/map.rs b/src/map.rs index 86f0ce09a..9890dc3d6 100644 --- a/src/map.rs +++ b/src/map.rs @@ -6631,3 +6631,136 @@ mod test_map { ); } } + +#[cfg(all(test, unix, any(feature = "nightly", feature = "allocator-api2")))] +mod test_map_with_mmap_allocations { + use super::HashMap; + use crate::raw::prev_pow2; + use core::alloc::Layout; + use core::ptr::{null_mut, NonNull}; + + #[cfg(feature = "nightly")] + use core::alloc::{AllocError, Allocator}; + + #[cfg(all(feature = "allocator-api2", not(feature = "nightly")))] + use allocator_api2::alloc::{AllocError, Allocator}; + + /// This is not a production quality allocator, just good enough for + /// some basic tests. + #[derive(Clone, Copy, Debug)] + struct MmapAllocator { + /// Guarantee this is a power of 2. 
+ page_size: usize, + } + + impl MmapAllocator { + fn new() -> Result { + let result = unsafe { libc::sysconf(libc::_SC_PAGESIZE) }; + if result < 1 { + return Err(AllocError); + } + + let page_size = result as usize; + if !page_size.is_power_of_two() { + Err(AllocError) + } else { + Ok(Self { page_size }) + } + } + + fn fit_to_page_size(&self, n: usize) -> Result { + // If n=0, give a single page (wasteful, I know). + let n = if n == 0 { self.page_size } else { n }; + + match n & (self.page_size - 1) { + 0 => Ok(n), + rem => n.checked_add(self.page_size - rem).ok_or(AllocError), + } + } + } + + unsafe impl Allocator for MmapAllocator { + fn allocate(&self, layout: Layout) -> Result, AllocError> { + if layout.align() > self.page_size { + return Err(AllocError); + } + + let null = null_mut(); + let len = self.fit_to_page_size(layout.size())? as libc::size_t; + let prot = libc::PROT_READ | libc::PROT_WRITE; + let flags = libc::MAP_PRIVATE | libc::MAP_ANON; + let addr = unsafe { libc::mmap(null, len, prot, flags, -1, 0) }; + + // mmap returns MAP_FAILED on failure, not Null. + if addr == libc::MAP_FAILED { + return Err(AllocError); + } + + match NonNull::new(addr.cast()) { + Some(data) => { + // SAFETY: this is NonNull::slice_from_raw_parts. + Ok(unsafe { + NonNull::new_unchecked(core::ptr::slice_from_raw_parts_mut( + data.as_ptr(), + len, + )) + }) + } + + // This branch shouldn't be taken in practice, but since we + // cannot return null as a valid pointer in our type system, + // we attempt to handle it. + None => { + _ = unsafe { libc::munmap(addr, len) }; + Err(AllocError) + } + } + } + + unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { + // If they allocated it with this layout, it must round correctly. 
+ let size = self.fit_to_page_size(layout.size()).unwrap(); + let _result = libc::munmap(ptr.as_ptr().cast(), size); + debug_assert_eq!(0, _result) + } + } + + #[test] + fn test_tiny_allocation_gets_rounded_to_page_size() { + let alloc = MmapAllocator::new().unwrap(); + let mut map: HashMap = HashMap::with_capacity_in(1, alloc); + + // Size of an element plus its control byte. + let rough_bucket_size = core::mem::size_of::<(usize, ())>() + 1; + + // Accounting for some misc. padding that's likely in the allocation + // due to rounding to group width, etc. + let overhead = 3 * core::mem::size_of::(); + let num_buckets = (alloc.page_size - overhead) / rough_bucket_size; + // Buckets are always powers of 2. + let min_elems = prev_pow2(num_buckets); + // Real load-factor is 7/8, but this is a lower estimation, so 1/2. + let min_capacity = min_elems >> 1; + let capacity = map.capacity(); + assert!( + capacity >= min_capacity, + "failed: {capacity} >= {min_capacity}" + ); + + // Fill it up. + for i in 0..capacity { + map.insert(i, ()); + } + // Capacity should not have changed and it should be full. + assert_eq!(capacity, map.len()); + assert_eq!(capacity, map.capacity()); + + // Alright, make it grow. 
+ map.insert(capacity, ()); + assert!( + capacity < map.capacity(), + "failed: {capacity} < {}", + map.capacity() + ); + } +} diff --git a/src/raw/alloc.rs b/src/raw/alloc.rs index c01e2a45c..bacb4a149 100644 --- a/src/raw/alloc.rs +++ b/src/raw/alloc.rs @@ -15,9 +15,9 @@ mod inner { use core::ptr::NonNull; #[allow(clippy::map_err_ignore)] - pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { + pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { match alloc.allocate(layout) { - Ok(ptr) => Ok(ptr.as_non_null_ptr()), + Ok(ptr) => Ok(ptr), Err(_) => Err(()), } } @@ -38,9 +38,9 @@ mod inner { use core::ptr::NonNull; #[allow(clippy::map_err_ignore)] - pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { + pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { match alloc.allocate(layout) { - Ok(ptr) => Ok(ptr.cast()), + Ok(ptr) => Ok(ptr), Err(_) => Err(()), } } @@ -61,7 +61,7 @@ mod inner { #[allow(clippy::missing_safety_doc)] // not exposed outside of this crate pub unsafe trait Allocator { - fn allocate(&self, layout: Layout) -> Result, ()>; + fn allocate(&self, layout: Layout) -> Result, ()>; unsafe fn deallocate(&self, ptr: NonNull, layout: Layout); } @@ -70,8 +70,19 @@ mod inner { unsafe impl Allocator for Global { #[inline] - fn allocate(&self, layout: Layout) -> Result, ()> { - unsafe { NonNull::new(alloc(layout)).ok_or(()) } + fn allocate(&self, layout: Layout) -> Result, ()> { + match unsafe { NonNull::new(alloc(layout)) } { + Some(data) => { + // SAFETY: this is NonNull::slice_from_raw_parts. 
+ Ok(unsafe { + NonNull::new_unchecked(core::ptr::slice_from_raw_parts_mut( + data.as_ptr(), + layout.size(), + )) + }) + } + None => Err(()), + } } #[inline] unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { @@ -86,7 +97,7 @@ mod inner { } } - pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { + pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> { alloc.allocate(layout) } } diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 6a8d37d82..69b079217 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1442,6 +1442,40 @@ impl RawTableInner { } } +/// Find the previous power of 2. If it's already a power of 2, it's unchanged. +/// Passing zero is invalid: the subtraction overflows, which panics when overflow checks are enabled and otherwise yields a meaningless value. +pub(crate) fn prev_pow2(z: usize) -> usize { + let shift = mem::size_of::() * 8 - 1; + 1 << (shift - (z.leading_zeros() as usize)) +} + +fn maximum_buckets_in( + allocation_size: usize, + table_layout: TableLayout, + group_width: usize, +) -> usize { + // Given an equation like: + // z >= x * y + x + g + // x can be maximized by doing: + // x = (z - g) / (y + 1) + // If you squint: + // x is the number of buckets + // y is the table_layout.size + // z is the size of the allocation + // g is the group width + // But this is ignoring the padding needed for ctrl_align. + // If we remember these restrictions: + // x is always a power of 2 + // Layout size for T must always be a multiple of the alignment of T + // Then the alignment can be ignored if we add the constraint: + // x * y >= table_layout.ctrl_align + // This is taken care of by `capacity_to_buckets`. + let numerator = allocation_size - group_width; + let denominator = table_layout.size + 1; // todo: ZSTs? + let quotient = numerator / denominator; + prev_pow2(quotient) +} + impl RawTableInner { /// Allocates a new [`RawTableInner`] with the given number of buckets. /// The control bytes and buckets are left uninitialized.
@@ -1459,7 +1493,7 @@ impl RawTableInner { unsafe fn new_uninitialized( alloc: &A, table_layout: TableLayout, - buckets: usize, + mut buckets: usize, fallibility: Fallibility, ) -> Result where @@ -1468,13 +1502,29 @@ impl RawTableInner { debug_assert!(buckets.is_power_of_two()); // Avoid `Option::ok_or_else` because it bloats LLVM IR. - let (layout, ctrl_offset) = match table_layout.calculate_layout_for(buckets) { + let (layout, mut ctrl_offset) = match table_layout.calculate_layout_for(buckets) { Some(lco) => lco, None => return Err(fallibility.capacity_overflow()), }; let ptr: NonNull = match do_alloc(alloc, layout) { - Ok(block) => block.cast(), + Ok(block) => { + // Utilize over-sized allocations. + let x = maximum_buckets_in(block.len(), table_layout, Group::WIDTH); + debug_assert!(x >= buckets); + // Calculate the new ctrl_offset. + let (_oversized_layout, oversized_ctrl_offset) = + match table_layout.calculate_layout_for(x) { + Some(lco) => lco, + None => unsafe { hint::unreachable_unchecked() }, + }; + debug_assert!(_oversized_layout.size() <= block.len()); + debug_assert!(oversized_ctrl_offset >= ctrl_offset); + ctrl_offset = oversized_ctrl_offset; + buckets = x; + + block.cast() + } Err(_) => return Err(fallibility.alloc_err(layout)), }; @@ -4168,6 +4218,23 @@ impl RawExtractIf<'_, T, A> { mod test_map { use super::*; + #[test] + fn test_prev_pow2() { + // Skip 0, not defined for that input. + let mut pow2: usize = 1; + while (pow2 << 1) > 0 { + let next_pow2 = pow2 << 1; + assert_eq!(pow2, prev_pow2(pow2)); + // Need to skip 2, because it's also a power of 2, so it doesn't + // return the previous power of 2. 
+ if next_pow2 > 2 { + assert_eq!(pow2, prev_pow2(pow2 + 1)); + assert_eq!(pow2, prev_pow2(next_pow2 - 1)); + } + pow2 = next_pow2; + } + } + #[test] fn test_minimum_capacity_for_small_types() { #[track_caller] From e8370e0dade9aecee5f17d205027457cf9470aab Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Mon, 22 Sep 2025 14:57:08 -0600 Subject: [PATCH 3/5] perf: fast-path for not oversized allocations --- src/raw/mod.rs | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 69b079217..5892dbff9 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1509,19 +1509,21 @@ impl RawTableInner { let ptr: NonNull = match do_alloc(alloc, layout) { Ok(block) => { - // Utilize over-sized allocations. - let x = maximum_buckets_in(block.len(), table_layout, Group::WIDTH); - debug_assert!(x >= buckets); - // Calculate the new ctrl_offset. - let (_oversized_layout, oversized_ctrl_offset) = - match table_layout.calculate_layout_for(x) { - Some(lco) => lco, - None => unsafe { hint::unreachable_unchecked() }, - }; - debug_assert!(_oversized_layout.size() <= block.len()); - debug_assert!(oversized_ctrl_offset >= ctrl_offset); - ctrl_offset = oversized_ctrl_offset; - buckets = x; + if block.len() > layout.size() { + // Utilize over-sized allocations. + let x = maximum_buckets_in(block.len(), table_layout, Group::WIDTH); + debug_assert!(x >= buckets); + // Calculate the new ctrl_offset. 
+ let (_oversized_layout, oversized_ctrl_offset) = + match table_layout.calculate_layout_for(x) { + Some(lco) => lco, + None => unsafe { hint::unreachable_unchecked() }, + }; + debug_assert!(_oversized_layout.size() <= block.len()); + debug_assert!(oversized_ctrl_offset >= ctrl_offset); + ctrl_offset = oversized_ctrl_offset; + buckets = x; + } block.cast() } From 274ae5f41714880baf248acb55aff49c9e2df9f7 Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Sun, 28 Sep 2025 15:09:19 -0600 Subject: [PATCH 4/5] style: address feedback from code review --- src/raw/mod.rs | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 5892dbff9..dc3b35ce4 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -97,6 +97,8 @@ impl ProbeSeq { /// taking the maximum load factor into account. /// /// Returns `None` if an overflow occurs. +/// +/// This ensures that `buckets * table_layout.size >= table_layout.ctrl_align`. // Workaround for emscripten bug emscripten-core/emscripten-fastcomp#258 #[cfg_attr(target_os = "emscripten", inline(never))] #[cfg_attr(not(target_os = "emscripten"), inline)] @@ -138,13 +140,15 @@ fn capacity_to_buckets(cap: usize, table_layout: TableLayout) -> Option { // We don't bother with a table size of 2 buckets since that can only // hold a single element. Instead, we skip directly to a 4 bucket table // which can hold 3 elements. - return Some(if cap < 4 { + let buckets = if cap < 4 { 4 } else if cap < 8 { 8 } else { 16 - }); + }; + ensure_bucket_bytes_at_least_ctrl_align(table_layout, buckets); + Some(buckets) } // Otherwise require 1/8 buckets to be empty (87.5% load) @@ -156,7 +160,22 @@ fn capacity_to_buckets(cap: usize, table_layout: TableLayout) -> Option { // Any overflows will have been caught by the checked_mul. 
Also, any // rounding errors from the division above will be cleaned up by // next_power_of_two (which can't overflow because of the previous division). - Some(adjusted_cap.next_power_of_two()) + let buckets = adjusted_cap.next_power_of_two(); + ensure_bucket_bytes_at_least_ctrl_align(table_layout, buckets); + Some(buckets) +} + +// `maximum_buckets_in` relies on the property that for non-ZST `T`, any +// chosen `buckets` will satisfy `buckets * table_layout.size >= +// table_layout.ctrl_align`, so `calculate_layout_for` does not need to add +// extra padding beyond `table_layout.size * buckets`. If small-table bucket +// selection or growth policy changes, revisit `maximum_buckets_in`. +#[inline] +fn ensure_bucket_bytes_at_least_ctrl_align(table_layout: TableLayout, buckets: usize) { + if table_layout.size != 0 { + let prod = table_layout.size.saturating_mul(buckets); + debug_assert!(prod >= table_layout.ctrl_align); + } } /// Returns the maximum effective capacity for the given bucket mask, taking @@ -1449,6 +1468,11 @@ pub(crate) fn prev_pow2(z: usize) -> usize { 1 << (shift - (z.leading_zeros() as usize)) } +/// Finds the largest number of buckets that can fit in `allocation_size` +/// provided the given TableLayout. +/// +/// This relies on some invariants of `capacity_to_buckets`, so only feed in +/// an `allocation_size` calculated from `capacity_to_buckets`. fn maximum_buckets_in( allocation_size: usize, table_layout: TableLayout, @@ -1470,10 +1494,10 @@ fn maximum_buckets_in( // Then the alignment can be ignored if we add the constraint: // x * y >= table_layout.ctrl_align // This is taken care of by `capacity_to_buckets`. - let numerator = allocation_size - group_width; - let denominator = table_layout.size + 1; // todo: ZSTs? 
+ - let quotient = numerator / denominator; - prev_pow2(quotient) + // It may be helpful to understand this if you remember that: + // ctrl_offset = align(x * y, ctrl_align) + let x = (allocation_size - group_width) / (table_layout.size + 1); + prev_pow2(x) } impl RawTableInner { @@ -1509,17 +1533,19 @@ impl RawTableInner { let ptr: NonNull = match do_alloc(alloc, layout) { Ok(block) => { - if block.len() > layout.size() { + // The allocator can't return a value smaller than was + // requested, so this can be != instead of >. + if block.len() != layout.size() { // Utilize over-sized allocations. let x = maximum_buckets_in(block.len(), table_layout, Group::WIDTH); debug_assert!(x >= buckets); // Calculate the new ctrl_offset. - let (_oversized_layout, oversized_ctrl_offset) = + let (oversized_layout, oversized_ctrl_offset) = match table_layout.calculate_layout_for(x) { Some(lco) => lco, None => unsafe { hint::unreachable_unchecked() }, }; - debug_assert!(_oversized_layout.size() <= block.len()); + debug_assert!(oversized_layout.size() <= block.len()); debug_assert!(oversized_ctrl_offset >= ctrl_offset); ctrl_offset = oversized_ctrl_offset; buckets = x; From 88919fce8eca4b4edd51e1f105ba5121d801014b Mon Sep 17 00:00:00 2001 From: Levi Morrison Date: Mon, 29 Sep 2025 09:05:18 -0600 Subject: [PATCH 5/5] build: fix return --- src/raw/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index dc3b35ce4..a96282b70 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -148,7 +148,7 @@ fn capacity_to_buckets(cap: usize, table_layout: TableLayout) -> Option { 16 }; ensure_bucket_bytes_at_least_ctrl_align(table_layout, buckets); - Some(buckets) + return Some(buckets); } // Otherwise require 1/8 buckets to be empty (87.5% load)