diff --git a/Cargo.toml b/Cargo.toml
index 86ec4afe1..45e433fff 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -47,6 +47,9 @@ serde_test = "1.0"
 doc-comment = "0.3.1"
 bumpalo = { version = "3.13.0", features = ["allocator-api2"] }
 
+[target.'cfg(unix)'.dev-dependencies]
+libc = "0.2.155"
+
 [features]
 default = ["default-hasher", "inline-more", "allocator-api2", "equivalent", "raw-entry"]
 
diff --git a/benches/with_capacity.rs b/benches/with_capacity.rs
new file mode 100644
index 000000000..eeb85b59a
--- /dev/null
+++ b/benches/with_capacity.rs
@@ -0,0 +1,38 @@
+#![feature(test)]
+
+extern crate test;
+
+use hashbrown::HashMap;
+use test::{black_box, Bencher};
+
+type Map = HashMap;
+
+macro_rules! bench_with_capacity {
+    ($name:ident, $cap:expr) => {
+        #[bench]
+        fn $name(b: &mut Bencher) {
+            b.iter(|| {
+                // Construct a new empty map with a given capacity and return it to avoid
+                // being optimized away. Dropping it measures allocation + minimal setup.
+                let m: Map = Map::with_capacity($cap);
+                black_box(m)
+            });
+        }
+    };
+}
+
+bench_with_capacity!(with_capacity_000000, 0);
+bench_with_capacity!(with_capacity_000001, 1);
+bench_with_capacity!(with_capacity_000003, 3);
+bench_with_capacity!(with_capacity_000007, 7);
+bench_with_capacity!(with_capacity_000008, 8);
+bench_with_capacity!(with_capacity_000016, 16);
+bench_with_capacity!(with_capacity_000032, 32);
+bench_with_capacity!(with_capacity_000064, 64);
+bench_with_capacity!(with_capacity_000128, 128);
+bench_with_capacity!(with_capacity_000256, 256);
+bench_with_capacity!(with_capacity_000512, 512);
+bench_with_capacity!(with_capacity_001024, 1024);
+bench_with_capacity!(with_capacity_004096, 4096);
+bench_with_capacity!(with_capacity_016384, 16384);
+bench_with_capacity!(with_capacity_065536, 65536);
diff --git a/src/map.rs b/src/map.rs
index 86f0ce09a..9890dc3d6 100644
--- a/src/map.rs
+++ b/src/map.rs
@@ -6631,3 +6631,136 @@ mod test_map {
         );
     }
 }
+
+#[cfg(all(test, unix, any(feature = "nightly", feature = "allocator-api2")))]
+mod test_map_with_mmap_allocations {
+    use super::HashMap;
+    use crate::raw::prev_pow2;
+    use core::alloc::Layout;
+    use core::ptr::{null_mut, NonNull};
+
+    #[cfg(feature = "nightly")]
+    use core::alloc::{AllocError, Allocator};
+
+    #[cfg(all(feature = "allocator-api2", not(feature = "nightly")))]
+    use allocator_api2::alloc::{AllocError, Allocator};
+
+    /// Test-only allocator that serves each request with whole pages from an
+    /// anonymous private `mmap`; it is not production quality.
+    #[derive(Clone, Copy, Debug)]
+    struct MmapAllocator {
+        /// System page size; `new` guarantees it is a power of 2.
+        page_size: usize,
+    }
+
+    impl MmapAllocator {
+        fn new() -> Result {
+            let result = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
+            if result < 1 {
+                return Err(AllocError);
+            }
+
+            let page_size = result as usize;
+            if !page_size.is_power_of_two() {
+                Err(AllocError)
+            } else {
+                Ok(Self { page_size })
+            }
+        }
+
+        fn fit_to_page_size(&self, n: usize) -> Result {
+            // A zero-sized request still gets one whole page (wasteful, but simple).
+            let n = if n == 0 { self.page_size } else { n };
+
+            match n & (self.page_size - 1) {
+                0 => Ok(n),
+                rem => n.checked_add(self.page_size - rem).ok_or(AllocError),
+            }
+        }
+    }
+
+    unsafe impl Allocator for MmapAllocator {
+        fn allocate(&self, layout: Layout) -> Result, AllocError> {
+            if layout.align() > self.page_size {
+                return Err(AllocError);
+            }
+
+            let null = null_mut();
+            let len = self.fit_to_page_size(layout.size())? as libc::size_t;
+            let prot = libc::PROT_READ | libc::PROT_WRITE;
+            let flags = libc::MAP_PRIVATE | libc::MAP_ANON;
+            let addr = unsafe { libc::mmap(null, len, prot, flags, -1, 0) };
+
+            // mmap reports failure with MAP_FAILED, not with a null pointer.
+            if addr == libc::MAP_FAILED {
+                return Err(AllocError);
+            }
+
+            match NonNull::new(addr.cast()) {
+                Some(data) => {
+                    // SAFETY: `data` is non-null, so the slice pointer built on it is too.
+                    Ok(unsafe {
+                        NonNull::new_unchecked(core::ptr::slice_from_raw_parts_mut(
+                            data.as_ptr(),
+                            len,
+                        ))
+                    })
+                }
+
+                // This branch shouldn't be taken in practice, but since we
+                // cannot return null as a valid pointer in our type system,
+                // we attempt to handle it.
+                None => {
+                    _ = unsafe { libc::munmap(addr, len) };
+                    Err(AllocError)
+                }
+            }
+        }
+
+        unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) {
+            // A layout that was accepted by `allocate` rounds without overflow here too.
+            let size = self.fit_to_page_size(layout.size()).unwrap();
+            let _result = libc::munmap(ptr.as_ptr().cast(), size);
+            debug_assert_eq!(0, _result)
+        }
+    }
+
+    #[test]
+    fn test_tiny_allocation_gets_rounded_to_page_size() {
+        let alloc = MmapAllocator::new().unwrap();
+        let mut map: HashMap = HashMap::with_capacity_in(1, alloc);
+
+        // Size of an element plus its control byte.
+        let rough_bucket_size = core::mem::size_of::<(usize, ())>() + 1;
+
+        // Accounting for some misc. padding that's likely in the allocation
+        // due to rounding to group width, etc.
+        let overhead = 3 * core::mem::size_of::();
+        let num_buckets = (alloc.page_size - overhead) / rough_bucket_size;
+        // Buckets are always powers of 2.
+        let min_elems = prev_pow2(num_buckets);
+        // Real load-factor is 7/8, but this is a lower estimation, so 1/2.
+        let min_capacity = min_elems >> 1;
+        let capacity = map.capacity();
+        assert!(
+            capacity >= min_capacity,
+            "failed: {capacity} >= {min_capacity}"
+        );
+
+        // Fill it up.
+        for i in 0..capacity {
+            map.insert(i, ());
+        }
+        // Capacity should not have changed and it should be full.
+        assert_eq!(capacity, map.len());
+        assert_eq!(capacity, map.capacity());
+
+        // Alright, make it grow.
+        map.insert(capacity, ());
+        assert!(
+            capacity < map.capacity(),
+            "failed: {capacity} < {}",
+            map.capacity()
+        );
+    }
+}
diff --git a/src/raw/alloc.rs b/src/raw/alloc.rs
index c01e2a45c..bacb4a149 100644
--- a/src/raw/alloc.rs
+++ b/src/raw/alloc.rs
@@ -15,9 +15,9 @@ mod inner {
     use core::ptr::NonNull;
 
     #[allow(clippy::map_err_ignore)]
-    pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> {
+    pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> {
         match alloc.allocate(layout) {
-            Ok(ptr) => Ok(ptr.as_non_null_ptr()),
+            Ok(ptr) => Ok(ptr),
             Err(_) => Err(()),
         }
     }
@@ -38,9 +38,9 @@ mod inner {
     use core::ptr::NonNull;
 
     #[allow(clippy::map_err_ignore)]
-    pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> {
+    pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> {
         match alloc.allocate(layout) {
-            Ok(ptr) => Ok(ptr.cast()),
+            Ok(ptr) => Ok(ptr),
             Err(_) => Err(()),
         }
     }
@@ -61,7 +61,7 @@ mod inner {
 
     #[allow(clippy::missing_safety_doc)] // not exposed outside of this crate
     pub unsafe trait Allocator {
-        fn allocate(&self, layout: Layout) -> Result, ()>;
+        fn allocate(&self, layout: Layout) -> Result, ()>;
         unsafe fn deallocate(&self, ptr: NonNull, layout: Layout);
     }
 
@@ -70,8 +70,19 @@ mod inner {
 
     unsafe impl Allocator for Global {
         #[inline]
-        fn allocate(&self, layout: Layout) -> Result, ()> {
-            unsafe { NonNull::new(alloc(layout)).ok_or(()) }
+        fn allocate(&self, layout: Layout) -> Result, ()> {
+            match unsafe { NonNull::new(alloc(layout)) } {
+                Some(data) => {
+                    // SAFETY: `data` is non-null, so the slice pointer built on it is too.
+                    Ok(unsafe {
+                        NonNull::new_unchecked(core::ptr::slice_from_raw_parts_mut(
+                            data.as_ptr(),
+                            layout.size(),
+                        ))
+                    })
+                }
+                None => Err(()),
+            }
         }
         #[inline]
         unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) {
@@ -86,7 +97,7 @@ mod inner {
         }
     }
 
-    pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> {
+    pub(crate) fn do_alloc(alloc: &A, layout: Layout) -> Result, ()> {
         alloc.allocate(layout)
     }
 }
diff --git a/src/raw/mod.rs b/src/raw/mod.rs
index 6a8d37d82..a96282b70 100644
--- a/src/raw/mod.rs
+++ b/src/raw/mod.rs
@@ -97,6 +97,8 @@ impl ProbeSeq {
 /// taking the maximum load factor into account.
 ///
 /// Returns `None` if an overflow occurs.
+///
+/// Debug builds assert `buckets * table_layout.size >= table_layout.ctrl_align` (non-ZST `T`).
 // Workaround for emscripten bug emscripten-core/emscripten-fastcomp#258
 #[cfg_attr(target_os = "emscripten", inline(never))]
 #[cfg_attr(not(target_os = "emscripten"), inline)]
@@ -138,13 +140,15 @@ fn capacity_to_buckets(cap: usize, table_layout: TableLayout) -> Option {
         // We don't bother with a table size of 2 buckets since that can only
         // hold a single element. Instead, we skip directly to a 4 bucket table
         // which can hold 3 elements.
-        return Some(if cap < 4 {
+        let buckets = if cap < 4 {
             4
         } else if cap < 8 {
             8
         } else {
             16
-        });
+        };
+        ensure_bucket_bytes_at_least_ctrl_align(table_layout, buckets);
+        return Some(buckets);
     }
 
     // Otherwise require 1/8 buckets to be empty (87.5% load)
@@ -156,7 +160,22 @@ fn capacity_to_buckets(cap: usize, table_layout: TableLayout) -> Option {
     // Any overflows will have been caught by the checked_mul. Also, any
     // rounding errors from the division above will be cleaned up by
     // next_power_of_two (which can't overflow because of the previous division).
-    Some(adjusted_cap.next_power_of_two())
+    let buckets = adjusted_cap.next_power_of_two();
+    ensure_bucket_bytes_at_least_ctrl_align(table_layout, buckets);
+    Some(buckets)
+}
+
+// Debug-only sanity check that, for non-ZST `T`, the chosen `buckets`
+// satisfies `buckets * table_layout.size >= table_layout.ctrl_align`.
+// This does not make `buckets * table_layout.size` a multiple of
+// `ctrl_align`, so `calculate_layout_for` may still add padding and
+// `maximum_buckets_in` verifies its estimate against the real layout.
+#[inline]
+fn ensure_bucket_bytes_at_least_ctrl_align(table_layout: TableLayout, buckets: usize) {
+    if table_layout.size != 0 {
+        let prod = table_layout.size.saturating_mul(buckets);
+        debug_assert!(prod >= table_layout.ctrl_align);
+    }
 }
 
 /// Returns the maximum effective capacity for the given bucket mask, taking
@@ -1442,6 +1461,45 @@ impl RawTableInner {
     }
 }
 
+/// Returns the largest power of 2 that is less than or equal to `z`.
+/// `z` must be non-zero; zero has no previous power of 2 (debug-asserted).
+pub(crate) fn prev_pow2(z: usize) -> usize {
+    debug_assert!(z != 0, "prev_pow2 is not defined for 0");
+    1 << (usize::BITS - 1 - z.leading_zeros())
+}
+
+/// Finds the largest power-of-two number of buckets (at least 1) whose
+/// table layout fits in `allocation_size` for the given `TableLayout`.
+///
+/// `allocation_size` is expected to be at least the layout size of the
+/// bucket count that `capacity_to_buckets` produced for the request.
+fn maximum_buckets_in(
+    allocation_size: usize,
+    table_layout: TableLayout,
+    group_width: usize,
+) -> usize {
+    // Given an equation like:
+    //   z >= x * y + x + g
+    // x can be maximized by doing:
+    //   x = (z - g) / (y + 1)
+    // where x is the number of buckets, y is table_layout.size, z is the
+    // size of the allocation and g is the group width.
+    // That estimate ignores the padding introduced by:
+    //   ctrl_offset = align(x * y, ctrl_align)
+    // which is non-zero whenever x * y is not a multiple of ctrl_align
+    // (e.g. y = 5, x = 8, ctrl_align = 16), so it is only an upper bound.
+    // Halve it until the real layout fits in the allocation.
+    let estimate = allocation_size.saturating_sub(group_width) / (table_layout.size + 1);
+    let mut x = prev_pow2(estimate.max(1));
+    while x > 1 {
+        match table_layout.calculate_layout_for(x) {
+            Some((layout, _)) if layout.size() <= allocation_size => break,
+            _ => x >>= 1,
+        }
+    }
+    x
+}
+
 impl RawTableInner {
     /// Allocates a new [`RawTableInner`] with the given number of buckets.
     /// The control bytes and buckets are left uninitialized.
@@ -1459,7 +1517,7 @@ impl RawTableInner {
     unsafe fn new_uninitialized(
         alloc: &A,
         table_layout: TableLayout,
-        buckets: usize,
+        mut buckets: usize,
         fallibility: Fallibility,
     ) -> Result
     where
@@ -1468,13 +1526,33 @@ impl RawTableInner {
         debug_assert!(buckets.is_power_of_two());
 
         // Avoid `Option::ok_or_else` because it bloats LLVM IR.
-        let (layout, ctrl_offset) = match table_layout.calculate_layout_for(buckets) {
+        let (layout, mut ctrl_offset) = match table_layout.calculate_layout_for(buckets) {
             Some(lco) => lco,
             None => return Err(fallibility.capacity_overflow()),
         };
 
         let ptr: NonNull = match do_alloc(alloc, layout) {
-            Ok(block) => block.cast(),
+            Ok(block) => {
+                // The allocator can't return a value smaller than was
+                // requested, so this can be != instead of >=.
+                if block.len() != layout.size() {
+                    // Utilize over-sized allocations.
+                    let x = maximum_buckets_in(block.len(), table_layout, Group::WIDTH);
+                    debug_assert!(x >= buckets);
+                    // Calculate the new ctrl_offset.
+                    let (oversized_layout, oversized_ctrl_offset) =
+                        match table_layout.calculate_layout_for(x) {
+                            Some(lco) => lco,
+                            None => unsafe { hint::unreachable_unchecked() },
+                        };
+                    debug_assert!(oversized_layout.size() <= block.len());
+                    debug_assert!(oversized_ctrl_offset >= ctrl_offset);
+                    ctrl_offset = oversized_ctrl_offset;
+                    buckets = x;
+                }
+
+                block.cast()
+            }
             Err(_) => return Err(fallibility.alloc_err(layout)),
         };
 
@@ -4168,6 +4246,26 @@ impl RawExtractIf<'_, T, A> {
 mod test_map {
     use super::*;
 
+    #[test]
+    fn test_prev_pow2() {
+        // Skip 0, not defined for that input.
+        let mut pow2: usize = 1;
+        while (pow2 << 1) > 0 {
+            let next_pow2 = pow2 << 1;
+            assert_eq!(pow2, prev_pow2(pow2));
+            // Need to skip 2, because it's also a power of 2, so it doesn't
+            // return the previous power of 2.
+            if next_pow2 > 2 {
+                assert_eq!(pow2, prev_pow2(pow2 + 1));
+                assert_eq!(pow2, prev_pow2(next_pow2 - 1));
+            }
+            pow2 = next_pow2;
+        }
+        // The loop stops before checking the largest power of 2 itself.
+        assert_eq!(pow2, prev_pow2(pow2));
+        assert_eq!(pow2, prev_pow2(usize::MAX));
+    }
+
     #[test]
     fn test_minimum_capacity_for_small_types() {
         #[track_caller]