diff --git a/Cargo.lock b/Cargo.lock index 8f9b19e5a4587..65872663e73a8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5107,9 +5107,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.26" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "plotters" @@ -11732,9 +11732,9 @@ dependencies = [ [[package]] name = "zstd" -version = "0.13.2" +version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" +checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" dependencies = [ "zstd-safe", ] @@ -11750,9 +11750,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "2.0.10+zstd.1.5.6" +version = "2.0.15+zstd.1.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" +checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" dependencies = [ "cc", "pkg-config", diff --git a/turbopack/crates/turbo-persistence/Cargo.toml b/turbopack/crates/turbo-persistence/Cargo.toml index ac4e7e5cc45f3..8079ce13ed9cf 100644 --- a/turbopack/crates/turbo-persistence/Cargo.toml +++ b/turbopack/crates/turbo-persistence/Cargo.toml @@ -27,7 +27,7 @@ smallvec = { workspace = true } thread_local = { workspace = true } tracing = { workspace = true } twox-hash = { workspace = true } -zstd = { version = "0.13.2", features = ["zdict_builder"] } +zstd = { version = "0.13.3", features = ["zdict_builder"] } [dev-dependencies] rand = { workspace = true, features = ["small_rng"] } diff --git a/turbopack/crates/turbo-persistence/src/static_sorted_file_builder.rs 
b/turbopack/crates/turbo-persistence/src/static_sorted_file_builder.rs index 66d0d043ce25f..4c94bc10eb338 100644 --- a/turbopack/crates/turbo-persistence/src/static_sorted_file_builder.rs +++ b/turbopack/crates/turbo-persistence/src/static_sorted_file_builder.rs @@ -1,6 +1,6 @@ use std::{ borrow::Cow, - cmp::min, + cmp::{max, min}, fs::File, io::{BufWriter, Seek, Write}, path::Path, @@ -35,11 +35,9 @@ const AMQF_FALSE_POSITIVE_RATE: f64 = 0.01; const KEY_COMPRESSION_DICTIONARY_SIZE: usize = 64 * 1024 - 1; /// The maximum bytes that should be selected as key samples to create a compression dictionary const KEY_COMPRESSION_SAMPLES_SIZE: usize = 256 * 1024; -/// The minimum bytes that should be selected as keys samples. Below that no compression dictionary +/// The minimum bytes that should be selected as key samples. Below that no compression dictionary /// is used. const MIN_KEY_COMPRESSION_SAMPLES_SIZE: usize = 1024; -/// The bytes that are used per key entry for a sample. -const COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY: usize = 100; /// The minimum bytes that are used per key entry for a sample. 
const MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY: usize = 16; @@ -147,12 +145,10 @@ pub fn write_static_stored_file( } fn get_compression_buffer_capacity(total_key_size: usize) -> usize { - let mut size = 0; - if total_key_size >= MIN_KEY_COMPRESSION_SAMPLES_SIZE { - let key_compression_samples_size = min(KEY_COMPRESSION_SAMPLES_SIZE, total_key_size / 16); - size = key_compression_samples_size; + if total_key_size < MIN_KEY_COMPRESSION_SAMPLES_SIZE { + return 0; } - size + min(KEY_COMPRESSION_SAMPLES_SIZE, total_key_size / 16) } /// Computes compression dictionaries from keys of all entries @@ -162,23 +158,28 @@ fn compute_key_compression_dictionary( total_key_size: usize, buffer: &mut Vec<u8>, ) -> Result<Vec<u8>> { - if total_key_size < MIN_KEY_COMPRESSION_SAMPLES_SIZE { + let key_compression_samples_size = get_compression_buffer_capacity(total_key_size); + if key_compression_samples_size == 0 { return Ok(Vec::new()); } - let key_compression_samples_size = min(KEY_COMPRESSION_SAMPLES_SIZE, total_key_size / 16); + + let max_sample_size = max( + MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY, + key_compression_samples_size / 1024, + ); + let mut sample_sizes = Vec::new(); - // Limit the number of iterations to avoid infinite loops - let max_iterations = total_key_size / COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY * 2; - for i in 0..max_iterations { - let entry = &entries[i % entries.len()]; + for entry in entries { let key_remaining = key_compression_samples_size - buffer.len(); if key_remaining < MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY { break; } let len = entry.key_len(); if len >= MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY { - let used_len = min(key_remaining, COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY); + let optimal_len = + (len / 8).clamp(MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY, max_sample_size); + let used_len = min(key_remaining, optimal_len); if len <= used_len { sample_sizes.push(len); entry.write_key_to(buffer); @@ -193,10 +194,12 @@ fn
compute_key_compression_dictionary( } } } - debug_assert!(buffer.len() == sample_sizes.iter().sum::<usize>()); - let result = if buffer.len() > MIN_KEY_COMPRESSION_SAMPLES_SIZE && sample_sizes.len() > 5 { - zstd::dict::from_continuous(buffer, &sample_sizes, KEY_COMPRESSION_DICTIONARY_SIZE) - .context("Key dictionary creation failed")? + /// The zstd dict builder requires at least 7 samples + const MIN_SAMPLE_SIZE: usize = 7; + let result = if buffer.len() > MIN_KEY_COMPRESSION_SAMPLES_SIZE + && sample_sizes.len() > MIN_SAMPLE_SIZE + { + zstd::dict::from_continuous(buffer, &sample_sizes, KEY_COMPRESSION_DICTIONARY_SIZE)? } else { Vec::new() };