|
1 | 1 | use std::{ |
2 | 2 | borrow::Cow, |
3 | | - cmp::min, |
| 3 | + cmp::{max, min}, |
4 | 4 | fs::File, |
5 | 5 | io::{BufWriter, Seek, Write}, |
6 | 6 | path::Path, |
@@ -38,8 +38,6 @@ const KEY_COMPRESSION_SAMPLES_SIZE: usize = 256 * 1024; |
38 | 38 | /// The minimum number of bytes that should be selected as key samples. Below that, no |
39 | 39 | /// compression dictionary is used. |
40 | 40 | const MIN_KEY_COMPRESSION_SAMPLES_SIZE: usize = 1024; |
41 | | -/// The bytes that are used per key entry for a sample. |
42 | | -const COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY: usize = 100; |
43 | 41 | /// The minimum number of bytes that are used per key entry for a sample. |
44 | 42 | const MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY: usize = 16; |
45 | 43 |
|
@@ -167,17 +165,15 @@ fn compute_key_compression_dictionary<E: Entry>( |
167 | 165 |
|
168 | 166 | let mut sample_sizes = Vec::new(); |
169 | 167 |
|
170 | | - // Limit the number of iterations to avoid infinite loops |
171 | | - let max_iterations = total_key_size / COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY * 2; |
172 | | - for i in 0..max_iterations { |
173 | | - let entry = &entries[i % entries.len()]; |
| 168 | + for entry in entries { |
174 | 169 | let key_remaining = key_compression_samples_size - buffer.len(); |
175 | 170 | if key_remaining < MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY { |
176 | 171 | break; |
177 | 172 | } |
178 | 173 | let len = entry.key_len(); |
179 | 174 | if len >= MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY { |
180 | | - let used_len = min(key_remaining, COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY); |
| 175 | + let optimal_len = max(MIN_COMPRESSION_DICTIONARY_SAMPLE_PER_ENTRY, len / 8); |
| 176 | + let used_len = min(key_remaining, optimal_len); |
181 | 177 | if len <= used_len { |
182 | 178 | sample_sizes.push(len); |
183 | 179 | entry.write_key_to(buffer); |
|
0 commit comments