diff --git a/CHANGELOG.md b/CHANGELOG.md index c2a7e9c4788..6f0eea55360 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Perf +### 2025-10-30 + +- Add bloom filters to snapshot layers [#5112](https://github.com/lambdaclass/ethrex/pull/5112) + ### 2025-10-28 - Batch BlobsBundle::validate [#4993](https://github.com/lambdaclass/ethrex/pull/4993) diff --git a/Cargo.lock b/Cargo.lock index 1ff1bf919fe..5dee9759700 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3761,6 +3761,7 @@ dependencies = [ "ethrex-common", "ethrex-rlp", "ethrex-trie", + "fastbloom", "hex", "hex-literal", "rocksdb", @@ -3903,6 +3904,18 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "fastbloom" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18c1ddb9231d8554c2d6bdf4cfaabf0c59251658c68b6c95cd52dd0c513a912a" +dependencies = [ + "getrandom 0.3.4", + "libm", + "rand 0.9.2", + "siphasher", +] + [[package]] name = "fastrand" version = "2.3.0" diff --git a/crates/l2/tee/quote-gen/Cargo.lock b/crates/l2/tee/quote-gen/Cargo.lock index 359b59cac95..cd49b893d67 100644 --- a/crates/l2/tee/quote-gen/Cargo.lock +++ b/crates/l2/tee/quote-gen/Cargo.lock @@ -2437,6 +2437,7 @@ dependencies = [ "ethrex-common", "ethrex-rlp", "ethrex-trie", + "fastbloom", "hex", "rustc-hash", "serde 1.0.228", @@ -2536,6 +2537,18 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "fastbloom" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18c1ddb9231d8554c2d6bdf4cfaabf0c59251658c68b6c95cd52dd0c513a912a" +dependencies = [ + "getrandom 0.3.3", + "libm", + "rand 0.9.2", + "siphasher", +] + [[package]] name = "fastrand" version = "2.3.0" diff --git a/crates/storage/Cargo.toml b/crates/storage/Cargo.toml index 57d0249b3e0..357011df5fa 100644 --- a/crates/storage/Cargo.toml +++ b/crates/storage/Cargo.toml @@ -26,6 +26,7 @@ rocksdb = { workspace = true, optional = true } rustc-hash.workspace = true tokio = {
workspace = true, optional = true, features = ["rt"] } bincode = "1.3.3" +fastbloom = "0.14.0" [features] default = [] diff --git a/crates/storage/trie_db/layering.rs b/crates/storage/trie_db/layering.rs index b703fcb6a39..71a2739fecc 100644 --- a/crates/storage/trie_db/layering.rs +++ b/crates/storage/trie_db/layering.rs @@ -11,19 +11,45 @@ struct TrieLayer { id: usize, } -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug)] pub struct TrieLayerCache { /// Monotonically increasing ID for layers, starting at 1. /// TODO: this implementation panics on overflow last_id: usize, layers: FxHashMap<H256, Arc<TrieLayer>>, + /// Global bloom that accrues all layer blooms. + /// + /// The bloom filter is used to avoid looking up all layers when the given path doesn't exist in any + /// layer, thus going directly to the database. + bloom: fastbloom::BloomFilter, +} + +impl Default for TrieLayerCache { + fn default() -> Self { + Self { + // todo: tune this + bloom: fastbloom::BloomFilter::with_num_bits(8192).expected_items(128 * 512), + last_id: 0, + layers: Default::default(), + } + } } impl TrieLayerCache { pub fn get(&self, state_root: H256, key: Nibbles) -> Option<Vec<u8>> { + let key = key.as_ref(); + + // Fast check to know if any layer may contain the given key. + // We can only be certain it doesn't exist, but if it returns true it may or may not exist (false positive). + if !self.bloom.contains(key) { + // TrieWrapper goes to db when returning None. + return None; + } + let mut current_state_root = state_root; + while let Some(layer) = self.layers.get(&current_state_root) { - if let Some(value) = layer.nodes.get(key.as_ref()) { + if let Some(value) = layer.nodes.get(key) { return Some(value.clone()); } current_state_root = layer.parent; @@ -70,9 +96,13 @@ impl TrieLayerCache { return; } + // add this new bloom to the global one.
+ self.bloom + .insert_all(key_values.iter().map(|x| x.0.as_ref())); + let nodes: FxHashMap<Vec<u8>, Vec<u8>> = key_values .into_iter() - .map(|(path, node)| (path.into_vec(), node)) + .map(|(path, value)| (path.into_vec(), value)) .collect(); self.last_id += 1; @@ -84,6 +114,15 @@ impl TrieLayerCache { self.layers.insert(state_root, Arc::new(entry)); } + /// Rebuilds the global bloom filter by accruing all currently existing layers. + pub fn rebuild_bloom(&mut self) { + self.bloom.clear(); + + for entry in self.layers.values() { + self.bloom.insert_all(entry.nodes.iter().map(|x| x.0)); + } + } + pub fn commit(&mut self, state_root: H256) -> Option<Vec<(Vec<u8>, Vec<u8>)>> { let layer = match Arc::try_unwrap(self.layers.remove(&state_root)?) { Ok(layer) => layer, @@ -93,6 +132,7 @@ impl TrieLayerCache { let parent_nodes = self.commit(layer.parent); // older layers are useless self.layers.retain(|_, item| item.id > layer.id); + self.rebuild_bloom(); // layers removed, rebuild global bloom filter. Some( parent_nodes .unwrap_or_default() diff --git a/tooling/Cargo.lock b/tooling/Cargo.lock index 68bfbcadbe3..79dcf8b3635 100644 --- a/tooling/Cargo.lock +++ b/tooling/Cargo.lock @@ -3641,6 +3641,7 @@ dependencies = [ "ethrex-common 5.0.0", "ethrex-rlp 5.0.0", "ethrex-trie 5.0.0", + "fastbloom", "hex", "rocksdb", "rustc-hash 2.1.1", @@ -3775,6 +3776,18 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "fastbloom" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18c1ddb9231d8554c2d6bdf4cfaabf0c59251658c68b6c95cd52dd0c513a912a" +dependencies = [ + "getrandom 0.3.4", + "libm", + "rand 0.9.2", + "siphasher", +] + [[package]] name = "fastrand" version = "2.3.0"