From b6130d9f0058d12c12aaf25cf87c3c33a5d7666f Mon Sep 17 00:00:00 2001 From: Bastien Faivre Date: Tue, 23 Dec 2025 12:21:14 +0100 Subject: [PATCH 1/3] feat: replay height in reth when lagging behind --- CHANGELOG.md | 1 + app/src/app.rs | 133 +++++++++++++++++++++++++++++++++++++++++++ compose.yaml | 7 --- engine/src/engine.rs | 9 +++ 4 files changed, 143 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 554fd99..915fe54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ### FEATURES - Scripts can now generate setup for more than 4 nodes ([#136](https://github.com/informalsystems/emerald/pull/136)) +- Height replay mechanism automatically recovers when Reth is behind Emerald's stored height after a crash, eliminating the need for `--engine.persistence-threshold=0` ### FIXES diff --git a/app/src/app.rs b/app/src/app.rs index e56d076..2f97376 100644 --- a/app/src/app.rs +++ b/app/src/app.rs @@ -47,6 +47,106 @@ pub async fn initialize_state_from_genesis(state: &mut State, engine: &Engine) - Ok(()) } +/// Replay blocks from Emerald's store to the execution client (Reth). +/// This is needed when Reth is behind Emerald's stored height after a crash. +async fn replay_heights_to_engine( + state: &State, + engine: &Engine, + start_height: Height, + end_height: Height, + emerald_config: &EmeraldConfig, +) -> eyre::Result<()> { + info!( + "🔄 Replaying heights {} to {} to execution client", + start_height, end_height + ); + + for height in start_height.as_u64()..=end_height.as_u64() { + let height = Height::new(height); + + // Get the certificate and header from store + let (_certificate, header_bytes) = state + .store + .get_certificate_and_header(height) + .await? + .ok_or_eyre(format!( + "Missing certificate or header for height {}", + height + ))?; + + // Deserialize the execution payload + let execution_payload = ExecutionPayloadV3::from_ssz_bytes(&header_bytes).map_err(|e| { + eyre!( + "Failed to deserialize execution payload at height {}: {:?}", + height, + e + ) + })?; + + debug!( + "🔄 Replaying block at height {} with hash {:?}", + height, execution_payload.payload_inner.payload_inner.block_hash + ); + + // Extract versioned hashes from blob transactions + let block: Block = execution_payload.clone().try_into_block().map_err(|e| { + eyre!( + "Failed to convert execution payload to block at height {}: {}", + height, + e + ) + })?; + let versioned_hashes: Vec = + block.body.blob_versioned_hashes_iter().copied().collect(); + + // Submit the block to Reth + let payload_status = engine + .notify_new_block_with_retry( + execution_payload.clone(), + versioned_hashes, + &emerald_config.retry_config, + ) + .await?; + + // Verify the block was accepted + match payload_status.status { + PayloadStatusEnum::Valid => { + debug!("✅ Block at height {} replayed successfully", height); + } + PayloadStatusEnum::Invalid { validation_error } => { + return Err(eyre::eyre!( + "Block replay failed at height {}: {}", + height, + validation_error + )); + } + PayloadStatusEnum::Accepted => { + // ACCEPTED is valid for new_payload - it means the block was buffered + debug!("📥 Block at height {} accepted (buffered)", height); + } + PayloadStatusEnum::Syncing => { + return Err(eyre::eyre!( + "Block replay failed at height {}: execution client still syncing", + height + )); + } + } + + // Update forkchoice to this block + engine + .set_latest_forkchoice_state( + execution_payload.payload_inner.payload_inner.block_hash, + &emerald_config.retry_config, + ) + .await?; + + debug!("🎯 Forkchoice updated to height {}", height); + } + + info!("✅ Successfully replayed all heights to execution client"); + Ok(()) +} + pub async fn initialize_state_from_existing_block( state: &mut State, engine: &Engine, @@ -61,6 +161,39 @@ pub async fn initialize_state_from_existing_block( .await .ok_or_eyre("we have not atomically stored the last block, database corrupted")?; + // Check if Reth is behind Emerald's stored height + let reth_latest_height = engine.get_latest_block_number().await?; + + match reth_latest_height { + Some(reth_height) if reth_height < start_height.as_u64() => { + // Reth is behind - we need to replay blocks + warn!( + "⚠️ Execution client is at height {} but Emerald has blocks up to height {}. Starting height replay.", + reth_height, start_height + ); + + // Replay from Reth's next height to Emerald's stored height + let replay_start = Height::new(reth_height + 1); + replay_heights_to_engine(state, engine, replay_start, start_height, emerald_config) + .await?; + + info!("✅ Height replay completed successfully"); + } + Some(reth_height) => { + debug!( + "Execution client at height {} is aligned with or ahead of Emerald's stored height {}", + reth_height, start_height + ); + } + None => { + // No blocks in Reth yet (genesis case) - this shouldn't happen here + // but handle it gracefully + warn!("⚠️ Execution client has no blocks, replaying from genesis"); + replay_heights_to_engine(state, engine, Height::new(1), start_height, emerald_config) + .await?; + } + } + let payload_status = engine .send_forkchoice_updated( latest_block_candidate_from_store.block_hash, diff --git a/compose.yaml b/compose.yaml index 6643621..b49f067 100644 --- a/compose.yaml +++ b/compose.yaml @@ -39,7 +39,6 @@ services: - "--discovery.port=31303" - "--port=31303" - "--nat=extip:127.0.0.1" - - "--engine.persistence-threshold=0" # - "--builder.gaslimit=3600000000" # default * 100 # - "--builder.interval=10ms" # - "--builder.deadline=1" # The deadline in seconds for when the payload builder job should resolve @@ -87,7 +86,6 @@ services: - "--discovery.port=32303" - "--port=32303" - "--nat=extip:127.0.0.1" - - "--engine.persistence-threshold=0" # - "--builder.gaslimit=3600000000" # default * 100 # - "--builder.interval=10ms" # - "--builder.deadline=1" # The deadline in seconds for when the payload builder job should resolve @@ -127,7 +125,6 @@ services: - "--discovery.port=33303" - "--port=33303" - "--nat=extip:127.0.0.1" - - "--engine.persistence-threshold=0" # - "--builder.gaslimit=3600000000" # default * 100 # - "--builder.interval=10ms" # - "--builder.deadline=1" # The deadline in seconds for when the payload builder job should resolve @@ -167,7 +164,6 @@ services: - "--discovery.port=34303" - "--port=34303" - "--nat=extip:127.0.0.1" - - "--engine.persistence-threshold=0" # - "--builder.gaslimit=3600000000" # default * 100 # - "--builder.interval=10ms" # - "--builder.deadline=1" # The deadline in seconds for when the payload builder job should resolve @@ -207,7 +203,6 @@ services: - "--discovery.port=35303" - "--port=35303" - "--nat=extip:127.0.0.1" - - "--engine.persistence-threshold=0" # - "--builder.gaslimit=3600000000" # default * 100 # - "--builder.interval=10ms" # - "--builder.deadline=1" # The deadline in seconds for when the payload builder job should resolve @@ -247,7 +242,6 @@ services: - "--discovery.port=36303" - "--port=36303" - "--nat=extip:127.0.0.1" - - "--engine.persistence-threshold=0" # - "--builder.gaslimit=3600000000" # default * 100 # - "--builder.interval=10ms" # - "--builder.deadline=1" # The deadline in seconds for when the payload builder job should resolve @@ -287,7 +281,6 @@ services: - "--discovery.port=37303" - "--port=37303" - "--nat=extip:127.0.0.1" - - "--engine.persistence-threshold=0" # - "--builder.gaslimit=3600000000" # default * 100 # - "--builder.interval=10ms" # - "--builder.deadline=1" # The deadline in seconds for when the payload builder job should resolve diff --git a/engine/src/engine.rs b/engine/src/engine.rs index 76c31df..bef1ab6 100644 --- a/engine/src/engine.rs +++ b/engine/src/engine.rs @@ -291,6 +291,15 @@ impl Engine { } } + /// Get the latest block number from the execution client. + /// Returns None if the client has no blocks (genesis case). + pub async fn get_latest_block_number(&self) -> eyre::Result> { + debug!("🟠 get_latest_block_number"); + + let block = self.eth.get_block_by_number("latest").await?; + Ok(block.map(|b| b.block_number)) + } + /// Returns the duration since the unix epoch. fn _timestamp_now(&self) -> u64 { SystemTime::now() From 4f8b4df653454e04c6d57504c5be1979cde69afa Mon Sep 17 00:00:00 2001 From: Bastien Faivre Date: Tue, 23 Dec 2025 12:36:51 +0100 Subject: [PATCH 2/3] fix: clippy issue --- app/src/app.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/app/src/app.rs b/app/src/app.rs index 54a01c2..1235d65 100644 --- a/app/src/app.rs +++ b/app/src/app.rs @@ -69,10 +69,7 @@ async fn replay_heights_to_engine( .store .get_certificate_and_header(height) .await? - .ok_or_eyre(format!( - "Missing certificate or header for height {}", - height - ))?; + .ok_or_eyre(format!("Missing certificate or header for height {height}"))?; // Deserialize the execution payload let execution_payload = ExecutionPayloadV3::from_ssz_bytes(&header_bytes).map_err(|e| { From d0d8ffcccd8b75932fd490f0bd9d559c17d0c889 Mon Sep 17 00:00:00 2001 From: Bastien Faivre Date: Tue, 23 Dec 2025 16:42:29 +0100 Subject: [PATCH 3/3] fix: ACCEPTED state is invalid, validator set from existing block should be stored for next height --- app/src/app.rs | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/app/src/app.rs b/app/src/app.rs index 1235d65..ccf5de0 100644 --- a/app/src/app.rs +++ b/app/src/app.rs @@ -118,8 +118,11 @@ async fn replay_heights_to_engine( )); } PayloadStatusEnum::Accepted => { - // ACCEPTED is valid for new_payload - it means the block was buffered - debug!("📥 Block at height {} accepted (buffered)", height); + // ACCEPTED is no instant finality and there is a possibility of a fork. + return Err(eyre::eyre!( + "Block replay failed at height {}: execution client returned ACCEPTED status, which is not supported during replay", + height + )); } PayloadStatusEnum::Syncing => { return Err(eyre::eyre!( @@ -211,13 +214,20 @@ pub async fn initialize_state_from_existing_block( // requisite data for the validation is missing debug!("Payload is valid"); debug!("latest block {:?}", state.latest_block); + + // Read the validator set at the stored block - this is the validator set + // that will be active for the NEXT height (where consensus will start) let block_validator_set = read_validators_from_contract( engine.eth.url().as_ref(), &latest_block_candidate_from_store.block_hash, ) .await?; - debug!("🌈 Got block validator set: {:?}", block_validator_set); - state.set_validator_set(start_height, block_validator_set); + + // Consensus will start at the next height, so we set the validator set for that height + let next_height = start_height.increment(); + debug!("🌈 Got validator set: {:?} for height {}", block_validator_set, next_height); + state.set_validator_set(next_height, block_validator_set); + Ok(()) } PayloadStatusEnum::Invalid { validation_error } => Err(eyre::eyre!(validation_error)), @@ -312,7 +322,9 @@ pub async fn run( start_height, state .get_validator_set(start_height) - .ok_or_eyre("Validator set not found for start height {start_height}")? + .ok_or_eyre(format!( + "Validator set not found for start height {start_height}" + ))? .clone(), )) .is_err()