Skip to content

Commit 2bdbced

Browse files
ferranbtcanercidam
andauthored
Custom health monitor for execution and beacon (#283)
This PR creates a new internal service, `healthmon` (shipped in the `playground-utils` Docker image), intended to replace the current https://github.com/flashbots/ethereum-healthmon sidecar. Since this bespoke version of the health monitor is intended to be used only in the playground, we have made certain design decisions to improve the UX. For example, the chains are considered ready at block 0 as soon as the RPC endpoints are available. Closes #282 --------- Co-authored-by: Caner Çıdam <[email protected]>
1 parent 620b143 commit 2bdbced

File tree

11 files changed

+384
-24
lines changed

11 files changed

+384
-24
lines changed

.github/workflows/checks.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ jobs:
8383
- name: Install docker compose
8484
run: ./scripts/ci-setup-docker-compose.sh
8585

86+
- name: Build playground utils
87+
run: ./scripts/ci-build-playground-utils.sh
88+
8689
- name: Run unit tests
8790
run: make integration-test
8891

.github/workflows/docker-utils-release.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ on:
1010
paths:
1111
- 'cl-proxy/**'
1212
- 'mev-boost-relay/**'
13+
- 'healthmon/**'
1314
- 'go.mod'
1415
- 'go.sum'
1516
- 'Dockerfile'

Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ COPY . .
1313

1414
# Build all applications with CGo enabled
1515
RUN go build -o /usr/local/bin/cl-proxy ./cl-proxy/cmd/main.go && \
16-
go build -o /usr/local/bin/mev-boost-relay ./mev-boost-relay/cmd/main.go
16+
go build -o /usr/local/bin/mev-boost-relay ./mev-boost-relay/cmd/main.go && \
17+
go build -o /usr/local/bin/healthmon ./healthmon/cmd/main.go

go.mod

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ require (
1111
github.com/dustinkirkland/golang-petname v0.0.0-20240428194347-eebcea082ee0
1212
github.com/ethereum/go-ethereum v1.15.10
1313
github.com/flashbots/go-boost-utils v1.9.1-0.20250819134059-e5294cb450c9
14+
github.com/flashbots/go-template v1.0.0
1415
github.com/flashbots/mev-boost-relay v0.32.0-rc2
16+
github.com/go-chi/httplog/v2 v2.1.1
1517
github.com/hashicorp/go-uuid v1.0.3
1618
github.com/holiman/uint256 v1.3.2
1719
github.com/otiai10/copy v1.14.1
@@ -65,6 +67,7 @@ require (
6567
github.com/ferranbt/fastssz v0.1.4 // indirect
6668
github.com/flashbots/go-utils v0.11.0 // indirect
6769
github.com/fsnotify/fsnotify v1.6.0 // indirect
70+
github.com/go-chi/chi/v5 v5.2.1 // indirect
6871
github.com/go-gorp/gorp/v3 v3.1.0 // indirect
6972
github.com/go-logr/logr v1.4.2 // indirect
7073
github.com/go-logr/stdr v1.2.2 // indirect
@@ -156,7 +159,7 @@ require (
156159
golang.org/x/crypto v0.37.0 // indirect
157160
golang.org/x/net v0.38.0 // indirect
158161
golang.org/x/oauth2 v0.26.0 // indirect
159-
golang.org/x/sys v0.32.0 // indirect
162+
golang.org/x/sys v0.33.0 // indirect
160163
golang.org/x/term v0.31.0 // indirect
161164
golang.org/x/text v0.24.0 // indirect
162165
golang.org/x/time v0.9.0 // indirect

go.sum

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ github.com/ferranbt/fastssz v0.1.4 h1:OCDB+dYDEQDvAgtAGnTSidK1Pe2tW3nFV40XyMkTeD
133133
github.com/ferranbt/fastssz v0.1.4/go.mod h1:Ea3+oeoRGGLGm5shYAeDgu6PGUlcvQhE2fILyD9+tGg=
134134
github.com/flashbots/go-boost-utils v1.9.1-0.20250819134059-e5294cb450c9 h1:uXedHkX9U9q6tej9VMlNhwGkcGpSK9x+kxBI5xo8Biw=
135135
github.com/flashbots/go-boost-utils v1.9.1-0.20250819134059-e5294cb450c9/go.mod h1:52faeojMg+vjCtjuekWpRzvYU8nlD1ZEPRvRJ2q3bv0=
136+
github.com/flashbots/go-template v1.0.0 h1:ODeaR/kLOQcWaA8tKgTrdZD1BvxXJY53eVLbFfnmxiQ=
137+
github.com/flashbots/go-template v1.0.0/go.mod h1:fBFD7uLaDZ0bEyNKNMx+ByJBg/B2oJKWeU2G7kZtrcY=
136138
github.com/flashbots/go-utils v0.11.0 h1:MuI9OOl40MukSL2ucKKQG1sxxl5Cqjla41TRubGNu0w=
137139
github.com/flashbots/go-utils v0.11.0/go.mod h1:i4xxEB6sHDFfNWEIfh+rP6nx3LxynEn8AOZa05EYgwA=
138140
github.com/flashbots/mev-boost-relay v0.32.0-rc2 h1:0//XfKzBmSnmvXI4iLiTbw0WxElo8Nc6mp7DWPPSsxU=
@@ -146,6 +148,10 @@ github.com/gballet/go-libpcsclite v0.0.0-20191108122812-4678299bea08 h1:f6D9Hr8x
146148
github.com/gballet/go-libpcsclite v0.0.0-20191108122812-4678299bea08/go.mod h1:x7DCsMOv1taUwEWCzT4cmDeAkigA5/QCwUodaVOe8Ww=
147149
github.com/getsentry/sentry-go v0.27.0 h1:Pv98CIbtB3LkMWmXi4Joa5OOcwbmnX88sF5qbK3r3Ps=
148150
github.com/getsentry/sentry-go v0.27.0/go.mod h1:lc76E2QywIyW8WuBnwl8Lc4bkmQH4+w1gwTf25trprY=
151+
github.com/go-chi/chi/v5 v5.2.1 h1:KOIHODQj58PmL80G2Eak4WdvUzjSJSm0vG72crDCqb8=
152+
github.com/go-chi/chi/v5 v5.2.1/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops=
153+
github.com/go-chi/httplog/v2 v2.1.1 h1:ojojiu4PIaoeJ/qAO4GWUxJqvYUTobeo7zmuHQJAxRk=
154+
github.com/go-chi/httplog/v2 v2.1.1/go.mod h1:/XXdxicJsp4BA5fapgIC3VuTD+z0Z/VzukoB3VDc1YE=
149155
github.com/go-gorp/gorp/v3 v3.1.0 h1:ItKF/Vbuj31dmV4jxA1qblpSwkl9g1typ24xoe70IGs=
150156
github.com/go-gorp/gorp/v3 v3.1.0/go.mod h1:dLEjIyyRNiXvNZ8PSmzpt1GsWAUK8kjVhEpjH8TixEw=
151157
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
@@ -538,8 +544,8 @@ golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBc
538544
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
539545
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
540546
golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
541-
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
542-
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
547+
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
548+
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
543549
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
544550
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
545551
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=

healthmon/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
2+
# Healthmon

healthmon/cmd/main.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
package main
2+
3+
import (
4+
"flag"
5+
6+
"github.com/flashbots/builder-playground/healthmon"
7+
)
8+
9+
func main() {
10+
var config healthmon.Config
11+
12+
flag.StringVar(&config.Chain, "chain", "", "Type of ethereum chain to monitor (beacon or execution)")
13+
flag.StringVar(&config.URL, "url", "", "Full node URL (e.g., http://localhost:8545)")
14+
flag.StringVar(&config.Addr, "service.addr", "localhost:21171", "Address for the health check service to listen on (e.g., ':21171')")
15+
flag.IntVar(&config.BlockTimeSeconds, "blocktime", 0, "expected block time in seconds (optional)")
16+
flag.Parse()
17+
18+
healthmon.Start(&config)
19+
}

healthmon/healthmon.go

Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
package healthmon
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"io"
7+
"net/http"
8+
"os"
9+
"sync/atomic"
10+
"time"
11+
12+
"github.com/ethereum/go-ethereum"
13+
"github.com/ethereum/go-ethereum/core/types"
14+
"github.com/ethereum/go-ethereum/ethclient"
15+
mevboostrelay "github.com/flashbots/builder-playground/mev-boost-relay"
16+
"github.com/flashbots/go-template/common"
17+
"github.com/flashbots/mev-boost-relay/beaconclient"
18+
mevRCommon "github.com/flashbots/mev-boost-relay/common"
19+
"github.com/go-chi/httplog/v2"
20+
)
21+
22+
// isHealthy holds the current health verdict. It is written through
// setHealthy and read by statusHandler; its zero value is false (not ready).
var isHealthy atomic.Bool
23+
24+
// Config carries the settings consumed by Start.
type Config struct {
	// Chain selects the kind of node to monitor: "beacon" or "execution".
	Chain string

	// URL is the endpoint of the node that is polled.
	URL string

	// Addr is the listen address of the HTTP health check service.
	Addr string

	// BlockTimeSeconds is the expected block time in seconds. Zero means
	// the block time is not known up front.
	BlockTimeSeconds int
}
30+
31+
func Start(config *Config) {
32+
log := common.SetupLogger(&common.LoggingOpts{
33+
Version: common.Version,
34+
})
35+
36+
updates := make(chan blockUpdate, 10)
37+
log.Info("Started", "chain", config.Chain, "url", config.URL)
38+
39+
switch config.Chain {
40+
case "beacon":
41+
go monitorBeacon(log, context.Background(), config.URL, updates)
42+
case "execution":
43+
go monitorExecution(log, context.Background(), config.URL, updates)
44+
default:
45+
log.Error("Unknown chain", "chain", config.Chain)
46+
os.Exit(1)
47+
}
48+
49+
go monitor(log, config.BlockTimeSeconds, context.Background(), updates)
50+
51+
log.Info("Starting service server", "addr", config.Addr)
52+
53+
http.HandleFunc("/ready", statusHandler)
54+
http.ListenAndServe(config.Addr, nil)
55+
}
56+
57+
func statusHandler(w http.ResponseWriter, req *http.Request) {
58+
if isHealthy.Load() {
59+
io.WriteString(w, "OK")
60+
} else {
61+
w.WriteHeader(503)
62+
io.WriteString(w, "NOT READY")
63+
}
64+
}
65+
66+
func setHealthy(healthy bool) {
67+
isHealthy.Store(healthy)
68+
}
69+
70+
type monitorState struct {
71+
log *httplog.Logger
72+
firstBlockUpdate *blockUpdate
73+
blockTimeSeconds int
74+
blockTimer *time.Timer
75+
}
76+
77+
func newMonitorState(log *httplog.Logger, blockTimeSeconds int) *monitorState {
78+
// this timer will start after the blocks are received and we can figure out the block time
79+
blockTimer := time.NewTimer(0)
80+
blockTimer.Stop()
81+
82+
return &monitorState{
83+
log: log,
84+
firstBlockUpdate: nil,
85+
blockTimeSeconds: blockTimeSeconds,
86+
blockTimer: blockTimer,
87+
}
88+
}
89+
90+
// wiggleRoomSeconds is the slack, in seconds, that handleUpdate adds on top
// of the block time when arming the block timer.
var wiggleRoomSeconds = 1
91+
92+
func (m *monitorState) handleUpdate(update blockUpdate) {
93+
m.log.Info("Processing block update", "number", update.Number, "timestamp", update.Timestamp)
94+
95+
if m.firstBlockUpdate == nil {
96+
m.firstBlockUpdate = &update
97+
}
98+
99+
if m.blockTimeSeconds == 0 {
100+
// if block time is not known, either:
101+
// - use the block time provided in the update (beacon)
102+
// - use the difference between the first and current block (execution)
103+
if update.BlockTime != 0 {
104+
m.log.Info("Using block time from update", "block time seconds", update.BlockTime)
105+
m.blockTimeSeconds = update.BlockTime
106+
} else if m.firstBlockUpdate != nil && update.Number > m.firstBlockUpdate.Number {
107+
blocktime := update.Timestamp.Sub(m.firstBlockUpdate.Timestamp)
108+
m.log.Info("Calculated block time from timestamps", "block time seconds", blocktime)
109+
m.blockTimeSeconds = int(blocktime.Seconds())
110+
}
111+
}
112+
113+
if m.blockTimeSeconds != 0 {
114+
m.log.Info("Resetting block timer", "blockTimeSeconds", m.blockTimeSeconds)
115+
m.blockTimer.Reset(time.Duration(m.blockTimeSeconds+wiggleRoomSeconds) * time.Second)
116+
}
117+
}
118+
119+
func monitor(log *httplog.Logger, blockTimeSeconds int, ctx context.Context, updates <-chan blockUpdate) {
120+
state := newMonitorState(log, blockTimeSeconds)
121+
122+
for {
123+
select {
124+
case <-ctx.Done():
125+
return
126+
case update := <-updates:
127+
// receiving a block always means healthy since the node is producing blocks
128+
// and the unhealthy state is set during the block timer timeout
129+
setHealthy(true)
130+
131+
state.handleUpdate(update)
132+
133+
case <-state.blockTimer.C:
134+
log.Warn("Block timer expired, setting unhealthy")
135+
setHealthy(false)
136+
}
137+
}
138+
}
139+
140+
// blockUpdate is the message the chain pollers send to monitor whenever they
// observe a new head.
type blockUpdate struct {
	// Number is the execution block number or the beacon head slot.
	Number uint64

	// Timestamp is the block time stamp; the execution poller sets it, the
	// beacon poller leaves it at the zero value.
	Timestamp time.Time

	// BlockTime is the block time in seconds when the poller knows it
	// (the beacon poller reads it from the spec); 0 otherwise.
	BlockTime int
}
145+
146+
func monitorBeacon(log *httplog.Logger, ctx context.Context, url string, updates chan<- blockUpdate) {
147+
bLog := mevRCommon.LogSetup(false, "info")
148+
beaconClient := beaconclient.NewProdBeaconInstance(bLog, url, url)
149+
150+
var lastSlot *uint64
151+
var blockTime int
152+
153+
for {
154+
select {
155+
case <-ctx.Done():
156+
return
157+
case <-time.After(500 * time.Millisecond):
158+
sync, err := beaconClient.SyncStatus()
159+
if err != nil {
160+
log.Error("Failed to get beacon sync status", "err", err)
161+
continue
162+
}
163+
164+
if sync.IsSyncing {
165+
log.Debug("Beacon node is syncing", "headSlot", sync.HeadSlot)
166+
continue
167+
}
168+
169+
if blockTime == 0 {
170+
spec, err := mevboostrelay.GetSpec(url)
171+
if err != nil {
172+
log.Error("Failed to get beacon spec", "err", err)
173+
} else {
174+
blockTime = int(spec.SecondsPerSlot)
175+
log.Info("Fetched beacon spec", "blockTime", blockTime)
176+
}
177+
}
178+
179+
if lastSlot == nil || *lastSlot < sync.HeadSlot {
180+
lastSlot = &sync.HeadSlot
181+
log.Info("New beacon block received", "slot", sync.HeadSlot)
182+
updates <- blockUpdate{Number: sync.HeadSlot, BlockTime: blockTime}
183+
}
184+
}
185+
}
186+
}
187+
188+
func monitorExecution(log *httplog.Logger, ctx context.Context, url string, updates chan<- blockUpdate) {
189+
client, err := ethclient.Dial(url)
190+
if err != nil {
191+
log.Error("Failed to connect to execution client", "err", err)
192+
os.Exit(1)
193+
}
194+
195+
getLatestBlock := func() (*types.Header, error) {
196+
// We use a manual RPC call instead of the Geth SDK's HeaderByNumber because
197+
// we query both OP and normal L1 clients which have different transaction types
198+
// that cannot be decoded with a single Geth SDK. The Geth SDK only returns blocks
199+
// with transactions fully decoded (not just hashes), so we call the RPC directly
200+
// to avoid transaction decoding issues.
201+
var raw json.RawMessage
202+
if err := client.Client().CallContext(ctx, &raw, "eth_getBlockByNumber", "latest", false); err != nil {
203+
return nil, err
204+
}
205+
206+
// Decode header and transactions.
207+
var head *types.Header
208+
if err := json.Unmarshal(raw, &head); err != nil {
209+
return nil, err
210+
}
211+
// When the block is not found, the API returns JSON null.
212+
if head == nil {
213+
return nil, ethereum.NotFound
214+
}
215+
return head, nil
216+
}
217+
218+
var lastBlock *uint64
219+
for {
220+
select {
221+
case <-ctx.Done():
222+
return
223+
case <-time.After(500 * time.Millisecond):
224+
sync, err := client.SyncProgress(ctx)
225+
if err != nil {
226+
log.Error("Failed to get execution sync progress", "err", err)
227+
continue
228+
}
229+
230+
if sync != nil && !sync.Done() {
231+
log.Debug("Execution node is syncing", "currentBlock", sync.CurrentBlock, "highestBlock", sync.HighestBlock)
232+
continue
233+
}
234+
header, err := getLatestBlock()
235+
if err != nil {
236+
log.Error("Failed to get execution block number", "err", err)
237+
continue
238+
}
239+
num := header.Number.Uint64()
240+
if lastBlock == nil || num > *lastBlock {
241+
lastBlock = &num
242+
timestamp := time.Unix(int64(header.Time), 0)
243+
244+
log.Info("New execution block received", "number", num)
245+
updates <- blockUpdate{Number: num, Timestamp: timestamp}
246+
}
247+
}
248+
}
249+
}

0 commit comments

Comments
 (0)