Custom health monitor for execution and beacon (#283)

ferranbt · canercidam · web-flow · commit 2bdbced16a48 · 2026-01-05T16:55:56.000+03:00
This PR creates a new internal service, `healthmon` (shipped in the `playground-utils` Docker image), intended to replace the current https://github.com/flashbots/ethereum-healthmon sidecar. Since this bespoke version of the health monitor is intended to be used only in the playground, we have made certain design decisions to improve the UX. For example, a chain is considered ready at block 0, as soon as its RPC endpoint is available. Closes #282 --------- Co-authored-by: Caner Çıdam <canercidam01@gmail.com>
diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml
@@ -83,6 +83,9 @@ jobs:
       - name: Install docker compose
         run: ./scripts/ci-setup-docker-compose.sh
 
+      - name: Build playground utils
+        run: ./scripts/ci-build-playground-utils.sh
+        
       - name: Run unit tests
         run: make integration-test
 
diff --git a/.github/workflows/docker-utils-release.yaml b/.github/workflows/docker-utils-release.yaml
@@ -10,6 +10,7 @@ on:
     paths:
       - 'cl-proxy/**'
       - 'mev-boost-relay/**'
+      - 'healthmon/**'
       - 'go.mod'
       - 'go.sum'
       - 'Dockerfile'
diff --git a/Dockerfile b/Dockerfile
@@ -13,4 +13,5 @@ COPY . .
 
 # Build all applications with CGo enabled
 RUN go build -o /usr/local/bin/cl-proxy ./cl-proxy/cmd/main.go && \
-    go build -o /usr/local/bin/mev-boost-relay ./mev-boost-relay/cmd/main.go
+    go build -o /usr/local/bin/mev-boost-relay ./mev-boost-relay/cmd/main.go && \
+    go build -o /usr/local/bin/healthmon ./healthmon/cmd/main.go
diff --git a/go.mod b/go.mod
@@ -11,7 +11,9 @@ require (
 	github.com/dustinkirkland/golang-petname v0.0.0-20240428194347-eebcea082ee0
 	github.com/ethereum/go-ethereum v1.15.10
 	github.com/flashbots/go-boost-utils v1.9.1-0.20250819134059-e5294cb450c9
+	github.com/flashbots/go-template v1.0.0
 	github.com/flashbots/mev-boost-relay v0.32.0-rc2
+	github.com/go-chi/httplog/v2 v2.1.1
 	github.com/hashicorp/go-uuid v1.0.3
 	github.com/holiman/uint256 v1.3.2
 	github.com/otiai10/copy v1.14.1
@@ -65,6 +67,7 @@ require (
 	github.com/ferranbt/fastssz v0.1.4 // indirect
 	github.com/flashbots/go-utils v0.11.0 // indirect
 	github.com/fsnotify/fsnotify v1.6.0 // indirect
+	github.com/go-chi/chi/v5 v5.2.1 // indirect
 	github.com/go-gorp/gorp/v3 v3.1.0 // indirect
 	github.com/go-logr/logr v1.4.2 // indirect
 	github.com/go-logr/stdr v1.2.2 // indirect
@@ -156,7 +159,7 @@ require (
 	golang.org/x/crypto v0.37.0 // indirect
 	golang.org/x/net v0.38.0 // indirect
 	golang.org/x/oauth2 v0.26.0 // indirect
-	golang.org/x/sys v0.32.0 // indirect
+	golang.org/x/sys v0.33.0 // indirect
 	golang.org/x/term v0.31.0 // indirect
 	golang.org/x/text v0.24.0 // indirect
 	golang.org/x/time v0.9.0 // indirect
diff --git a/go.sum b/go.sum
@@ -133,6 +133,8 @@ github.com/ferranbt/fastssz v0.1.4 h1:OCDB+dYDEQDvAgtAGnTSidK1Pe2tW3nFV40XyMkTeD
 github.com/ferranbt/fastssz v0.1.4/go.mod h1:Ea3+oeoRGGLGm5shYAeDgu6PGUlcvQhE2fILyD9+tGg=
 github.com/flashbots/go-boost-utils v1.9.1-0.20250819134059-e5294cb450c9 h1:uXedHkX9U9q6tej9VMlNhwGkcGpSK9x+kxBI5xo8Biw=
 github.com/flashbots/go-boost-utils v1.9.1-0.20250819134059-e5294cb450c9/go.mod h1:52faeojMg+vjCtjuekWpRzvYU8nlD1ZEPRvRJ2q3bv0=
+github.com/flashbots/go-template v1.0.0 h1:ODeaR/kLOQcWaA8tKgTrdZD1BvxXJY53eVLbFfnmxiQ=
+github.com/flashbots/go-template v1.0.0/go.mod h1:fBFD7uLaDZ0bEyNKNMx+ByJBg/B2oJKWeU2G7kZtrcY=
 github.com/flashbots/go-utils v0.11.0 h1:MuI9OOl40MukSL2ucKKQG1sxxl5Cqjla41TRubGNu0w=
 github.com/flashbots/go-utils v0.11.0/go.mod h1:i4xxEB6sHDFfNWEIfh+rP6nx3LxynEn8AOZa05EYgwA=
 github.com/flashbots/mev-boost-relay v0.32.0-rc2 h1:0//XfKzBmSnmvXI4iLiTbw0WxElo8Nc6mp7DWPPSsxU=
@@ -146,6 +148,10 @@ github.com/gballet/go-libpcsclite v0.0.0-20191108122812-4678299bea08 h1:f6D9Hr8x
 github.com/gballet/go-libpcsclite v0.0.0-20191108122812-4678299bea08/go.mod h1:x7DCsMOv1taUwEWCzT4cmDeAkigA5/QCwUodaVOe8Ww=
 github.com/getsentry/sentry-go v0.27.0 h1:Pv98CIbtB3LkMWmXi4Joa5OOcwbmnX88sF5qbK3r3Ps=
 github.com/getsentry/sentry-go v0.27.0/go.mod h1:lc76E2QywIyW8WuBnwl8Lc4bkmQH4+w1gwTf25trprY=
+github.com/go-chi/chi/v5 v5.2.1 h1:KOIHODQj58PmL80G2Eak4WdvUzjSJSm0vG72crDCqb8=
+github.com/go-chi/chi/v5 v5.2.1/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops=
+github.com/go-chi/httplog/v2 v2.1.1 h1:ojojiu4PIaoeJ/qAO4GWUxJqvYUTobeo7zmuHQJAxRk=
+github.com/go-chi/httplog/v2 v2.1.1/go.mod h1:/XXdxicJsp4BA5fapgIC3VuTD+z0Z/VzukoB3VDc1YE=
 github.com/go-gorp/gorp/v3 v3.1.0 h1:ItKF/Vbuj31dmV4jxA1qblpSwkl9g1typ24xoe70IGs=
 github.com/go-gorp/gorp/v3 v3.1.0/go.mod h1:dLEjIyyRNiXvNZ8PSmzpt1GsWAUK8kjVhEpjH8TixEw=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
@@ -538,8 +544,8 @@ golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
-golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
+golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
+golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
 golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
diff --git a/healthmon/README.md b/healthmon/README.md
@@ -0,0 +1,2 @@
+
+# Healthmon
diff --git a/healthmon/cmd/main.go b/healthmon/cmd/main.go
@@ -0,0 +1,19 @@
+package main
+
+import (
+	"flag"
+
+	"github.com/flashbots/builder-playground/healthmon"
+)
+
+func main() {
+	var config healthmon.Config
+
+	flag.StringVar(&config.Chain, "chain", "", "Type of ethereum chain to monitor (beacon or execution)")
+	flag.StringVar(&config.URL, "url", "", "Full node URL (e.g., http://localhost:8545)")
+	flag.StringVar(&config.Addr, "service.addr", "localhost:21171", "Address for the health check service to listen on (e.g., ':21171')")
+	flag.IntVar(&config.BlockTimeSeconds, "blocktime", 0, "expected block time in seconds (optional)")
+	flag.Parse()
+
+	healthmon.Start(&config)
+}
diff --git a/healthmon/healthmon.go b/healthmon/healthmon.go
@@ -0,0 +1,249 @@
+package healthmon
+
+import (
+	"context"
+	"encoding/json"
+	"io"
+	"net/http"
+	"os"
+	"sync/atomic"
+	"time"
+
+	"github.com/ethereum/go-ethereum"
+	"github.com/ethereum/go-ethereum/core/types"
+	"github.com/ethereum/go-ethereum/ethclient"
+	mevboostrelay "github.com/flashbots/builder-playground/mev-boost-relay"
+	"github.com/flashbots/go-template/common"
+	"github.com/flashbots/mev-boost-relay/beaconclient"
+	mevRCommon "github.com/flashbots/mev-boost-relay/common"
+	"github.com/go-chi/httplog/v2"
+)
+
+// isHealthy holds the readiness state reported by statusHandler. It starts as
+// false (the zero value) and is written through setHealthy.
+var isHealthy atomic.Bool
+
+// Config holds the settings of the health monitor.
+type Config struct {
+	// Chain selects the monitor to run; Start accepts "beacon" or "execution".
+	Chain            string
+	// URL is the node endpoint handed to the selected monitor.
+	URL              string
+	// Addr is the listen address of the HTTP server exposing /ready.
+	Addr             string
+	// BlockTimeSeconds is the expected block time in seconds. When zero, the
+	// block time is derived from the received block updates.
+	BlockTimeSeconds int
+}
+
+// Start runs the health monitor described by config and then blocks serving
+// the readiness endpoint ("/ready") on config.Addr.
+//
+// It launches a chain-specific poller ("beacon" or "execution") that publishes
+// block updates, plus the monitor goroutine that turns those updates into the
+// healthy/unhealthy state. The process exits with status 1 when config.Chain
+// is not recognised or when the HTTP server stops (for example because the
+// address cannot be bound).
+func Start(config *Config) {
+	log := common.SetupLogger(&common.LoggingOpts{
+		Version: common.Version,
+	})
+
+	updates := make(chan blockUpdate, 10)
+	log.Info("Started", "chain", config.Chain, "url", config.URL)
+
+	switch config.Chain {
+	case "beacon":
+		go monitorBeacon(log, context.Background(), config.URL, updates)
+	case "execution":
+		go monitorExecution(log, context.Background(), config.URL, updates)
+	default:
+		log.Error("Unknown chain", "chain", config.Chain)
+		os.Exit(1)
+	}
+
+	go monitor(log, config.BlockTimeSeconds, context.Background(), updates)
+
+	log.Info("Starting service server", "addr", config.Addr)
+
+	http.HandleFunc("/ready", statusHandler)
+	// ListenAndServe only returns on failure; surface the error instead of
+	// letting the process end silently with a zero exit status.
+	if err := http.ListenAndServe(config.Addr, nil); err != nil {
+		log.Error("Health check server stopped", "addr", config.Addr, "err", err)
+		os.Exit(1)
+	}
+}
+
+// statusHandler answers readiness probes: it replies "OK" with the default
+// 200 status while isHealthy is set, and "NOT READY" with 503 otherwise.
+func statusHandler(w http.ResponseWriter, req *http.Request) {
+	if !isHealthy.Load() {
+		w.WriteHeader(http.StatusServiceUnavailable)
+		io.WriteString(w, "NOT READY")
+		return
+	}
+
+	io.WriteString(w, "OK")
+}
+
+// setHealthy atomically stores the given readiness value in isHealthy.
+func setHealthy(healthy bool) {
+	isHealthy.Store(healthy)
+}
+
+// monitorState is the bookkeeping used to process block updates.
+type monitorState struct {
+	log              *httplog.Logger
+	// firstBlockUpdate is the first update ever handled; it is the reference
+	// point for deriving the block time from timestamps.
+	firstBlockUpdate *blockUpdate
+	// blockTimeSeconds is the block time in seconds; zero means not yet known.
+	blockTimeSeconds int
+	// blockTimer is reset on every update once the block time is known.
+	blockTimer       *time.Timer
+}
+
+// newMonitorState builds a monitorState with the given logger and block time
+// (zero when unknown) and a block timer that is created in the stopped state.
+func newMonitorState(log *httplog.Logger, blockTimeSeconds int) *monitorState {
+	// this timer will start after the blocks are received and we can figure out the block time
+	blockTimer := time.NewTimer(0)
+	if !blockTimer.Stop() {
+		// A zero-duration timer may already have fired by the time Stop runs.
+		// With the pre-Go 1.23 timer semantics the tick then stays buffered in
+		// the channel and would be read as a bogus expiry, so drain it without
+		// blocking.
+		select {
+		case <-blockTimer.C:
+		default:
+		}
+	}
+
+	return &monitorState{
+		log:              log,
+		firstBlockUpdate: nil,
+		blockTimeSeconds: blockTimeSeconds,
+		blockTimer:       blockTimer,
+	}
+}
+
+// wiggleRoomSeconds is the slack, in seconds, added on top of the block time
+// when handleUpdate resets the block timer.
+var wiggleRoomSeconds = 1
+
+// handleUpdate records a block update and re-arms the block timer.
+//
+// The first update ever handled is kept as the reference point. While the
+// block time is still unknown (zero) it is taken from, in order of preference:
+//   - the BlockTime carried by the update (beacon), or
+//   - the average time per block between the first update and this one
+//     (execution).
+//
+// Once a block time is known, the timer is reset to the block time plus
+// wiggleRoomSeconds.
+func (m *monitorState) handleUpdate(update blockUpdate) {
+	m.log.Info("Processing block update", "number", update.Number, "timestamp", update.Timestamp)
+
+	if m.firstBlockUpdate == nil {
+		m.firstBlockUpdate = &update
+	}
+
+	if m.blockTimeSeconds == 0 {
+		if update.BlockTime != 0 {
+			m.log.Info("Using block time from update", "block time seconds", update.BlockTime)
+			m.blockTimeSeconds = update.BlockTime
+		} else if update.Number > m.firstBlockUpdate.Number {
+			// The two updates are not necessarily consecutive blocks, so divide
+			// the elapsed time by the number of blocks between them. A result
+			// below one second truncates to zero and leaves the block time
+			// unknown, to be derived again on a later update.
+			elapsed := update.Timestamp.Sub(m.firstBlockUpdate.Timestamp)
+			blocks := update.Number - m.firstBlockUpdate.Number
+			seconds := int((elapsed / time.Duration(blocks)).Seconds())
+			m.log.Info("Calculated block time from timestamps", "block time seconds", seconds)
+			m.blockTimeSeconds = seconds
+		}
+	}
+
+	if m.blockTimeSeconds != 0 {
+		m.log.Info("Resetting block timer", "blockTimeSeconds", m.blockTimeSeconds)
+		m.blockTimer.Reset(time.Duration(m.blockTimeSeconds+wiggleRoomSeconds) * time.Second)
+	}
+}
+
+// monitor drives the health state until ctx is cancelled: every block update
+// marks the service healthy and is passed to the monitor state, and every
+// block timer expiry marks it unhealthy.
+func monitor(log *httplog.Logger, blockTimeSeconds int, ctx context.Context, updates <-chan blockUpdate) {
+	st := newMonitorState(log, blockTimeSeconds)
+	done := ctx.Done()
+
+	for {
+		select {
+		case <-done:
+			return
+
+		case <-st.blockTimer.C:
+			// no block arrived within the expected block time (plus slack)
+			log.Warn("Block timer expired, setting unhealthy")
+			setHealthy(false)
+
+		case blk := <-updates:
+			// a fresh block means the node is producing blocks, so it is healthy;
+			// only the timer expiry above ever flips the state back
+			setHealthy(true)
+			st.handleUpdate(blk)
+		}
+	}
+}
+
+// blockUpdate is the message a chain poller sends when it sees a new head.
+type blockUpdate struct {
+	// Number is the head slot (beacon) or the block number (execution).
+	Number    uint64
+	// Timestamp is the block time taken from the header; only monitorExecution
+	// sets it.
+	Timestamp time.Time
+	// BlockTime is the seconds-per-slot value read from the beacon spec; only
+	// monitorBeacon sets it, and it is zero while the spec is not yet known.
+	BlockTime int
+}
+
+// monitorBeacon polls the beacon node at url every 500ms until ctx is
+// cancelled. Polls that fail or report a syncing node are skipped. Whenever
+// the head slot is the first one seen or higher than the last one reported, a
+// blockUpdate carrying the slot and the slot duration is sent on updates. The
+// slot duration is read from the beacon spec and retried on each poll until
+// it is known; until then it is sent as zero.
+func monitorBeacon(log *httplog.Logger, ctx context.Context, url string, updates chan<- blockUpdate) {
+	relayLog := mevRCommon.LogSetup(false, "info")
+	client := beaconclient.NewProdBeaconInstance(relayLog, url, url)
+
+	var (
+		seenSlot    bool
+		latestSlot  uint64
+		slotSeconds int
+	)
+
+	for {
+		// wait for the next poll, or stop on cancellation
+		select {
+		case <-ctx.Done():
+			return
+		case <-time.After(500 * time.Millisecond):
+		}
+
+		status, err := client.SyncStatus()
+		if err != nil {
+			log.Error("Failed to get beacon sync status", "err", err)
+			continue
+		}
+		if status.IsSyncing {
+			log.Debug("Beacon node is syncing", "headSlot", status.HeadSlot)
+			continue
+		}
+
+		if slotSeconds == 0 {
+			if spec, specErr := mevboostrelay.GetSpec(url); specErr != nil {
+				log.Error("Failed to get beacon spec", "err", specErr)
+			} else {
+				slotSeconds = int(spec.SecondsPerSlot)
+				log.Info("Fetched beacon spec", "blockTime", slotSeconds)
+			}
+		}
+
+		// nothing to report unless the head moved forward
+		if seenSlot && status.HeadSlot <= latestSlot {
+			continue
+		}
+		seenSlot, latestSlot = true, status.HeadSlot
+
+		log.Info("New beacon block received", "slot", status.HeadSlot)
+		updates <- blockUpdate{Number: status.HeadSlot, BlockTime: slotSeconds}
+	}
+}
+
+// monitorExecution polls the execution client at url every 500ms until ctx is
+// cancelled. Polls that fail or report a syncing node are skipped. Whenever
+// the latest block is the first one seen or higher than the last one
+// reported, a blockUpdate carrying its number and header timestamp is sent on
+// updates. The process exits with status 1 if the client cannot be dialed.
+func monitorExecution(log *httplog.Logger, ctx context.Context, url string, updates chan<- blockUpdate) {
+	client, err := ethclient.Dial(url)
+	if err != nil {
+		log.Error("Failed to connect to execution client", "err", err)
+		os.Exit(1)
+	}
+
+	// fetchHead asks for the latest block through a raw RPC call and decodes
+	// only its header. Both OP and regular L1 clients are queried and their
+	// transaction types cannot all be decoded by a single Geth SDK, so the
+	// block is requested without full transactions and the SDK's
+	// HeaderByNumber is bypassed to avoid transaction decoding issues.
+	fetchHead := func() (*types.Header, error) {
+		var payload json.RawMessage
+		callErr := client.Client().CallContext(ctx, &payload, "eth_getBlockByNumber", "latest", false)
+		if callErr != nil {
+			return nil, callErr
+		}
+
+		var hdr *types.Header
+		if decodeErr := json.Unmarshal(payload, &hdr); decodeErr != nil {
+			return nil, decodeErr
+		}
+		// a JSON null result means the block was not found
+		if hdr == nil {
+			return nil, ethereum.NotFound
+		}
+		return hdr, nil
+	}
+
+	var (
+		seenBlock    bool
+		highestBlock uint64
+	)
+
+	for {
+		// wait for the next poll, or stop on cancellation
+		select {
+		case <-ctx.Done():
+			return
+		case <-time.After(500 * time.Millisecond):
+		}
+
+		progress, err := client.SyncProgress(ctx)
+		if err != nil {
+			log.Error("Failed to get execution sync progress", "err", err)
+			continue
+		}
+		if progress != nil && !progress.Done() {
+			log.Debug("Execution node is syncing", "currentBlock", progress.CurrentBlock, "highestBlock", progress.HighestBlock)
+			continue
+		}
+
+		header, err := fetchHead()
+		if err != nil {
+			log.Error("Failed to get execution block number", "err", err)
+			continue
+		}
+
+		// nothing to report unless the chain moved forward
+		num := header.Number.Uint64()
+		if seenBlock && num <= highestBlock {
+			continue
+		}
+		seenBlock, highestBlock = true, num
+
+		log.Info("New execution block received", "number", num)
+		updates <- blockUpdate{
+			Number:    num,
+			Timestamp: time.Unix(int64(header.Time), 0),
+		}
+	}
+}
diff --git a/healthmon/healthmon_test.go b/healthmon/healthmon_test.go
diff --git a/mev-boost-relay/mev-boost-relay.go b/mev-boost-relay/mev-boost-relay.go
diff --git a/playground/components.go b/playground/components.go