Skip to content

sweepbatcher: fix reorg detection #975

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ require (
github.com/stretchr/testify v1.10.0
github.com/urfave/cli v1.22.14
go.etcd.io/bbolt v1.3.11
golang.org/x/net v0.38.0
golang.org/x/sync v0.12.0
google.golang.org/grpc v1.64.1
google.golang.org/protobuf v1.34.2
Expand Down Expand Up @@ -184,6 +183,7 @@ require (
golang.org/x/crypto v0.36.0 // indirect
golang.org/x/exp v0.0.0-20240325151524-a685a6edb6d8 // indirect
golang.org/x/mod v0.21.0 // indirect
golang.org/x/net v0.38.0 // indirect
golang.org/x/sys v0.31.0 // indirect
golang.org/x/term v0.30.0 // indirect
golang.org/x/text v0.23.0 // indirect
Expand Down
5 changes: 5 additions & 0 deletions loopout.go
Original file line number Diff line number Diff line change
Expand Up @@ -1246,7 +1246,11 @@ func (s *loopOutSwap) waitForHtlcSpendConfirmedV2(globalCtx context.Context,
s.height = notification.(int32)
timerChan = s.timerFactory(repushDelay)

s.log.Infof("Received block %d", s.height)

case <-timerChan:
s.log.Infof("Checking the sweep")

// canSweep will return false if the preimage is
// not revealed yet but the conf target is closer than
// 20 blocks. In this case to be sure we won't attempt
Expand All @@ -1268,6 +1272,7 @@ func (s *loopOutSwap) waitForHtlcSpendConfirmedV2(globalCtx context.Context,
}

// Send the sweep to the sweeper.
s.log.Infof("(Re)adding the sweep to sweepbatcher")
err := s.batcher.AddSweep(ctx, &sweepReq)
if err != nil {
return nil, err
Expand Down
150 changes: 84 additions & 66 deletions sweepbatcher/sweep_batch.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,10 @@ type batchConfig struct {
// initial delay completion and publishing the batch transaction.
batchPublishDelay time.Duration

// noBumping instructs sweepbatcher not to fee bump itself and rely on
// external source of fee rates (FeeRateProvider).
noBumping bool
// customFeeRate provides custom min fee rate per swap. The batch uses
// max of the fee rates of its swaps. In this mode confTarget is
// ignored and fee bumping by sweepbatcher is disabled.
customFeeRate FeeRateProvider

// txLabeler is a function generating a transaction label. It is called
// before publishing a batch transaction. Batch ID is passed to it.
Expand Down Expand Up @@ -232,6 +233,10 @@ type batch struct {
// spendChan is the channel over which spend notifications are received.
spendChan chan *chainntnfs.SpendDetail

// spendErrChan is the channel over which spend notifier errors are
// received.
spendErrChan chan error

// confChan is the channel over which confirmation notifications are
// received.
confChan chan *chainntnfs.TxConfirmation
Expand Down Expand Up @@ -378,9 +383,7 @@ func NewBatch(cfg batchConfig, bk batchKit) *batch {
id: -1,
state: Open,
sweeps: make(map[wire.OutPoint]sweep),
spendChan: make(chan *chainntnfs.SpendDetail),
confChan: make(chan *chainntnfs.TxConfirmation, 1),
reorgChan: make(chan struct{}, 1),
testReqs: make(chan *testRequest),
errChan: make(chan error, 1),
callEnter: make(chan struct{}),
Expand Down Expand Up @@ -423,9 +426,7 @@ func NewBatchFromDB(cfg batchConfig, bk batchKit) (*batch, error) {
state: bk.state,
primarySweepID: bk.primaryID,
sweeps: bk.sweeps,
spendChan: make(chan *chainntnfs.SpendDetail),
confChan: make(chan *chainntnfs.TxConfirmation, 1),
reorgChan: make(chan struct{}, 1),
testReqs: make(chan *testRequest),
errChan: make(chan error, 1),
callEnter: make(chan struct{}),
Expand Down Expand Up @@ -723,6 +724,9 @@ func (b *batch) addSweeps(ctx context.Context, sweeps []*sweep) (bool, error) {
// lower that minFeeRate of other sweeps (so it is
// applied).
if b.rbfCache.FeeRate < s.minFeeRate {
b.Infof("Increasing feerate of the batch "+
"from %v to %v", b.rbfCache.FeeRate,
s.minFeeRate)
b.rbfCache.FeeRate = s.minFeeRate
}
}
Expand Down Expand Up @@ -769,6 +773,9 @@ func (b *batch) addSweeps(ctx context.Context, sweeps []*sweep) (bool, error) {
// Update FeeRate. Max(s.minFeeRate) for all the sweeps of
// the batch is the basis for fee bumps.
if b.rbfCache.FeeRate < s.minFeeRate {
b.Infof("Increasing feerate of the batch "+
"from %v to %v", b.rbfCache.FeeRate,
s.minFeeRate)
b.rbfCache.FeeRate = s.minFeeRate
b.rbfCache.SkipNextBump = true
}
Expand Down Expand Up @@ -968,6 +975,12 @@ func (b *batch) Run(ctx context.Context) error {
continue
}

// Update feerate of sweeps. This is normally done by
// AddSweep, but it may not be called after the sweep
// is confirmed, but fresh feerate is still needed to
// keep publishing in case of reorg.
b.updateFeeRate(ctx)

err := b.publish(ctx)
if err != nil {
return fmt.Errorf("publish error: %w", err)
Expand All @@ -979,23 +992,26 @@ func (b *batch) Run(ctx context.Context) error {
return fmt.Errorf("handleSpend error: %w", err)
}

case err := <-b.spendErrChan:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unrelated to this, but since the Run method is quite long, could we move the code at the beginning for around initialDelay and initialDelayChan into a separate function?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

initialDelay, initialDelayChan and related variables in the beginning of Run are too connected to each other. In the current version they are all local and it is easy to follow the logic of waiting. How to factor out these variables without sacrificing readability?

I updated the last commit: factored out feerate updates to updateFeeRate method.

b.writeToSpendErrChan(ctx, err)

return fmt.Errorf("spend notifier failed: %w", err)

case conf := <-b.confChan:
if err := b.handleConf(runCtx, conf); err != nil {
return fmt.Errorf("handleConf error: %w", err)
}

return nil

// A re-org has been detected. We set the batch state back to
// open since our batch transaction is no longer present in any
// block. We can accept more sweeps and try to publish.
case <-b.reorgChan:
b.state = Open
b.Warnf("reorg detected, batch is able to " +
"accept new sweeps")

err := b.monitorSpend(ctx, b.sweeps[b.primarySweepID])
if err != nil {
return fmt.Errorf("monitorSpend error: %w", err)
}

case testReq := <-b.testReqs:
testReq.handler()
close(testReq.quit)
Expand All @@ -1013,6 +1029,41 @@ func (b *batch) Run(ctx context.Context) error {
}
}

// updateFeeRate gets fresh values of minFeeRate for sweeps and updates the
// feerate of the batch if needed. This method must be called from event loop.
func (b *batch) updateFeeRate(ctx context.Context) {
for outpoint, s := range b.sweeps {
minFeeRate, err := minimumSweepFeeRate(
ctx, b.cfg.customFeeRate, b.wallet,
s.swapHash, s.outpoint, s.confTarget,
)
if err != nil {
b.Warnf("failed to determine feerate for sweep %v of "+
"swap %x, confTarget %d: %w", s.outpoint,
s.swapHash[:6], s.confTarget, err)
continue
}

if minFeeRate <= s.minFeeRate {
continue
}

b.Infof("Increasing feerate of sweep %v of swap %x from %v "+
"to %v", s.outpoint, s.swapHash[:6], s.minFeeRate,
minFeeRate)
s.minFeeRate = minFeeRate
b.sweeps[outpoint] = s

if s.minFeeRate <= b.rbfCache.FeeRate {
continue
}

b.Infof("Increasing feerate of the batch from %v to %v",
b.rbfCache.FeeRate, s.minFeeRate)
b.rbfCache.FeeRate = s.minFeeRate
}
}

// testRunInEventLoop runs a function in the event loop blocking until
// the function returns. For unit tests only!
func (b *batch) testRunInEventLoop(ctx context.Context, handler func()) {
Expand Down Expand Up @@ -1790,7 +1841,7 @@ func (b *batch) updateRbfRate(ctx context.Context) error {

// Set the initial value for our fee rate.
b.rbfCache.FeeRate = rate
} else if !b.cfg.noBumping {
} else if noBumping := b.cfg.customFeeRate != nil; !noBumping {
if b.rbfCache.SkipNextBump {
// Skip fee bumping, unset the flag, to bump next time.
b.rbfCache.SkipNextBump = false
Expand All @@ -1812,44 +1863,33 @@ func (b *batch) updateRbfRate(ctx context.Context) error {
// of the batch transaction gets confirmed, due to the uncertainty of RBF
// replacements and network propagation, we can always detect the transaction.
func (b *batch) monitorSpend(ctx context.Context, primarySweep sweep) error {
spendCtx, cancel := context.WithCancel(ctx)
if b.spendChan != nil || b.spendErrChan != nil || b.reorgChan != nil {
return fmt.Errorf("an attempt to run monitorSpend multiple " +
"times per batch")
}

reorgChan := make(chan struct{}, 1)

spendChan, spendErr, err := b.chainNotifier.RegisterSpendNtfn(
spendCtx, &primarySweep.outpoint, primarySweep.htlc.PkScript,
spendChan, spendErrChan, err := b.chainNotifier.RegisterSpendNtfn(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will RegisterSpendNtfn keep spendChan and spendErrChan open and signal spends/errors after the reorg chan fired?
I assume yes, so this is a nice simplification of the spend-handling flow.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I added an itest for LND to check this: lightningnetwork/lnd#10083

ctx, &primarySweep.outpoint, primarySweep.htlc.PkScript,
primarySweep.initiationHeight,
lndclient.WithReOrgChan(reorgChan),
)
if err != nil {
cancel()

return err
return fmt.Errorf("failed to register spend notifier for "+
"primary sweep %v, pkscript %x, height %d: %w",
primarySweep.outpoint, primarySweep.htlc.PkScript,
primarySweep.initiationHeight, err)
}

b.wg.Add(1)
go func() {
defer cancel()
defer b.wg.Done()

b.Infof("monitoring spend for outpoint %s",
primarySweep.outpoint.String())
b.Infof("monitoring spend for outpoint %s",
primarySweep.outpoint.String())

select {
case spend := <-spendChan:
select {
case b.spendChan <- spend:

case <-ctx.Done():
}

case err := <-spendErr:
b.writeToSpendErrChan(ctx, err)

b.writeToErrChan(
fmt.Errorf("spend error: %w", err),
)

case <-ctx.Done():
}
}()
// This is safe to do as we always call monitorSpend from the event
// loop's goroutine.
b.spendChan = spendChan
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice simplification. I'd add a comment that this is safe to do as we're always calling monitorSpend from the runloop's goroutine.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I added the comment.

b.spendErrChan = spendErrChan
b.reorgChan = reorgChan

return nil
}
Expand All @@ -1862,14 +1902,11 @@ func (b *batch) monitorConfirmations(ctx context.Context) error {
return fmt.Errorf("can't find primarySweep")
}

reorgChan := make(chan struct{})

confCtx, cancel := context.WithCancel(ctx)

confChan, errChan, err := b.chainNotifier.RegisterConfirmationsNtfn(
confCtx, b.batchTxid, b.batchPkScript, batchConfHeight,
primarySweep.initiationHeight,
lndclient.WithReOrgChan(reorgChan),
)
if err != nil {
cancel()
Expand All @@ -1895,18 +1932,6 @@ func (b *batch) monitorConfirmations(ctx context.Context) error {
b.writeToErrChan(fmt.Errorf("confirmations "+
"monitoring error: %w", err))

case <-reorgChan:
// A re-org has been detected. We set the batch
// state back to open since our batch
// transaction is no longer present in any
// block. We can accept more sweeps and try to
// publish new transactions, at this point we
// need to monitor again for a new spend.
select {
case b.reorgChan <- struct{}{}:
case <-ctx.Done():
}

case <-ctx.Done():
}
}()
Expand Down Expand Up @@ -2395,12 +2420,6 @@ func (b *batch) writeToErrChan(err error) {

// writeToSpendErrChan sends an error to spend error channels of all the sweeps.
func (b *batch) writeToSpendErrChan(ctx context.Context, spendErr error) {
done, err := b.scheduleNextCall()
if err != nil {
done()

return
}
notifiers := make([]*SpendNotifier, 0, len(b.sweeps))
for _, s := range b.sweeps {
// If the sweep's notifier is empty then this means that a swap
Expand All @@ -2412,7 +2431,6 @@ func (b *batch) writeToSpendErrChan(ctx context.Context, spendErr error) {

notifiers = append(notifiers, s.notifier)
}
done()

for _, notifier := range notifiers {
select {
Expand Down
Loading