Skip to content

Commit d16e65d

Browse files
Merge pull request #4004 from TheBlueMatt/2025-07-mon-event-failures-1
Replay lost `MonitorEvent`s in some cases for closed channels
2 parents 1c36624 + f809e6c commit d16e65d

File tree

5 files changed

+646
-34
lines changed

5 files changed

+646
-34
lines changed

lightning/src/chain/channelmonitor.rs

Lines changed: 145 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ use crate::util::ser::{
7777
use crate::prelude::*;
7878

7979
use crate::io::{self, Error};
80-
use crate::sync::{LockTestExt, Mutex};
80+
use crate::sync::Mutex;
8181
use core::ops::Deref;
8282
use core::{cmp, mem};
8383

@@ -1376,18 +1376,30 @@ macro_rules! holder_commitment_htlcs {
13761376
/// Transaction outputs to watch for on-chain spends.
13771377
pub type TransactionOutputs = (Txid, Vec<(u32, TxOut)>);
13781378

1379+
// Because we have weird workarounds for `ChannelMonitor` equality checks in `OnchainTxHandler` and
1380+
// `PackageTemplate` the equality implementation isn't really fit for public consumption. Instead,
1381+
// we only expose it during tests.
1382+
#[cfg(any(feature = "_test_utils", test))]
13791383
impl<Signer: EcdsaChannelSigner> PartialEq for ChannelMonitor<Signer>
13801384
where
13811385
Signer: PartialEq,
13821386
{
1383-
#[rustfmt::skip]
13841387
fn eq(&self, other: &Self) -> bool {
1388+
use crate::sync::LockTestExt;
13851389
// We need some kind of total lockorder. Absent a better idea, we sort by position in
13861390
// memory and take locks in that order (assuming that we can't move within memory while a
13871391
// lock is held).
13881392
let ord = ((self as *const _) as usize) < ((other as *const _) as usize);
1389-
let a = if ord { self.inner.unsafe_well_ordered_double_lock_self() } else { other.inner.unsafe_well_ordered_double_lock_self() };
1390-
let b = if ord { other.inner.unsafe_well_ordered_double_lock_self() } else { self.inner.unsafe_well_ordered_double_lock_self() };
1393+
let a = if ord {
1394+
self.inner.unsafe_well_ordered_double_lock_self()
1395+
} else {
1396+
other.inner.unsafe_well_ordered_double_lock_self()
1397+
};
1398+
let b = if ord {
1399+
other.inner.unsafe_well_ordered_double_lock_self()
1400+
} else {
1401+
self.inner.unsafe_well_ordered_double_lock_self()
1402+
};
13911403
a.eq(&b)
13921404
}
13931405
}
@@ -2995,33 +3007,147 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitor<Signer> {
29953007
/// This is similar to [`Self::get_pending_or_resolved_outbound_htlcs`] except it includes
29963008
/// HTLCs which were resolved on-chain (i.e. where the final HTLC resolution was done by an
29973009
/// event from this `ChannelMonitor`).
2998-
#[rustfmt::skip]
2999-
pub(crate) fn get_all_current_outbound_htlcs(&self) -> HashMap<HTLCSource, (HTLCOutputInCommitment, Option<PaymentPreimage>)> {
3010+
pub(crate) fn get_all_current_outbound_htlcs(
3011+
&self,
3012+
) -> HashMap<HTLCSource, (HTLCOutputInCommitment, Option<PaymentPreimage>)> {
30003013
let mut res = new_hash_map();
30013014
// Just examine the available counterparty commitment transactions. See docs on
30023015
// `fail_unbroadcast_htlcs`, below, for justification.
30033016
let us = self.inner.lock().unwrap();
3004-
macro_rules! walk_counterparty_commitment {
3005-
($txid: expr) => {
3006-
if let Some(ref latest_outpoints) = us.funding.counterparty_claimable_outpoints.get($txid) {
3007-
for &(ref htlc, ref source_option) in latest_outpoints.iter() {
3008-
if let &Some(ref source) = source_option {
3009-
res.insert((**source).clone(), (htlc.clone(),
3010-
us.counterparty_fulfilled_htlcs.get(&SentHTLCId::from_source(source)).cloned()));
3011-
}
3017+
let mut walk_counterparty_commitment = |txid| {
3018+
if let Some(latest_outpoints) = us.funding.counterparty_claimable_outpoints.get(txid) {
3019+
for &(ref htlc, ref source_option) in latest_outpoints.iter() {
3020+
if let &Some(ref source) = source_option {
3021+
let htlc_id = SentHTLCId::from_source(source);
3022+
let preimage_opt = us.counterparty_fulfilled_htlcs.get(&htlc_id).cloned();
3023+
res.insert((**source).clone(), (htlc.clone(), preimage_opt));
30123024
}
30133025
}
30143026
}
3015-
}
3027+
};
30163028
if let Some(ref txid) = us.funding.current_counterparty_commitment_txid {
3017-
walk_counterparty_commitment!(txid);
3029+
walk_counterparty_commitment(txid);
30183030
}
30193031
if let Some(ref txid) = us.funding.prev_counterparty_commitment_txid {
3020-
walk_counterparty_commitment!(txid);
3032+
walk_counterparty_commitment(txid);
30213033
}
30223034
res
30233035
}
30243036

3037+
/// Gets the set of outbound HTLCs which hit the chain and ultimately were claimed by us via
3038+
/// the timeout path and reached [`ANTI_REORG_DELAY`] confirmations. This is used to determine
3039+
/// if an HTLC has failed without the `ChannelManager` having seen it prior to being persisted.
3040+
pub(crate) fn get_onchain_failed_outbound_htlcs(&self) -> HashMap<HTLCSource, PaymentHash> {
3041+
let mut res = new_hash_map();
3042+
let us = self.inner.lock().unwrap();
3043+
3044+
// We only want HTLCs with ANTI_REORG_DELAY confirmations, which implies the commitment
3045+
// transaction has least ANTI_REORG_DELAY confirmations for any dependent HTLC transactions
3046+
// to have been confirmed.
3047+
let confirmed_txid = us.funding_spend_confirmed.or_else(|| {
3048+
us.onchain_events_awaiting_threshold_conf.iter().find_map(|event| {
3049+
if let OnchainEvent::FundingSpendConfirmation { .. } = event.event {
3050+
if event.height + ANTI_REORG_DELAY - 1 <= us.best_block.height {
3051+
Some(event.txid)
3052+
} else {
3053+
None
3054+
}
3055+
} else {
3056+
None
3057+
}
3058+
})
3059+
});
3060+
3061+
let confirmed_txid = if let Some(txid) = confirmed_txid {
3062+
txid
3063+
} else {
3064+
return res;
3065+
};
3066+
3067+
macro_rules! walk_htlcs {
3068+
($htlc_iter: expr) => {
3069+
let mut walk_candidate_htlcs = |htlcs| {
3070+
for &(ref candidate_htlc, ref candidate_source) in htlcs {
3071+
let candidate_htlc: &HTLCOutputInCommitment = &candidate_htlc;
3072+
let candidate_source: &Option<Box<HTLCSource>> = &candidate_source;
3073+
3074+
let source: &HTLCSource = if let Some(source) = candidate_source {
3075+
source
3076+
} else {
3077+
continue;
3078+
};
3079+
let confirmed = $htlc_iter.find(|(_, conf_src)| Some(source) == *conf_src);
3080+
if let Some((confirmed_htlc, _)) = confirmed {
3081+
let filter = |v: &&IrrevocablyResolvedHTLC| {
3082+
v.commitment_tx_output_idx
3083+
== confirmed_htlc.transaction_output_index
3084+
};
3085+
3086+
// The HTLC was included in the confirmed commitment transaction, so we
3087+
// need to see if it has been irrevocably failed yet.
3088+
if confirmed_htlc.transaction_output_index.is_none() {
3089+
// Dust HTLCs are always implicitly failed once the commitment
3090+
// transaction reaches ANTI_REORG_DELAY confirmations.
3091+
res.insert(source.clone(), confirmed_htlc.payment_hash);
3092+
} else if let Some(state) =
3093+
us.htlcs_resolved_on_chain.iter().filter(filter).next()
3094+
{
3095+
if state.payment_preimage.is_none() {
3096+
res.insert(source.clone(), confirmed_htlc.payment_hash);
3097+
}
3098+
}
3099+
} else {
3100+
// The HTLC was not included in the confirmed commitment transaction,
3101+
// which has now reached ANTI_REORG_DELAY confirmations and thus the
3102+
// HTLC has been failed.
3103+
res.insert(source.clone(), candidate_htlc.payment_hash);
3104+
}
3105+
}
3106+
};
3107+
3108+
// We walk the set of HTLCs in the unrevoked counterparty commitment transactions (see
3109+
// `fail_unbroadcast_htlcs` for a description of why).
3110+
if let Some(ref txid) = us.funding.current_counterparty_commitment_txid {
3111+
let htlcs = us.funding.counterparty_claimable_outpoints.get(txid);
3112+
walk_candidate_htlcs(htlcs.expect("Missing tx info for latest tx"));
3113+
}
3114+
if let Some(ref txid) = us.funding.prev_counterparty_commitment_txid {
3115+
let htlcs = us.funding.counterparty_claimable_outpoints.get(txid);
3116+
walk_candidate_htlcs(htlcs.expect("Missing tx info for previous tx"));
3117+
}
3118+
};
3119+
}
3120+
3121+
let funding = get_confirmed_funding_scope!(us);
3122+
3123+
if Some(confirmed_txid) == funding.current_counterparty_commitment_txid
3124+
|| Some(confirmed_txid) == funding.prev_counterparty_commitment_txid
3125+
{
3126+
let htlcs = funding.counterparty_claimable_outpoints.get(&confirmed_txid).unwrap();
3127+
walk_htlcs!(htlcs.iter().filter_map(|(a, b)| {
3128+
if let &Some(ref source) = b {
3129+
Some((a, Some(&**source)))
3130+
} else {
3131+
None
3132+
}
3133+
}));
3134+
} else if confirmed_txid == funding.current_holder_commitment_tx.trust().txid() {
3135+
walk_htlcs!(holder_commitment_htlcs!(us, CURRENT_WITH_SOURCES));
3136+
} else if let Some(prev_commitment_tx) = &funding.prev_holder_commitment_tx {
3137+
if confirmed_txid == prev_commitment_tx.trust().txid() {
3138+
walk_htlcs!(holder_commitment_htlcs!(us, PREV_WITH_SOURCES).unwrap());
3139+
} else {
3140+
let htlcs_confirmed: &[(&HTLCOutputInCommitment, _)] = &[];
3141+
walk_htlcs!(htlcs_confirmed.iter());
3142+
}
3143+
} else {
3144+
let htlcs_confirmed: &[(&HTLCOutputInCommitment, _)] = &[];
3145+
walk_htlcs!(htlcs_confirmed.iter());
3146+
}
3147+
3148+
res
3149+
}
3150+
30253151
/// Gets the set of outbound HTLCs which are pending resolution in this channel or which were
30263152
/// resolved with a preimage from our counterparty.
30273153
///
@@ -5829,6 +5955,7 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
58295955
on_to_local_output_csv: None,
58305956
},
58315957
});
5958+
self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage);
58325959
self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
58335960
source,
58345961
payment_preimage: Some(payment_preimage),
@@ -5852,6 +5979,7 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitorImpl<Signer> {
58525979
on_to_local_output_csv: None,
58535980
},
58545981
});
5982+
self.counterparty_fulfilled_htlcs.insert(SentHTLCId::from_source(&source), payment_preimage);
58555983
self.pending_monitor_events.push(MonitorEvent::HTLCEvent(HTLCUpdate {
58565984
source,
58575985
payment_preimage: Some(payment_preimage),

lightning/src/chain/package.rs

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1093,7 +1093,7 @@ enum PackageMalleability {
10931093
///
10941094
/// As packages are time-sensitive, we fee-bump and rebroadcast them at scheduled intervals.
10951095
/// Failing to confirm a package translate as a loss of funds for the user.
1096-
#[derive(Clone, Debug, PartialEq, Eq)]
1096+
#[derive(Clone, Debug, Eq)]
10971097
pub struct PackageTemplate {
10981098
// List of onchain outputs and solving data to generate satisfying witnesses.
10991099
inputs: Vec<(BitcoinOutPoint, PackageSolvingData)>,
@@ -1122,6 +1122,50 @@ pub struct PackageTemplate {
11221122
height_timer: u32,
11231123
}
11241124

1125+
impl PartialEq for PackageTemplate {
1126+
fn eq(&self, o: &Self) -> bool {
1127+
if self.inputs != o.inputs
1128+
|| self.malleability != o.malleability
1129+
|| self.feerate_previous != o.feerate_previous
1130+
|| self.height_timer != o.height_timer
1131+
{
1132+
return false;
1133+
}
1134+
#[cfg(test)]
1135+
{
1136+
// In some cases we may reset `counterparty_spendable_height` to zero on reload, which
1137+
// can cause our test assertions that ChannelMonitors round-trip exactly to trip. Here
1138+
// we allow exactly the same case as we tweak in the `PackageTemplate` `Readable`
1139+
// implementation.
1140+
if self.counterparty_spendable_height == 0 {
1141+
for (_, input) in self.inputs.iter() {
1142+
if let PackageSolvingData::RevokedHTLCOutput(RevokedHTLCOutput {
1143+
htlc, ..
1144+
}) = input
1145+
{
1146+
if !htlc.offered && htlc.cltv_expiry != 0 {
1147+
return true;
1148+
}
1149+
}
1150+
}
1151+
}
1152+
if o.counterparty_spendable_height == 0 {
1153+
for (_, input) in o.inputs.iter() {
1154+
if let PackageSolvingData::RevokedHTLCOutput(RevokedHTLCOutput {
1155+
htlc, ..
1156+
}) = input
1157+
{
1158+
if !htlc.offered && htlc.cltv_expiry != 0 {
1159+
return true;
1160+
}
1161+
}
1162+
}
1163+
}
1164+
}
1165+
self.counterparty_spendable_height == o.counterparty_spendable_height
1166+
}
1167+
}
1168+
11251169
impl PackageTemplate {
11261170
#[rustfmt::skip]
11271171
pub(crate) fn can_merge_with(&self, other: &PackageTemplate, cur_height: u32) -> bool {

lightning/src/ln/channelmanager.rs

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15798,7 +15798,7 @@ where
1579815798
log_error!(logger, " The ChannelMonitor for channel {} is at counterparty commitment transaction number {} but the ChannelManager is at counterparty commitment transaction number {}.",
1579915799
&channel.context.channel_id(), monitor.get_cur_counterparty_commitment_number(), channel.get_cur_counterparty_commitment_transaction_number());
1580015800
}
15801-
let mut shutdown_result =
15801+
let shutdown_result =
1580215802
channel.force_shutdown(ClosureReason::OutdatedChannelManager);
1580315803
if shutdown_result.unbroadcasted_batch_funding_txid.is_some() {
1580415804
return Err(DecodeError::InvalidValue);
@@ -15830,7 +15830,10 @@ where
1583015830
},
1583115831
);
1583215832
}
15833-
failed_htlcs.append(&mut shutdown_result.dropped_outbound_htlcs);
15833+
for (source, hash, cp_id, chan_id) in shutdown_result.dropped_outbound_htlcs {
15834+
let reason = LocalHTLCFailureReason::ChannelClosed;
15835+
failed_htlcs.push((source, hash, cp_id, chan_id, reason));
15836+
}
1583415837
channel_closures.push_back((
1583515838
events::Event::ChannelClosed {
1583615839
channel_id: channel.context.channel_id(),
@@ -15872,6 +15875,7 @@ where
1587215875
*payment_hash,
1587315876
channel.context.get_counterparty_node_id(),
1587415877
channel.context.channel_id(),
15878+
LocalHTLCFailureReason::ChannelClosed,
1587515879
));
1587615880
}
1587715881
}
@@ -16445,8 +16449,11 @@ where
1644516449
// payments which are still in-flight via their on-chain state.
1644616450
// We only rebuild the pending payments map if we were most recently serialized by
1644716451
// 0.0.102+
16452+
//
16453+
// First we rebuild all pending payments, then separately re-claim and re-fail pending
16454+
// payments. This avoids edge-cases around MPP payments resulting in redundant actions.
1644816455
for (channel_id, monitor) in args.channel_monitors.iter() {
16449-
let mut is_channel_closed = false;
16456+
let mut is_channel_closed = true;
1645016457
let counterparty_node_id = monitor.get_counterparty_node_id();
1645116458
if let Some(peer_state_mtx) = per_peer_state.get(&counterparty_node_id) {
1645216459
let mut peer_state_lock = peer_state_mtx.lock().unwrap();
@@ -16483,6 +16490,18 @@ where
1648316490
);
1648416491
}
1648516492
}
16493+
}
16494+
}
16495+
for (channel_id, monitor) in args.channel_monitors.iter() {
16496+
let mut is_channel_closed = true;
16497+
let counterparty_node_id = monitor.get_counterparty_node_id();
16498+
if let Some(peer_state_mtx) = per_peer_state.get(&counterparty_node_id) {
16499+
let mut peer_state_lock = peer_state_mtx.lock().unwrap();
16500+
let peer_state = &mut *peer_state_lock;
16501+
is_channel_closed = !peer_state.channel_by_id.contains_key(channel_id);
16502+
}
16503+
16504+
if is_channel_closed {
1648616505
for (htlc_source, (htlc, preimage_opt)) in
1648716506
monitor.get_all_current_outbound_htlcs()
1648816507
{
@@ -16580,6 +16599,20 @@ where
1658016599
},
1658116600
}
1658216601
}
16602+
for (htlc_source, payment_hash) in monitor.get_onchain_failed_outbound_htlcs() {
16603+
log_info!(
16604+
args.logger,
16605+
"Failing HTLC with payment hash {} as it was resolved on-chain.",
16606+
payment_hash
16607+
);
16608+
failed_htlcs.push((
16609+
htlc_source,
16610+
payment_hash,
16611+
monitor.get_counterparty_node_id(),
16612+
monitor.channel_id(),
16613+
LocalHTLCFailureReason::OnChainTimeout,
16614+
));
16615+
}
1658316616
}
1658416617

1658516618
// Whether the downstream channel was closed or not, try to re-apply any payment
@@ -17260,13 +17293,10 @@ where
1726017293
}
1726117294
}
1726217295

17263-
for htlc_source in failed_htlcs.drain(..) {
17264-
let (source, payment_hash, counterparty_node_id, channel_id) = htlc_source;
17265-
let failure_reason = LocalHTLCFailureReason::ChannelClosed;
17266-
let receiver = HTLCHandlingFailureType::Forward {
17267-
node_id: Some(counterparty_node_id),
17268-
channel_id,
17269-
};
17296+
for htlc_source in failed_htlcs {
17297+
let (source, payment_hash, counterparty_id, channel_id, failure_reason) = htlc_source;
17298+
let receiver =
17299+
HTLCHandlingFailureType::Forward { node_id: Some(counterparty_id), channel_id };
1727017300
let reason = HTLCFailReason::from_failure_code(failure_reason);
1727117301
channel_manager.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver);
1727217302
}

0 commit comments

Comments
 (0)