From e4ca3ee667818910c3a3e21b69b56a45aa321bd6 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Wed, 9 Oct 2024 19:05:18 +0000 Subject: [PATCH] Req the counterparty node id when claiming against a closed chan Currently we store in-flight `ChannelMonitorUpdate`s in the per-peer structure in `ChannelManager`. This is nice and simple as we're generally updating it when we're updating other per-peer data, so we already have the relevant lock(s) and map entries. Sadly, when we're claiming an HTLC against a closed channel, we didn't have the `counterparty_node_id` available until it was added in 0.0.124 (and now we only have it for HTLCs which were forwarded in 0.0.124). This means we can't look up the per-peer structure when claiming old HTLCs, making it difficult to track the new `ChannelMonitorUpdate` as in-flight. While we could transition the in-flight `ChannelMonitorUpdate` tracking to a new global map indexed by `OutPoint`, doing so would result in a major lock which would be highly contended across channels with different peers. Instead, as we move towards tracking in-flight `ChannelMonitorUpdate`s for closed channels we'll keep our existing storage, leaving only the `counterparty_node_id` issue to contend with. Here we simply accept the issue, requiring that `counterparty_node_id` be available when claiming HTLCs against a closed channel. On startup, we explicitly check for any forwarded HTLCs which came from a closed channel where the forward happened prior to 0.0.124, failing to deserialize, or logging an warning if the channel is still open (implying things may work out, but panics may occur if the channel closes prior to HTLC resolution). While this is a somewhat dissapointing resolution, LDK nodes which forward HTLCs are generally fairly well-upgraded, so it is not anticipated to be an issue in practice. --- lightning/src/ln/channelmanager.rs | 166 ++++++++++++++++----- pending_changelog/matt-no-upgrade-skip.txt | 6 + 2 files changed, 131 insertions(+), 41 deletions(-) create mode 100644 pending_changelog/matt-no-upgrade-skip.txt diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 2cc12f579..15a8f0cdd 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -40,7 +40,7 @@ use crate::blinded_path::payment::{BlindedPaymentPath, Bolt12OfferContext, Bolt1 use crate::chain; use crate::chain::{Confirm, ChannelMonitorUpdateStatus, Watch, BestBlock}; use crate::chain::chaininterface::{BroadcasterInterface, ConfirmationTarget, FeeEstimator, LowerBoundedFeeEstimator}; -use crate::chain::channelmonitor::{ChannelMonitor, ChannelMonitorUpdate, WithChannelMonitor, ChannelMonitorUpdateStep, HTLC_FAIL_BACK_BUFFER, CLTV_CLAIM_BUFFER, LATENCY_GRACE_PERIOD_BLOCKS, ANTI_REORG_DELAY, MonitorEvent, CLOSED_CHANNEL_UPDATE_ID}; +use crate::chain::channelmonitor::{Balance, ChannelMonitor, ChannelMonitorUpdate, WithChannelMonitor, ChannelMonitorUpdateStep, HTLC_FAIL_BACK_BUFFER, CLTV_CLAIM_BUFFER, LATENCY_GRACE_PERIOD_BLOCKS, ANTI_REORG_DELAY, MonitorEvent, CLOSED_CHANNEL_UPDATE_ID}; use crate::chain::transaction::{OutPoint, TransactionData}; use crate::events; use crate::events::{Event, EventHandler, EventsProvider, MessageSendEvent, MessageSendEventsProvider, ClosureReason, HTLCDestination, PaymentFailureReason, ReplayEvent}; @@ -6836,15 +6836,16 @@ where debug_assert_ne!(self.pending_events.held_by_thread(), LockHeldState::HeldByThread); debug_assert_ne!(self.claimable_payments.held_by_thread(), LockHeldState::HeldByThread); + let counterparty_node_id_opt = prev_hop.counterparty_node_id.or_else(|| { + match self.short_to_chan_info.read().unwrap().get(&prev_hop.short_channel_id) { + Some((cp_id, _dup_chan_id)) => Some(cp_id.clone()), + None => None + } + }); + { let per_peer_state = self.per_peer_state.read().unwrap(); let chan_id = prev_hop.channel_id; - let counterparty_node_id_opt = prev_hop.counterparty_node_id.or_else(|| { - match self.short_to_chan_info.read().unwrap().get(&prev_hop.short_channel_id) { - Some((cp_id, _dup_chan_id)) => Some(cp_id.clone()), - None => None - } - }); let peer_state_opt = counterparty_node_id_opt.as_ref().map( |counterparty_node_id| per_peer_state.get(counterparty_node_id) @@ -6950,6 +6951,16 @@ where channel_id: Some(prev_hop.channel_id), }; + if counterparty_node_id_opt.is_none() { + let payment_hash: PaymentHash = payment_preimage.into(); + panic!( + "Prior to upgrading to LDK 0.1, all pending HTLCs must be resolved. It appears at least the HTLC with payment_hash {} (preimage {}) was not resolved. Please downgrade to LDK 0.0.124 and resolve the HTLC prior to upgrading.", + payment_hash, + payment_preimage, + ); + } + let counterparty_node_id = counterparty_node_id_opt.expect("Checked immediately above"); + if !during_init { // We update the ChannelMonitor on the backward link, after // receiving an `update_fulfill_htlc` from the forward link. @@ -6987,40 +6998,25 @@ where let (action_opt, raa_blocker_opt) = completion_action(None, false); if let Some(raa_blocker) = raa_blocker_opt { - let counterparty_node_id = prev_hop.counterparty_node_id.or_else(|| - // prev_hop.counterparty_node_id is always available for payments received after - // LDK 0.0.123, but for those received on 0.0.123 and claimed later, we need to - // look up the counterparty in the `action_opt`, if possible. - action_opt.as_ref().and_then(|action| - if let MonitorUpdateCompletionAction::PaymentClaimed { pending_mpp_claim, .. } = action { - pending_mpp_claim.as_ref().map(|(node_id, _, _, _)| *node_id) - } else { None } - ) - ); - if let Some(counterparty_node_id) = counterparty_node_id { - // TODO: Avoid always blocking the world for the write lock here. - let mut per_peer_state = self.per_peer_state.write().unwrap(); - let peer_state_mutex = per_peer_state.entry(counterparty_node_id).or_insert_with(|| - Mutex::new(PeerState { - channel_by_id: new_hash_map(), - inbound_channel_request_by_id: new_hash_map(), - latest_features: InitFeatures::empty(), - pending_msg_events: Vec::new(), - in_flight_monitor_updates: BTreeMap::new(), - monitor_update_blocked_actions: BTreeMap::new(), - actions_blocking_raa_monitor_updates: BTreeMap::new(), - is_connected: false, - })); - let mut peer_state = peer_state_mutex.lock().unwrap(); + // TODO: Avoid always blocking the world for the write lock here. + let mut per_peer_state = self.per_peer_state.write().unwrap(); + let peer_state_mutex = per_peer_state.entry(counterparty_node_id).or_insert_with(|| + Mutex::new(PeerState { + channel_by_id: new_hash_map(), + inbound_channel_request_by_id: new_hash_map(), + latest_features: InitFeatures::empty(), + pending_msg_events: Vec::new(), + in_flight_monitor_updates: BTreeMap::new(), + monitor_update_blocked_actions: BTreeMap::new(), + actions_blocking_raa_monitor_updates: BTreeMap::new(), + is_connected: false, + })); + let mut peer_state = peer_state_mutex.lock().unwrap(); - peer_state.actions_blocking_raa_monitor_updates - .entry(prev_hop.channel_id) - .or_default() - .push(raa_blocker); - } else { - debug_assert!(false, - "RAA ChannelMonitorUpdate blockers are only set with PaymentClaimed completion actions, so we should always have a counterparty node id"); - } + peer_state.actions_blocking_raa_monitor_updates + .entry(prev_hop.channel_id) + .or_default() + .push(raa_blocker); } self.handle_monitor_update_completion_actions(action_opt); @@ -12773,11 +12769,96 @@ where // Whether the downstream channel was closed or not, try to re-apply any payment // preimages from it which may be needed in upstream channels for forwarded // payments. + let mut fail_read = false; let outbound_claimed_htlcs_iter = monitor.get_all_current_outbound_htlcs() .into_iter() .filter_map(|(htlc_source, (htlc, preimage_opt))| { - if let HTLCSource::PreviousHopData(_) = htlc_source { + if let HTLCSource::PreviousHopData(prev_hop) = &htlc_source { if let Some(payment_preimage) = preimage_opt { + let inbound_edge_monitor = args.channel_monitors.get(&prev_hop.outpoint); + // Note that for channels which have gone to chain, + // `get_all_current_outbound_htlcs` is never pruned and always returns + // a constant set until the monitor is removed/archived. Thus, we + // want to skip replaying claims that have definitely been resolved + // on-chain. + + // If the inbound monitor is not present, we assume it was fully + // resolved and properly archived, implying this payment had plenty + // of time to get claimed and we can safely skip any further + // attempts to claim it (they wouldn't succeed anyway as we don't + // have a monitor against which to do so). + let inbound_edge_monitor = if let Some(monitor) = inbound_edge_monitor { + monitor + } else { + return None; + }; + // Second, if the inbound edge of the payment's monitor has been + // fully claimed we've had at least `ANTI_REORG_DELAY` blocks to + // get any PaymentForwarded event(s) to the user and assume that + // there's no need to try to replay the claim just for that. + let inbound_edge_balances = inbound_edge_monitor.get_claimable_balances(); + if inbound_edge_balances.is_empty() { + return None; + } + + if prev_hop.counterparty_node_id.is_none() { + // We no longer support claiming an HTLC where we don't have + // the counterparty_node_id available if the claim has to go to + // a closed channel. Its possible we can get away with it if + // the channel is not yet closed, but its by no means a + // guarantee. + + // Thus, in this case we are a bit more aggressive with our + // pruning - if we have no use for the claim (because the + // inbound edge of the payment's monitor has already claimed + // the HTLC) we skip trying to replay the claim. + let htlc_payment_hash: PaymentHash = payment_preimage.into(); + if !inbound_edge_balances.iter().all(|bal| { + match bal { + Balance::ClaimableOnChannelClose { .. } => { + // The channel is still open, assume we can still + // claim against it + false + }, + Balance::MaybePreimageClaimableHTLC { payment_hash, .. } => { + *payment_hash != htlc_payment_hash + }, + _ => true, + } + }) { + return None; + } + + // First check if we're absolutely going to fail - if we need + // to replay this claim to get the preimage into the inbound + // edge monitor but the channel is closed (and thus we'll + // immediately panic if we call claim_funds_from_hop). + if outpoint_to_peer.get(&prev_hop.outpoint).is_none() { + log_error!(args.logger, + "We need to replay the HTLC claim for payment_hash {} (preimage {}) but cannot do so as the HTLC was forwarded prior to LDK 0.0.124.\ + All HTLCs which were forwarded by LDK 0.0.123 and prior must be resolved prior to upgrading to LDK 0.1", + htlc_payment_hash, + payment_preimage, + ); + fail_read = true; + } + + // At this point we're confident we need the claim, but the + // inbound edge channel is still live. As long as this remains + // the case, we can conceivably proceed, but we run some risk + // of panicking at runtime. The user ideally should have read + // the release notes and we wouldn't be here, but we go ahead + // and let things run in the hope that it'll all just work out. + log_error!(args.logger, + "We need to replay the HTLC claim for payment_hash {} (preimage {}) but don't have all the required information to do so reliably.\ + As long as the channel for the inbound edge of the forward remains open, this may work okay, but we may panic at runtime!\ + All HTLCs which were forwarded by LDK 0.0.123 and prior must be resolved prior to upgrading to LDK 0.1\ + Continuing anyway, though panics may occur!", + htlc_payment_hash, + payment_preimage, + ); + } + Some((htlc_source, payment_preimage, htlc.amount_msat, // Check if `counterparty_opt.is_none()` to see if the // downstream chan is closed (because we don't have a @@ -12797,6 +12878,9 @@ where for tuple in outbound_claimed_htlcs_iter { pending_claims_to_replay.push(tuple); } + if fail_read { + return Err(DecodeError::InvalidValue); + } } } diff --git a/pending_changelog/matt-no-upgrade-skip.txt b/pending_changelog/matt-no-upgrade-skip.txt new file mode 100644 index 000000000..4c49f2fe0 --- /dev/null +++ b/pending_changelog/matt-no-upgrade-skip.txt @@ -0,0 +1,6 @@ +## Backwards Compatibility + * Nodes with pending forwarded HTLCs cannot be upgraded directly from 0.0.123 + or earlier to 0.1. Instead, they must first either resolve all pending + forwarded HTLCs (including those pending resolution on-chain), or run + 0.0.124 and resolve any HTLCs which were originally forwarded running + 0.0.123 or earlier. -- 2.39.5