From 28c70ac50685b546a2fbaebea34acf1aa364cf66 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Mon, 18 Apr 2022 15:42:11 +0000 Subject: [PATCH] Ensure all HTLCs for a claimed payment are claimed on startup While the HTLC-claim process happens across all MPP parts under one lock, this doesn't imply that they are claimed fully atomically on disk. Ultimately, an application can crash after persisting one `ChannelMonitorUpdate` out of multiple monitor updates needed for the full claim. Previously, this would leave us in a very bad state - because of the all-channels-available check in `claim_funds` we'd refuse to claim the payment again on restart (even though the `PaymentReceived` event will be passed to the user again), and we'd end up having partially claimed the payment! The fix for the consistency part of this issue is pretty straightforward - just check for this condition on startup and complete the claim across all channels/`ChannelMonitor`s if we detect it. This still leaves us in a confused state from the perspective of the user, however - we've actually claimed a payment but when they call `claim_funds` we return `false` indicating it could not be claimed. --- lightning/src/chain/channelmonitor.rs | 7 +- lightning/src/ln/channel.rs | 26 ++++ lightning/src/ln/channelmanager.rs | 34 +++- lightning/src/ln/functional_test_utils.rs | 10 +- lightning/src/ln/functional_tests.rs | 180 ++++++++++++++++++++++ 5 files changed, 252 insertions(+), 5 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 738fff38..fd66e585 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -1085,7 +1085,8 @@ impl<Signer: Sign> ChannelMonitor<Signer> { self.inner.lock().unwrap().provide_latest_holder_commitment_tx(holder_commitment_tx, htlc_outputs).map_err(|_| ()) } - #[cfg(test)] + /// This is used to provide payment preimage(s) out-of-band during startup without updating the + /// off-chain state with a new commitment transaction. pub(crate) fn provide_payment_preimage( &self, payment_hash: &PaymentHash, @@ -1631,6 +1632,10 @@ impl<Signer: Sign> ChannelMonitor<Signer> { res } + + pub(crate) fn get_stored_preimages(&self) -> HashMap<PaymentHash, PaymentPreimage> { + self.inner.lock().unwrap().payment_preimages.clone() + } } /// Compares a broadcasted commitment transaction's HTLCs with those in the latest state, diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs index 43032c51..1d204d18 100644 --- a/lightning/src/ln/channel.rs +++ b/lightning/src/ln/channel.rs @@ -1703,6 +1703,28 @@ impl<Signer: Sign> Channel<Signer> { make_funding_redeemscript(&self.get_holder_pubkeys().funding_pubkey, self.counterparty_funding_pubkey()) } + /// Claims an HTLC while we're disconnected from a peer, dropping the ChannelMonitorUpdate + /// entirely. + /// + /// The ChannelMonitor for this channel MUST be updated out-of-band with the preimage provided + /// (i.e. without calling [`crate::chain::Watch::update_channel`]). + /// + /// The HTLC claim will end up in the holding cell (because the caller must ensure the peer is + /// disconnected). + pub fn claim_htlc_while_disconnected_dropping_mon_update<L: Deref> + (&mut self, htlc_id_arg: u64, payment_preimage_arg: PaymentPreimage, logger: &L) + where L::Target: Logger { + // Assert that we'll add the HTLC claim to the holding cell in `get_update_fulfill_htlc` + // (see equivalent if condition there).
+ assert!(self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32 | ChannelState::PeerDisconnected as u32 | ChannelState::MonitorUpdateFailed as u32) != 0); + let mon_update_id = self.latest_monitor_update_id; // Forget the ChannelMonitor update + let fulfill_resp = self.get_update_fulfill_htlc(htlc_id_arg, payment_preimage_arg, logger); + self.latest_monitor_update_id = mon_update_id; + if let UpdateFulfillFetch::NewClaim { msg, .. } = fulfill_resp { + assert!(msg.is_none()); // The HTLC must have ended up in the holding cell. + } + } + fn get_update_fulfill_htlc<L: Deref>(&mut self, htlc_id_arg: u64, payment_preimage_arg: PaymentPreimage, logger: &L) -> UpdateFulfillFetch where L::Target: Logger { // Either ChannelFunded got set (which means it won't be unset) or there is no way any // caller thought we could have something claimed (cause we wouldn't have accepted in an @@ -1765,6 +1787,10 @@ impl<Signer: Sign> Channel<Signer> { }; if (self.channel_state & (ChannelState::AwaitingRemoteRevoke as u32 | ChannelState::PeerDisconnected as u32 | ChannelState::MonitorUpdateFailed as u32)) != 0 { + // Note that this condition is the same as the assertion in + // `claim_htlc_while_disconnected_dropping_mon_update` and must match exactly - + // `claim_htlc_while_disconnected_dropping_mon_update` would not work correctly if we + // do not get into this branch. for pending_update in self.holding_cell_htlc_updates.iter() { match pending_update { &HTLCUpdateAwaitingACK::ClaimHTLC { htlc_id, .. } => { diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 71ae6170..a62f4480 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -6698,7 +6698,7 @@ impl<'a, Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> // payments which are still in-flight via their on-chain state. // We only rebuild the pending payments map if we were most recently serialized by // 0.0.102+ - for (_, monitor) in args.channel_monitors { + for (_, monitor) in args.channel_monitors.iter() { if by_id.get(&monitor.get_funding_txo().0.to_channel_id()).is_none() { for (htlc_source, htlc) in monitor.get_pending_outbound_htlcs() { if let HTLCSource::OutboundRoute { payment_id, session_priv, path, payment_secret, .. } = htlc_source { @@ -6824,6 +6824,38 @@ impl<'a, Signer: Sign, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> } } + for (_, monitor) in args.channel_monitors.iter() { + for (payment_hash, payment_preimage) in monitor.get_stored_preimages() { + if let Some(claimable_htlcs) = claimable_htlcs.remove(&payment_hash) { + log_info!(args.logger, "Re-claiming HTLCs with payment hash {} due to partial-claim.", log_bytes!(payment_hash.0)); + for claimable_htlc in claimable_htlcs.1 { + // Add a holding-cell claim of the payment to the Channel, which should be + // applied ~immediately on peer reconnection. Because it won't generate a + // new commitment transaction we can just provide the payment preimage to + // the corresponding ChannelMonitor and nothing else. + // + // We do so directly instead of via the normal ChannelMonitor update + // procedure as the ChainMonitor hasn't yet been initialized, implying + // we're not allowed to call it directly yet. Further, we do the update + // without incrementing the ChannelMonitor update ID as there isn't any + // reason to. + // If we were to generate a new ChannelMonitor update ID here and then + // crash before the user finishes block connect we'd end up force-closing + // this channel as well.
On the flip side, there's no harm in restarting + // without the new monitor persisted - we'll end up right back here on + // restart. + let previous_channel_id = claimable_htlc.prev_hop.outpoint.to_channel_id(); + if let Some(channel) = by_id.get_mut(&previous_channel_id) { + channel.claim_htlc_while_disconnected_dropping_mon_update(claimable_htlc.prev_hop.htlc_id, payment_preimage, &args.logger); + } + if let Some(previous_hop_monitor) = args.channel_monitors.get(&claimable_htlc.prev_hop.outpoint) { + previous_hop_monitor.provide_payment_preimage(&payment_hash, &payment_preimage, &args.tx_broadcaster, &args.fee_estimator, &args.logger); + } + } + } + } + let channel_manager = ChannelManager { genesis_hash, fee_estimator: args.fee_estimator, diff --git a/lightning/src/ln/functional_test_utils.rs b/lightning/src/ln/functional_test_utils.rs index c0e33e5b..15e75db8 100644 --- a/lightning/src/ln/functional_test_utils.rs +++ b/lightning/src/ln/functional_test_utils.rs @@ -1476,7 +1476,7 @@ pub fn send_along_route_with_secret<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, payment_id } -pub fn pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path: &[&Node<'a, 'b, 'c>], recv_value: u64, our_payment_hash: PaymentHash, our_payment_secret: Option<PaymentSecret>, ev: MessageSendEvent, payment_received_expected: bool, expected_preimage: Option<PaymentPreimage>) { +pub fn do_pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path: &[&Node<'a, 'b, 'c>], recv_value: u64, our_payment_hash: PaymentHash, our_payment_secret: Option<PaymentSecret>, ev: MessageSendEvent, payment_received_expected: bool, clear_recipient_events: bool, expected_preimage: Option<PaymentPreimage>) { let mut payment_event = SendEvent::from_event(ev); let mut prev_node = origin_node; @@ -1489,7 +1489,7 @@ pub fn pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path expect_pending_htlcs_forwardable!(node); - if idx == expected_path.len() - 1 { + if idx == expected_path.len() - 1 && clear_recipient_events { let events_2 = node.node.get_and_clear_pending_events(); if payment_received_expected { assert_eq!(events_2.len(), 1); @@ -1513,7 +1513,7 @@ pub fn pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path } else { assert!(events_2.is_empty()); } - } else { + } else if idx != expected_path.len() - 1 { let mut events_2 = node.node.get_and_clear_pending_msg_events(); assert_eq!(events_2.len(), 1); check_added_monitors!(node, 1); @@ -1525,6 +1525,10 @@ pub fn pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path } } +pub fn pass_along_path<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_path: &[&Node<'a, 'b, 'c>], recv_value: u64, our_payment_hash: PaymentHash, our_payment_secret: Option<PaymentSecret>, ev: MessageSendEvent, payment_received_expected: bool, expected_preimage: Option<PaymentPreimage>) { + do_pass_along_path(origin_node, expected_path, recv_value, our_payment_hash, our_payment_secret, ev, payment_received_expected, true, expected_preimage); +} + pub fn pass_along_route<'a, 'b, 'c>(origin_node: &Node<'a, 'b, 'c>, expected_route: &[&[&Node<'a, 'b, 'c>]], recv_value: u64, our_payment_hash: PaymentHash, our_payment_secret: PaymentSecret) { let mut events = origin_node.node.get_and_clear_pending_msg_events(); assert_eq!(events.len(), expected_route.len()); diff --git a/lightning/src/ln/functional_tests.rs b/lightning/src/ln/functional_tests.rs index 48b4b07c..e840bef4 100644 --- a/lightning/src/ln/functional_tests.rs +++ b/lightning/src/ln/functional_tests.rs @@ -9843,6 +9843,186 @@ fn
test_keysend_payments_to_private_node() { claim_payment(&nodes[0], &path, test_preimage); } +fn do_test_partial_claim_before_restart(persist_both_monitors: bool) { + // Test what happens if a node receives an MPP payment, claims it, but crashes before + // persisting the ChannelManager. If `persist_both_monitors` is false, also crash after only + // updating one of the two channels' ChannelMonitors. As a result, on startup, we'll (a) still + // have the PaymentReceived event, (b) have one (or two) channel(s) that goes on chain with the + // HTLC preimage in them, and (c) optionally have one channel that is live off-chain but does + // not have the preimage tied to the still-pending HTLC. + // + // To get to the correct state, on startup we should propagate the preimage to the + // still-off-chain channel, claiming the HTLC as soon as the peer connects, with the monitor + // receiving the preimage without a state update. + let chanmon_cfgs = create_chanmon_cfgs(4); + let node_cfgs = create_node_cfgs(4, &chanmon_cfgs); + let node_chanmgrs = create_node_chanmgrs(4, &node_cfgs, &[None, None, None, None]); + + let persister: test_utils::TestPersister; + let new_chain_monitor: test_utils::TestChainMonitor; + let nodes_3_deserialized: ChannelManager; + + let mut nodes = create_network(4, &node_cfgs, &node_chanmgrs); + + create_announced_chan_between_nodes_with_value(&nodes, 0, 1, 100_000, 0, InitFeatures::known(), InitFeatures::known()); + create_announced_chan_between_nodes_with_value(&nodes, 0, 2, 100_000, 0, InitFeatures::known(), InitFeatures::known()); + let chan_id_persisted = create_announced_chan_between_nodes_with_value(&nodes, 1, 3, 100_000, 0, InitFeatures::known(), InitFeatures::known()).2; + let chan_id_not_persisted = create_announced_chan_between_nodes_with_value(&nodes, 2, 3, 100_000, 0, InitFeatures::known(), InitFeatures::known()).2; + + // Create an MPP route for 15k sats, more than the default htlc-max of 10% + let (mut route, payment_hash, payment_preimage, payment_secret) = get_route_and_payment_hash!(nodes[0], nodes[3], 15_000_000); + assert_eq!(route.paths.len(), 2); + route.paths.sort_by(|path_a, _| { + // Sort the path so that the path through nodes[1] comes first + if path_a[0].pubkey == nodes[1].node.get_our_node_id() { + core::cmp::Ordering::Less } else { core::cmp::Ordering::Greater } + }); + + nodes[0].node.send_payment(&route, payment_hash, &Some(payment_secret)).unwrap(); + check_added_monitors!(nodes[0], 2); + + // Send the payment through to nodes[3] *without* clearing the PaymentReceived event + let mut send_events = nodes[0].node.get_and_clear_pending_msg_events(); + assert_eq!(send_events.len(), 2); + do_pass_along_path(&nodes[0], &[&nodes[1], &nodes[3]], 15_000_000, payment_hash, Some(payment_secret), send_events[0].clone(), true, false, None); + do_pass_along_path(&nodes[0], &[&nodes[2], &nodes[3]], 15_000_000, payment_hash, Some(payment_secret), send_events[1].clone(), true, false, None); + + // Now that we have an MPP payment pending, get the latest encoded copies of nodes[3]'s + // monitors and ChannelManager, for use later, if we don't want to persist both monitors. 
+ let mut original_monitor = test_utils::TestVecWriter(Vec::new()); + if !persist_both_monitors { + for outpoint in nodes[3].chain_monitor.chain_monitor.list_monitors() { + if outpoint.to_channel_id() == chan_id_not_persisted { + assert!(original_monitor.0.is_empty()); + nodes[3].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap().write(&mut original_monitor).unwrap(); + } + } + } + + let mut original_manager = test_utils::TestVecWriter(Vec::new()); + nodes[3].node.write(&mut original_manager).unwrap(); + + expect_payment_received!(nodes[3], payment_hash, payment_secret, 15_000_000); + + nodes[3].node.claim_funds(payment_preimage); + check_added_monitors!(nodes[3], 2); + + // Now fetch one of the two updated ChannelMonitors from nodes[3], and restart pretending we + // crashed in between the two persistence calls - using one old ChannelMonitor and one new one, + // with the old ChannelManager. + let mut updated_monitor = test_utils::TestVecWriter(Vec::new()); + for outpoint in nodes[3].chain_monitor.chain_monitor.list_monitors() { + if outpoint.to_channel_id() == chan_id_persisted { + assert!(updated_monitor.0.is_empty()); + nodes[3].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap().write(&mut updated_monitor).unwrap(); + } + } + // If `persist_both_monitors` is set, get the second monitor here as well + if persist_both_monitors { + for outpoint in nodes[3].chain_monitor.chain_monitor.list_monitors() { + if outpoint.to_channel_id() == chan_id_not_persisted { + assert!(original_monitor.0.is_empty()); + nodes[3].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap().write(&mut original_monitor).unwrap(); + } + } + } + + // Now restart nodes[3]. + persister = test_utils::TestPersister::new(); + let keys_manager = &chanmon_cfgs[3].keys_manager; + new_chain_monitor = test_utils::TestChainMonitor::new(Some(nodes[3].chain_source), nodes[3].tx_broadcaster.clone(), nodes[3].logger, node_cfgs[3].fee_estimator, &persister, keys_manager); + nodes[3].chain_monitor = &new_chain_monitor; + let mut monitors = Vec::new(); + for mut monitor_data in [original_monitor, updated_monitor].iter() { + let (_, mut deserialized_monitor) = <(BlockHash, ChannelMonitor)>::read(&mut &monitor_data.0[..], keys_manager).unwrap(); + monitors.push(deserialized_monitor); + } + + let config = UserConfig::default(); + nodes_3_deserialized = { + let mut channel_monitors = HashMap::new(); + for monitor in monitors.iter_mut() { + channel_monitors.insert(monitor.get_funding_txo().0, monitor); + } + <(BlockHash, ChannelManager)>::read(&mut &original_manager.0[..], ChannelManagerReadArgs { + default_config: config, + keys_manager, + fee_estimator: node_cfgs[3].fee_estimator, + chain_monitor: nodes[3].chain_monitor, + tx_broadcaster: nodes[3].tx_broadcaster.clone(), + logger: nodes[3].logger, + channel_monitors, + }).unwrap().1 + }; + nodes[3].node = &nodes_3_deserialized; + + for monitor in monitors { + // On startup the preimage should have been copied into the non-persisted monitor: + assert!(monitor.get_stored_preimages().contains_key(&payment_hash)); + nodes[3].chain_monitor.watch_channel(monitor.get_funding_txo().0.clone(), monitor).unwrap(); + } + check_added_monitors!(nodes[3], 2); + + nodes[1].node.peer_disconnected(&nodes[3].node.get_our_node_id(), false); + nodes[2].node.peer_disconnected(&nodes[3].node.get_our_node_id(), false); + + // During deserialization, we should have closed one channel and broadcast its latest + // commitment transaction. 
We should also still have the original PaymentReceived event we + // never finished processing. + let events = nodes[3].node.get_and_clear_pending_events(); + assert_eq!(events.len(), if persist_both_monitors { 3 } else { 2 }); + if let Event::PaymentReceived { amt: 15_000_000, .. } = events[0] { } else { panic!(); } + if let Event::ChannelClosed { reason: ClosureReason::OutdatedChannelManager, .. } = events[1] { } else { panic!(); } + if persist_both_monitors { + if let Event::ChannelClosed { reason: ClosureReason::OutdatedChannelManager, .. } = events[2] { } else { panic!(); } + } + + assert_eq!(nodes[3].node.list_channels().len(), if persist_both_monitors { 0 } else { 1 }); + if !persist_both_monitors { + // If one of the two channels is still live, reveal the payment preimage over it. + + nodes[3].node.peer_connected(&nodes[2].node.get_our_node_id(), &msgs::Init { features: InitFeatures::empty(), remote_network_address: None }); + let reestablish_1 = get_chan_reestablish_msgs!(nodes[3], nodes[2]); + nodes[2].node.peer_connected(&nodes[3].node.get_our_node_id(), &msgs::Init { features: InitFeatures::empty(), remote_network_address: None }); + let reestablish_2 = get_chan_reestablish_msgs!(nodes[2], nodes[3]); + + nodes[2].node.handle_channel_reestablish(&nodes[3].node.get_our_node_id(), &reestablish_1[0]); + get_event_msg!(nodes[2], MessageSendEvent::SendChannelUpdate, nodes[3].node.get_our_node_id()); + assert!(nodes[2].node.get_and_clear_pending_msg_events().is_empty()); + + nodes[3].node.handle_channel_reestablish(&nodes[2].node.get_our_node_id(), &reestablish_2[0]); + + // Once we call `get_and_clear_pending_msg_events` the holding cell is cleared and the HTLC + // claim should fly. + let ds_msgs = nodes[3].node.get_and_clear_pending_msg_events(); + check_added_monitors!(nodes[3], 1); + assert_eq!(ds_msgs.len(), 2); + if let MessageSendEvent::SendChannelUpdate { .. } = ds_msgs[1] {} else { panic!(); } + + let cs_updates = match ds_msgs[0] { + MessageSendEvent::UpdateHTLCs { ref updates, .. } => { + nodes[2].node.handle_update_fulfill_htlc(&nodes[3].node.get_our_node_id(), &updates.update_fulfill_htlcs[0]); + check_added_monitors!(nodes[2], 1); + let cs_updates = get_htlc_update_msgs!(nodes[2], nodes[0].node.get_our_node_id()); + expect_payment_forwarded!(nodes[2], nodes[0], nodes[3], Some(1000), false, false); + commitment_signed_dance!(nodes[2], nodes[3], updates.commitment_signed, false, true); + cs_updates + } + _ => panic!(), + }; + + nodes[0].node.handle_update_fulfill_htlc(&nodes[2].node.get_our_node_id(), &cs_updates.update_fulfill_htlcs[0]); + commitment_signed_dance!(nodes[0], nodes[2], cs_updates.commitment_signed, false, true); + expect_payment_sent!(nodes[0], payment_preimage); + } +} + +#[test] +fn test_partial_claim_before_restart() { + do_test_partial_claim_before_restart(false); + do_test_partial_claim_before_restart(true); +} + /// The possible events which may trigger a `max_dust_htlc_exposure` breach #[derive(Clone, Copy, PartialEq)] enum ExposureEvent { -- 2.30.2
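The `channelmonitor.rs` half of the patch is small: `provide_payment_preimage` stops being test-only so it can be called out-of-band during startup, and `get_stored_preimages` exposes a copy of the monitor's preimage map. Below is a minimal sketch of that shape using simplified stand-in types (`ToyMonitor` and toy `PaymentHash`/`PaymentPreimage` newtypes), not the real LDK structs:

```rust
use std::collections::HashMap;
use std::sync::Mutex;

// Hypothetical [u8; 32] newtypes standing in for LDK's PaymentHash / PaymentPreimage.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct PaymentHash([u8; 32]);
#[derive(Clone, Copy, Debug)]
struct PaymentPreimage([u8; 32]);

struct ToyMonitorInner {
    latest_update_id: u64,
    payment_preimages: HashMap<PaymentHash, PaymentPreimage>,
}

struct ToyMonitor {
    inner: Mutex<ToyMonitorInner>,
}

impl ToyMonitor {
    // Same idea as get_stored_preimages(): hand back a copy of the preimage map so
    // startup code can see which payments this monitor can already claim.
    fn get_stored_preimages(&self) -> HashMap<PaymentHash, PaymentPreimage> {
        self.inner.lock().unwrap().payment_preimages.clone()
    }

    // Same idea as the now pub(crate) provide_payment_preimage(): record a preimage
    // out-of-band, deliberately without touching the update counter (no new commitment).
    fn provide_payment_preimage(&self, hash: PaymentHash, preimage: PaymentPreimage) {
        let mut inner = self.inner.lock().unwrap();
        inner.payment_preimages.insert(hash, preimage);
        // inner.latest_update_id is intentionally left as-is.
    }
}

fn main() {
    let mon = ToyMonitor {
        inner: Mutex::new(ToyMonitorInner { latest_update_id: 7, payment_preimages: HashMap::new() }),
    };
    let hash = PaymentHash([1; 32]);
    mon.provide_payment_preimage(hash, PaymentPreimage([2; 32]));
    assert!(mon.get_stored_preimages().contains_key(&hash));
    assert_eq!(mon.inner.lock().unwrap().latest_update_id, 7); // update id unchanged
}
```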
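The new `Channel::claim_htlc_while_disconnected_dropping_mon_update` is essentially "run the normal fulfill path, but discard the `ChannelMonitorUpdate` it would produce and roll back the update counter", relying on the peer-disconnected state to force the claim into the holding cell. A compressed model of that save-and-restore trick, with hypothetical types standing in for `Channel` and `UpdateFulfillFetch`:

```rust
// Hypothetical, heavily simplified model of the holding-cell fulfill path; none of
// these types are the real LDK ones.
#[derive(Debug)]
enum UpdateFulfillFetch {
    NewClaim { monitor_update_id: u64, msg: Option<String> }, // msg models update_fulfill_htlc
    DuplicateClaim,
}

struct ToyChannel {
    peer_disconnected: bool,
    latest_monitor_update_id: u64,
    holding_cell_claims: Vec<u64>, // HTLC ids queued until the peer reconnects
}

impl ToyChannel {
    fn get_update_fulfill_htlc(&mut self, htlc_id: u64) -> UpdateFulfillFetch {
        if self.holding_cell_claims.contains(&htlc_id) {
            return UpdateFulfillFetch::DuplicateClaim;
        }
        self.latest_monitor_update_id += 1;
        if self.peer_disconnected {
            // We can't send update_fulfill_htlc right now; park the claim in the holding cell.
            self.holding_cell_claims.push(htlc_id);
            UpdateFulfillFetch::NewClaim { monitor_update_id: self.latest_monitor_update_id, msg: None }
        } else {
            UpdateFulfillFetch::NewClaim {
                monitor_update_id: self.latest_monitor_update_id,
                msg: Some(format!("update_fulfill_htlc({})", htlc_id)),
            }
        }
    }

    // Mirrors the shape of claim_htlc_while_disconnected_dropping_mon_update: run the
    // normal fulfill path, then roll the update id back because the ChannelMonitor is
    // going to be given the preimage out-of-band instead.
    fn claim_htlc_while_disconnected_dropping_mon_update(&mut self, htlc_id: u64) {
        assert!(self.peer_disconnected, "caller must ensure the peer is disconnected");
        let saved_id = self.latest_monitor_update_id; // Forget the monitor update.
        let res = self.get_update_fulfill_htlc(htlc_id);
        self.latest_monitor_update_id = saved_id;
        if let UpdateFulfillFetch::NewClaim { msg, .. } = res {
            assert!(msg.is_none(), "the claim must have landed in the holding cell");
        }
    }
}

fn main() {
    let mut chan = ToyChannel { peer_disconnected: true, latest_monitor_update_id: 42, holding_cell_claims: Vec::new() };
    chan.claim_htlc_while_disconnected_dropping_mon_update(3);
    assert_eq!(chan.latest_monitor_update_id, 42); // no monitor update id was consumed
    assert_eq!(chan.holding_cell_claims, vec![3]); // the claim waits for reconnection

    // Once the peer is back, the normal path produces a message and advances the id.
    chan.peer_disconnected = false;
    if let UpdateFulfillFetch::NewClaim { monitor_update_id, msg } = chan.get_update_fulfill_htlc(4) {
        assert_eq!(monitor_update_id, 43);
        assert!(msg.is_some());
    }
}
```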
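The loop added to `ChannelManager` deserialization is the piece that ties these together: every preimage already stored in some monitor is matched against the still-claimable HTLCs, each remaining part is queued as a holding-cell claim in its channel, and the preimage is copied into the previous-hop monitor. A rough sketch of that control flow; all types and names below are stand-ins, not the LDK API:

```rust
use std::collections::HashMap;

// Stand-in identifiers; in LDK these are PaymentHash, PaymentPreimage, OutPoint, etc.
type Hash = [u8; 32];
type Preimage = [u8; 32];
type ChannelId = [u8; 32];

struct PrevHop { channel_id: ChannelId, htlc_id: u64 }

#[derive(Default)]
struct ToyChannel { holding_cell_claims: Vec<(u64, Preimage)> }
impl ToyChannel {
    fn claim_htlc_while_disconnected(&mut self, htlc_id: u64, preimage: Preimage) {
        self.holding_cell_claims.push((htlc_id, preimage));
    }
}

#[derive(Default)]
struct ToyMonitor { preimages: HashMap<Hash, Preimage> }
impl ToyMonitor {
    fn get_stored_preimages(&self) -> HashMap<Hash, Preimage> { self.preimages.clone() }
    fn provide_payment_preimage(&mut self, hash: Hash, preimage: Preimage) {
        self.preimages.insert(hash, preimage);
    }
}

/// Sketch of the startup pass: if any monitor already holds a preimage for a payment
/// that is still listed as claimable, finish the claim across all of its HTLC parts.
fn reclaim_partial_claims(
    monitors: &mut HashMap<ChannelId, ToyMonitor>,
    channels: &mut HashMap<ChannelId, ToyChannel>,
    claimable_htlcs: &mut HashMap<Hash, Vec<PrevHop>>,
) {
    let known: Vec<(Hash, Preimage)> = monitors
        .values()
        .flat_map(|m| m.get_stored_preimages().into_iter())
        .collect();
    for (hash, preimage) in known {
        if let Some(parts) = claimable_htlcs.remove(&hash) {
            for part in parts {
                // Queue a holding-cell claim; it will be replayed on peer reconnection.
                if let Some(chan) = channels.get_mut(&part.channel_id) {
                    chan.claim_htlc_while_disconnected(part.htlc_id, preimage);
                }
                // And copy the preimage into the previous-hop monitor (done out-of-band
                // in the patch, without generating a new monitor update).
                if let Some(mon) = monitors.get_mut(&part.channel_id) {
                    mon.provide_payment_preimage(hash, preimage);
                }
            }
        }
    }
}

fn main() {
    let pay_hash: Hash = [9; 32];
    let preimage: Preimage = [7; 32];
    let (chan_a, chan_b): (ChannelId, ChannelId) = ([1; 32], [2; 32]);

    let mut monitors = HashMap::new();
    let mut mon_a = ToyMonitor::default();
    mon_a.provide_payment_preimage(pay_hash, preimage); // this MPP part was already claimed
    monitors.insert(chan_a, mon_a);
    monitors.insert(chan_b, ToyMonitor::default()); // this one was not

    let mut channels = HashMap::new();
    channels.insert(chan_b, ToyChannel::default());

    // The second MPP part is still pending on chan_b.
    let mut claimable = HashMap::new();
    claimable.insert(pay_hash, vec![PrevHop { channel_id: chan_b, htlc_id: 0 }]);

    reclaim_partial_claims(&mut monitors, &mut channels, &mut claimable);
    assert!(claimable.is_empty());
    assert_eq!(channels[&chan_b].holding_cell_claims.len(), 1);
    assert!(monitors[&chan_b].get_stored_preimages().contains_key(&pay_hash));
}
```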
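The `functional_test_utils.rs` change is a small extract-and-wrap refactor: `pass_along_path` keeps its signature and behaviour, while the new `do_pass_along_path` adds a `clear_recipient_events` flag so a test can deliver an MPP part without draining the recipient's `PaymentReceived` event. A generic illustration of that pattern; the helper below is illustrative, not the real test utility:

```rust
// Extended helper: callers that need the recipient's events left in place pass `false`.
fn do_deliver_payment(path_len: usize, clear_recipient_events: bool) -> usize {
    let mut events_cleared = 0;
    for idx in 0..path_len {
        let is_recipient = idx == path_len - 1;
        if is_recipient && clear_recipient_events {
            events_cleared += 1; // drain and check the PaymentReceived event here
        } else if !is_recipient {
            events_cleared += 1; // forward to the next hop and check its message events
        }
        // When is_recipient && !clear_recipient_events we deliberately leave the
        // PaymentReceived event pending, so a later restart test can observe it.
    }
    events_cleared
}

// Old entry point keeps its exact behaviour by delegating with the flag set.
fn deliver_payment(path_len: usize) -> usize {
    do_deliver_payment(path_len, true)
}

fn main() {
    assert_eq!(deliver_payment(3), 3);
    assert_eq!(do_deliver_payment(3, false), 2);
}
```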
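The functional test simulates the crash by snapshotting serialized state at different points: the `ChannelManager` plus one `ChannelMonitor` before `claim_funds`, the other monitor after it, then restarting from the mismatched set. A toy model of that persist-early/persist-late technique, using `Clone` in place of real serialization; none of these types are LDK's:

```rust
use std::collections::HashMap;

// Toy "persisted" state: which payment hashes each monitor can already claim.
#[derive(Clone, Default, PartialEq, Debug)]
struct MonitorSnapshot { preimages: Vec<u64> }

#[derive(Clone, Default)]
struct ManagerSnapshot { pending_claimable: Vec<u64> }

struct Node {
    manager: ManagerSnapshot,
    monitors: HashMap<&'static str, MonitorSnapshot>,
}

impl Node {
    fn claim(&mut self, payment: u64) {
        // Claiming touches *both* monitors, but persistence of each is independent.
        for mon in self.monitors.values_mut() {
            mon.preimages.push(payment);
        }
        self.manager.pending_claimable.retain(|p| *p != payment);
    }
}

fn main() {
    let mut node = Node {
        manager: ManagerSnapshot { pending_claimable: vec![42] },
        monitors: HashMap::from([("chan_persisted", MonitorSnapshot::default()),
                                 ("chan_not_persisted", MonitorSnapshot::default())]),
    };

    // "Persist" the manager and one monitor before the claim...
    let old_manager = node.manager.clone();
    let old_monitor = node.monitors["chan_not_persisted"].clone();

    node.claim(42);

    // ...and the other monitor after the claim, then "crash" and restart from the mix.
    let new_monitor = node.monitors["chan_persisted"].clone();
    let restarted = Node {
        manager: old_manager,
        monitors: HashMap::from([("chan_persisted", new_monitor),
                                 ("chan_not_persisted", old_monitor)]),
    };

    // This is exactly the inconsistent state the patch's startup pass must repair: the
    // manager still thinks the payment is claimable, one monitor has the preimage, the
    // other does not.
    assert_eq!(restarted.manager.pending_claimable, vec![42]);
    assert!(restarted.monitors["chan_persisted"].preimages.contains(&42));
    assert!(restarted.monitors["chan_not_persisted"].preimages.is_empty());
}
```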
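Finally, the test's last section checks that once the peer reconnects and the holding cell is freed, the queued claim turns into an `update_fulfill_htlc` plus a commitment, with exactly one monitor update (`check_added_monitors!(nodes[3], 1)`). A tiny sketch of that release-on-reconnect behaviour, again with stand-in types rather than LDK's message flow:

```rust
#[derive(Debug, PartialEq)]
enum Msg { UpdateFulfillHtlc { htlc_id: u64 }, CommitmentSigned }

struct ToyChannel {
    peer_connected: bool,
    holding_cell: Vec<u64>, // HTLC ids queued while disconnected
    monitor_updates: u32,
}

impl ToyChannel {
    fn peer_reconnected(&mut self) { self.peer_connected = true; }

    // Freeing the holding cell emits the queued fulfill plus a commitment, and only
    // now does the channel generate a monitor update for the new state.
    fn free_holding_cell(&mut self) -> Vec<Msg> {
        if !self.peer_connected || self.holding_cell.is_empty() {
            return Vec::new();
        }
        let mut msgs: Vec<Msg> = self
            .holding_cell
            .drain(..)
            .map(|htlc_id| Msg::UpdateFulfillHtlc { htlc_id })
            .collect();
        msgs.push(Msg::CommitmentSigned);
        self.monitor_updates += 1;
        msgs
    }
}

fn main() {
    let mut chan = ToyChannel { peer_connected: false, holding_cell: vec![3], monitor_updates: 0 };
    assert!(chan.free_holding_cell().is_empty()); // nothing happens while disconnected
    chan.peer_reconnected();
    let msgs = chan.free_holding_cell();
    assert_eq!(msgs, vec![Msg::UpdateFulfillHtlc { htlc_id: 3 }, Msg::CommitmentSigned]);
    assert_eq!(chan.monitor_updates, 1); // one monitor update once the claim actually flies
}
```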