Fail HTLCs which were removed from a channel but not persisted [2022-11-reload-htlc]
author    Matt Corallo <git@bluematt.me>
          Wed, 16 Nov 2022 02:20:03 +0000 (02:20 +0000)
committer Matt Corallo <git@bluematt.me>
          Mon, 5 Dec 2022 20:27:35 +0000 (20:27 +0000)
When a channel is force-closed, if a `ChannelMonitor` update has
completed but the corresponding `ChannelManager` persist has not yet
happened, HTLCs which were removed in the latest (persisted)
`ChannelMonitor` update will not be failed even though they do not
appear in the commitment transaction which went on chain. This is
because the stale `ChannelManager` still thinks the `ChannelMonitor`
is responsible for them, while the up-to-date `ChannelMonitor` has no
knowledge of the HTLCs at all.

The fix for this is relatively simple - we need to check for this
specific case and fail back such HTLCs when deserializing a
`ChannelManager`.
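
The fix amounts to a set difference taken at `ChannelManager`
deserialization: every HTLC source the stale manager still tracks is
checked against the newer `ChannelMonitor`, and anything the monitor no
longer knows about gets failed backwards. A minimal sketch of that
reconciliation follows, using simplified stand-in types (`HtlcSourceId`
and this fixed-size `PaymentHash` are placeholders, not the real LDK
structs, and the real code compares sources with a linear scan rather
than a hash set):

    use std::collections::HashSet;

    // Stand-in for LDK's `HTLCSource`.
    #[derive(Clone, PartialEq, Eq, Hash, Debug)]
    struct HtlcSourceId(u64);

    // Stand-in for LDK's `PaymentHash`.
    #[derive(Clone, Copy, PartialEq, Eq, Debug)]
    struct PaymentHash([u8; 32]);

    // HTLCs the (stale) manager still believes are in flight, minus those
    // the (fresh) monitor still knows about: whatever remains was removed
    // by a persisted monitor update and must be failed back on reload.
    fn htlcs_to_fail_back(
        manager_htlcs: &[(HtlcSourceId, PaymentHash)],
        monitor_htlcs: &HashSet<HtlcSourceId>,
    ) -> Vec<(HtlcSourceId, PaymentHash)> {
        manager_htlcs.iter()
            .filter(|(source, _)| !monitor_htlcs.contains(source))
            .cloned()
            .collect()
    }

    fn main() {
        let manager_view = vec![
            (HtlcSourceId(1), PaymentHash([1; 32])),
            (HtlcSourceId(2), PaymentHash([2; 32])),
        ];
        // The monitor already persisted the removal of HTLC 2...
        let monitor_view: HashSet<HtlcSourceId> =
            [HtlcSourceId(1)].into_iter().collect();
        // ...so only HTLC 2 must be failed backwards.
        let failed = htlcs_to_fail_back(&manager_view, &monitor_view);
        assert_eq!(failed, vec![(HtlcSourceId(2), PaymentHash([2; 32]))]);
    }

In the diff below, `ChannelManager::read` performs this check for each
channel it force-closes due to a newer monitor, pushing each missing
HTLC onto `failed_htlcs` so it is failed backwards once the node is
running again.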

lightning/src/ln/channel.rs
lightning/src/ln/channelmanager.rs
lightning/src/ln/reload_tests.rs

diff --git a/lightning/src/ln/channel.rs b/lightning/src/ln/channel.rs
index 95ae6987710d904a5508de4960df008340ae1779..561b398b91d0b4321d75d6c0851a4bcfa77ece48 100644
--- a/lightning/src/ln/channel.rs
+++ b/lightning/src/ln/channel.rs
@@ -5951,15 +5951,16 @@ impl<Signer: Sign> Channel<Signer> {
                (monitor_update, dropped_outbound_htlcs)
        }
 
-       pub fn inflight_htlc_sources(&self) -> impl Iterator<Item=&HTLCSource> {
+       pub fn inflight_htlc_sources(&self) -> impl Iterator<Item=(&HTLCSource, &PaymentHash)> {
                self.holding_cell_htlc_updates.iter()
                        .flat_map(|htlc_update| {
                                match htlc_update {
-                                       HTLCUpdateAwaitingACK::AddHTLC { source, .. } => { Some(source) }
-                                       _ => None
+                                       HTLCUpdateAwaitingACK::AddHTLC { source, payment_hash, .. }
+                                               => Some((source, payment_hash)),
+                                       _ => None,
                                }
                        })
-                       .chain(self.pending_outbound_htlcs.iter().map(|htlc| &htlc.source))
+                       .chain(self.pending_outbound_htlcs.iter().map(|htlc| (&htlc.source, &htlc.payment_hash)))
        }
 }
 
diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs
index 2d84bb9cd7369629584c712da98cbf29d1291d7c..7f65719a57ff3a531ad0c648dea0973b4726c3df 100644
--- a/lightning/src/ln/channelmanager.rs
+++ b/lightning/src/ln/channelmanager.rs
@@ -5728,7 +5728,7 @@ impl<M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelManager<M, T, K, F
                let mut inflight_htlcs = InFlightHtlcs::new();
 
                for chan in self.channel_state.lock().unwrap().by_id.values() {
-                       for htlc_source in chan.inflight_htlc_sources() {
+                       for (htlc_source, _) in chan.inflight_htlc_sources() {
                                if let HTLCSource::OutboundRoute { path, .. } = htlc_source {
                                        inflight_htlcs.process_path(path, self.get_our_node_id());
                                }
@@ -5746,6 +5746,12 @@ impl<M: Deref, T: Deref, K: Deref, F: Deref, L: Deref> ChannelManager<M, T, K, F
                events.into_inner()
        }
 
+       #[cfg(test)]
+       pub fn pop_pending_event(&self) -> Option<events::Event> {
+               let mut events = self.pending_events.lock().unwrap();
+               if events.is_empty() { None } else { Some(events.remove(0)) }
+       }
+
        #[cfg(test)]
        pub fn has_pending_payments(&self) -> bool {
                !self.pending_outbound_payments.lock().unwrap().is_empty()
@@ -7206,6 +7212,25 @@ impl<'a, M: Deref, T: Deref, K: Deref, F: Deref, L: Deref>
                                                user_channel_id: channel.get_user_id(),
                                                reason: ClosureReason::OutdatedChannelManager
                                        });
+                                       for (channel_htlc_source, payment_hash) in channel.inflight_htlc_sources() {
+                                               let mut found_htlc = false;
+                                               for (monitor_htlc_source, _) in monitor.get_all_current_outbound_htlcs() {
+                                                       if *channel_htlc_source == monitor_htlc_source { found_htlc = true; break; }
+                                               }
+                                               if !found_htlc {
+                                                       // If we have some HTLCs in the channel which are not present in the newer
+                                                       // ChannelMonitor, they have been removed and should be failed back to
+                                                       // ensure we don't forget them entirely. Note that if the missing HTLC(s)
+                                       // were actually claimed we'd have generated (and ensured persistence of)
+                                       // the previous-hop claim's ChannelMonitor update prior to persisting
+                                                       // the ChannelMonitor update for the forward leg, so attempting to fail the
+                                                       // backwards leg of the HTLC will simply be rejected.
+                                                       log_info!(args.logger,
+                                                               "Failing HTLC with hash {} as it is missing in the ChannelMonitor for channel {} but was present in the (stale) ChannelManager",
+                                                               log_bytes!(payment_hash.0), log_bytes!(channel.channel_id()));
+                                                       failed_htlcs.push((channel_htlc_source.clone(), *payment_hash, channel.get_counterparty_node_id(), channel.channel_id()));
+                                               }
+                                       }
                                } else {
                                        log_info!(args.logger, "Successfully loaded channel {}", log_bytes!(channel.channel_id()));
                                        if let Some(short_channel_id) = channel.get_short_channel_id() {
diff --git a/lightning/src/ln/reload_tests.rs b/lightning/src/ln/reload_tests.rs
index 7e10f970aea6a8c3b284bb586c26920683888d57..88185315148bc83ce3466b72a37ace7a5912913f 100644
--- a/lightning/src/ln/reload_tests.rs
+++ b/lightning/src/ln/reload_tests.rs
@@ -933,3 +933,72 @@ fn forwarded_payment_no_manager_persistence() {
        do_forwarded_payment_no_manager_persistence(true, false);
        do_forwarded_payment_no_manager_persistence(false, false);
 }
+
+#[test]
+fn removed_payment_no_manager_persistence() {
+       // If an HTLC is failed to us on a channel, and the ChannelMonitor persistence completes, but
+       // the corresponding ChannelManager persistence does not, we need to ensure that the HTLC is
+       // still failed back to the previous hop even though the ChannelMonitor is no longer aware
+       // of the HTLC. This was previously broken as no attempt was made to figure out which HTLCs
+       // were left dangling when a channel was force-closed due to a stale ChannelManager.
+       let chanmon_cfgs = create_chanmon_cfgs(3);
+       let node_cfgs = create_node_cfgs(3, &chanmon_cfgs);
+       let node_chanmgrs = create_node_chanmgrs(3, &node_cfgs, &[None, None, None]);
+
+       let persister;
+       let new_chain_monitor;
+       let nodes_1_deserialized;
+
+       let mut nodes = create_network(3, &node_cfgs, &node_chanmgrs);
+
+       let chan_id_1 = create_announced_chan_between_nodes(&nodes, 0, 1, channelmanager::provided_init_features(), channelmanager::provided_init_features()).2;
+       let chan_id_2 = create_announced_chan_between_nodes(&nodes, 1, 2, channelmanager::provided_init_features(), channelmanager::provided_init_features()).2;
+
+       let (_, payment_hash, _) = route_payment(&nodes[0], &[&nodes[1], &nodes[2]], 1_000_000);
+
+       let node_encoded = nodes[1].node.encode();
+
+       nodes[2].node.fail_htlc_backwards(&payment_hash);
+       expect_pending_htlcs_forwardable_and_htlc_handling_failed!(nodes[2], [HTLCDestination::FailedPayment { payment_hash }]);
+       check_added_monitors!(nodes[2], 1);
+       let events = nodes[2].node.get_and_clear_pending_msg_events();
+       assert_eq!(events.len(), 1);
+       match &events[0] {
+               MessageSendEvent::UpdateHTLCs { updates: msgs::CommitmentUpdate { update_fail_htlcs, commitment_signed, .. }, .. } => {
+                       nodes[1].node.handle_update_fail_htlc(&nodes[2].node.get_our_node_id(), &update_fail_htlcs[0]);
+                       commitment_signed_dance!(nodes[1], nodes[2], commitment_signed, false);
+               },
+               _ => panic!("Unexpected event"),
+       }
+
+       let chan_0_monitor_serialized = get_monitor!(nodes[1], chan_id_1).encode();
+       let chan_1_monitor_serialized = get_monitor!(nodes[1], chan_id_2).encode();
+       reload_node!(nodes[1], node_encoded, &[&chan_0_monitor_serialized, &chan_1_monitor_serialized], persister, new_chain_monitor, nodes_1_deserialized);
+
+       match nodes[1].node.pop_pending_event().unwrap() {
+               Event::ChannelClosed { ref reason, .. } => {
+                       assert_eq!(*reason, ClosureReason::OutdatedChannelManager);
+               },
+               _ => panic!("Unexpected event"),
+       }
+
+       // Now that the ChannelManager has force-closed the channel which had the HTLC removed, the
+       // HTLC is forgotten everywhere. The ChannelManager should have, as a side-effect of reload,
+       // learned that the HTLC is gone from the ChannelMonitor and added it to the to-fail-back set.
+       nodes[0].node.peer_disconnected(&nodes[1].node.get_our_node_id(), true);
+       reconnect_nodes(&nodes[0], &nodes[1], (false, false), (0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (false, false));
+
+       expect_pending_htlcs_forwardable_and_htlc_handling_failed!(nodes[1], [HTLCDestination::NextHopChannel { node_id: Some(nodes[2].node.get_our_node_id()), channel_id: chan_id_2 }]);
+       check_added_monitors!(nodes[1], 1);
+       let events = nodes[1].node.get_and_clear_pending_msg_events();
+       assert_eq!(events.len(), 1);
+       match &events[0] {
+               MessageSendEvent::UpdateHTLCs { updates: msgs::CommitmentUpdate { update_fail_htlcs, commitment_signed, .. }, .. } => {
+                       nodes[0].node.handle_update_fail_htlc(&nodes[1].node.get_our_node_id(), &update_fail_htlcs[0]);
+                       commitment_signed_dance!(nodes[0], nodes[1], commitment_signed, false);
+               },
+               _ => panic!("Unexpected event"),
+       }
+
+       expect_payment_failed!(nodes[0], payment_hash, false);
+}